@@ -1,174 +0,0 @@ | |||
macos_instance: | |||
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest | |||
#task: | |||
# name: AppleM1/LLVM | |||
# compile_script: | |||
# - brew install llvm | |||
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH | |||
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib" | |||
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" | |||
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang | |||
#task: | |||
# name: AppleM1/LLVM/ILP64 | |||
# compile_script: | |||
# - brew install llvm | |||
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH | |||
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib" | |||
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" | |||
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang INTERFACE64=1 | |||
#task: | |||
# name: AppleM1/LLVM/CMAKE | |||
# compile_script: | |||
# - brew install llvm | |||
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH | |||
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib" | |||
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" | |||
# - mkdir build | |||
# - cd build | |||
# - cmake -DTARGET=VORTEX -DCMAKE_C_COMPILER=clang -DBUILD_SHARED_LIBS=ON .. | |||
# - make -j 4 | |||
#task: | |||
# name: AppleM1/GCC/MAKE/OPENMP | |||
# compile_script: | |||
# - brew install gcc@11 | |||
# - export PATH=/opt/homebrew/bin:$PATH | |||
# - export LDFLAGS="-L/opt/homebrew/lib" | |||
# - export CPPFLAGS="-I/opt/homebrew/include" | |||
# - make CC=gcc-11 FC=gfortran-11 USE_OPENMP=1 | |||
macos_instance: | |||
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest | |||
# Cross-builds the library for x86_64 (-arch x86_64, TARGET=CORE2, DYNAMIC_ARCH=1) without Fortran, | |||
# using the clang binary and macOS SDK at the hard-coded Xcode_16.3 paths below. | |||
# NOTE(review): those Xcode_16.3 / MacOSX15.4.sdk paths must exist in the selected image - confirm whenever the image tag moves. | |||
task: | |||
name: AppleM1/LLVM x86_64 xbuild | |||
compile_script: | |||
# The next four entries are a disabled Homebrew LLVM setup: " #" starts a YAML comment, | |||
# so the first entry is empty and the other three reduce to a bare `export`. | |||
- #brew install llvm | |||
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH | |||
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib" | |||
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include" | |||
# Xcode-style architecture variables. NOTE(review): each script entry may run in its own shell, | |||
# in which case these exports do not reach the final make - confirm they are still needed. | |||
- export ARCHS="i386 x86_64" | |||
- export ARCHS_STANDARD="i386 x86_64" | |||
- export ARCHS_STANDARD_32_64_BIT="i386 x86_64" | |||
- export ARCHS_STANDARD_64_BIT=x86_64 | |||
- export ARCHS_STANDARD_INCLUDING_64_BIT="i386 x86_64" | |||
- export ARCHS_UNIVERSAL_IPHONE_OS="i386 x86_64" | |||
- export VALID_ARCHS="i386 x86_64" | |||
# Diagnostics only: print the active macOS SDK path and the Xcode version into the log. | |||
- xcrun --sdk macosx --show-sdk-path | |||
- xcodebuild -version | |||
- export CC=/Applications/Xcode_16.3.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang | |||
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_16.3.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX15.4.sdk -arch x86_64" | |||
# RANLIB is replaced by `ls -l`; presumably to skip running ranlib on the cross-built archive - confirm. | |||
- make TARGET=CORE2 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l" | |||
# Publish every file matching *conf* as a text artifact. | |||
always: | |||
config_artifacts: | |||
path: "*conf*" | |||
type: text/plain | |||
# lib_artifacts: | |||
# path: "libopenblas*" | |||
# type: application/octet-stream | |||
macos_instance: | |||
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest | |||
# Cross-builds the library for iOS arm64 (TARGET=ARMV8, CROSS=1, no Fortran) with the Xcode clang | |||
# and the iPhoneOS SDK at the hard-coded Xcode_16.3 paths below; minimum iOS version is 10.0. | |||
# NOTE(review): the Xcode_16.3 / iPhoneOS18.4.sdk paths must exist in the selected image - confirm whenever the image tag moves. | |||
task: | |||
name: AppleM1/LLVM armv8-ios xbuild | |||
compile_script: | |||
# Disabled Homebrew LLVM setup: " #" starts a YAML comment, so the first entry is empty | |||
# and the other three reduce to a bare `export`. | |||
- #brew install llvm | |||
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH | |||
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib" | |||
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include" | |||
- export CC=/Applications/Xcode_16.3.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang | |||
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_16.3.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS18.4.sdk -arch arm64 -miphoneos-version-min=10.0" | |||
# Diagnostics only: print the iPhoneOS SDK path and list /Applications (shows which Xcode bundles are installed). | |||
- xcrun --sdk iphoneos --show-sdk-path | |||
- ls -l /Applications | |||
- make TARGET=ARMV8 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 CROSS=1 | |||
# Publish every file matching *conf* as a text artifact. | |||
always: | |||
config_artifacts: | |||
path: "*conf*" | |||
type: text/plain | |||
macos_instance: | |||
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest | |||
# Cross-builds the library for 32-bit ARM Android (TARGET=ARMV7, soft-float ABI, no Fortran) | |||
# with the clang wrapper from the Homebrew-installed Android NDK. | |||
task: | |||
name: AppleM1/LLVM armv7-androidndk xbuild | |||
compile_script: | |||
- brew install --cask android-ndk | |||
- export ANDROID_NDK_HOME="/opt/homebrew/share/android-ndk" | |||
# The compiler name selects the armv7a target at Android API level 23. | |||
# NOTE(review): the prebuilt directory is named darwin-x86_64 although the task name says AppleM1 - confirm it runs on the host as expected. | |||
- export CC=/opt/homebrew/share/android-ndk/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi23-clang | |||
# RANLIB is replaced by `ls -l`; presumably to skip running the host ranlib on the cross-built archive - confirm. | |||
- make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l" | |||
# Publish every file matching *conf* as a text artifact. | |||
always: | |||
config_artifacts: | |||
path: "*conf*" | |||
type: text/plain | |||
# Three builds inside an arm_container: default options, INTERFACE64=1, and USE_OPENMP=1. | |||
# No TARGET is passed to make in any of them. | |||
# NOTE(review): the task names suggest the container hosts are Neoverse N1 machines - not verifiable from this file. | |||
# NOTE(review): node:latest is a floating tag; presumably chosen because it ships make and a compiler - confirm. | |||
task: | |||
name: NeoverseN1 | |||
arm_container: | |||
image: node:latest | |||
compile_script: | |||
- make | |||
task: | |||
name: NeoverseN1-ILP64 | |||
arm_container: | |||
image: node:latest | |||
compile_script: | |||
- make INTERFACE64=1 | |||
task: | |||
name: NeoverseN1-OMP | |||
arm_container: | |||
image: node:latest | |||
# Only this task sets a cpu value for its container. | |||
cpu: 8 | |||
compile_script: | |||
- make USE_OPENMP=1 | |||
# Three FreeBSD builds on the freebsd-14-2 image family: gcc, gcc with INTERFACE64=1, | |||
# and clang + gfortran with OpenMP. Each one first refreshes packages and installs gmake and gcc. | |||
FreeBSD_task: | |||
name: FreeBSD-gcc | |||
freebsd_instance: | |||
image_family: freebsd-14-2 | |||
install_script: | |||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc | |||
compile_script: | |||
# Diagnostics only: list /usr/local/lib so the installed libraries are visible in the log. | |||
- ls -l /usr/local/lib | |||
- gmake CC=gcc | |||
FreeBSD_task: | |||
name: freebsd-gcc-ilp64 | |||
freebsd_instance: | |||
image_family: freebsd-14-2 | |||
install_script: | |||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc | |||
compile_script: | |||
- ls -l /usr/local/lib | |||
- gmake CC=gcc INTERFACE64=1 | |||
FreeBSD_task: | |||
name: FreeBSD-clang-openmp | |||
freebsd_instance: | |||
image_family: freebsd-14-2 | |||
install_script: | |||
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc | |||
# Expose the gfortran runtime under /usr/lib, presumably so the clang-driven link can find it. | |||
# NOTE(review): the gcc13 directory and the 5.0.0 suffix are hard-coded and must match what `pkg install gcc` provides - confirm on image updates. | |||
- ln -s /usr/local/lib/gcc13/libgfortran.so.5.0.0 /usr/lib/libgfortran.so | |||
compile_script: | |||
- gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1 | |||
#task: | |||
# name: Windows/LLVM16 --- too slow --- | |||
# windows_container: | |||
# image: cirrusci/windowsservercore:cmake-2021.12.07 | |||
# install_script: | |||
# - choco list --localonly | |||
# - choco install -y llvm | |||
# - # choco install -y cmake --installargs '"ADD_CMAKE_TO_PATH=System"' | |||
# - choco install -y ninja | |||
# - refreshenv | |||
# - cd "c:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Auxiliary/Build" | |||
# - vcvarsall x64 | |||
# - cd "C:\Users\ContainerAdministrator\AppData\Local\Temp\cirrus-ci-build" | |||
# - cmake -S . -B build -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release | |||
# - cd build | |||
# - cmake --build . | |||
# - ctest |
@@ -1,16 +0,0 @@ | |||
# Self-hosted GitHub Actions runners on AWS via Cirun.io | |||
# Reference: https://docs.cirun.io/reference/yaml | |||
runners: | |||
- name: "aws-runner-graviton" | |||
# Cloud Provider: AWS | |||
cloud: "aws" | |||
region: "us-east-1" | |||
# Cheapest VM on AWS | |||
instance_type: "c7g.large" | |||
# Ubuntu 22.04 AMI | |||
machine_image: "ami-0a0c8eebcdd6dcbd0" | |||
preemptible: false | |||
# Use this label in the "runs-on" field of .github/workflows/<workflow-name>.yml | |||
# so that this runner is created to run that workflow. | |||
labels: | |||
- "cirun-aws-runner-graviton" |
@@ -1,216 +0,0 @@ | |||
--- | |||
# Pipeline for linux/arm64 (appears to be Drone CI syntax): inside ubuntu:18.04 it installs make, | |||
# the compiler named by $CC, gfortran and perl, builds with COMMON_FLAGS, and then builds the | |||
# test, ctest and utest directories with the same flags. | |||
# The pipelines that follow repeat this pattern with other arch / compiler / build-tool combinations. | |||
kind: pipeline | |||
name: arm64_gcc_make | |||
platform: | |||
os: linux | |||
arch: arm64 | |||
steps: | |||
- name: Build and Test | |||
image: ubuntu:18.04 | |||
environment: | |||
CC: gcc | |||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32' | |||
commands: | |||
# Log line only: prints COMMON_FLAGS under the label "MAKE_FLAGS"; no MAKE_FLAGS variable is set. | |||
- echo "MAKE_FLAGS:= $COMMON_FLAGS" | |||
- apt-get update -y | |||
- apt-get install -y make $CC gfortran perl | |||
- $CC --version | |||
- make QUIET_MAKE=1 $COMMON_FLAGS | |||
- make -C test $COMMON_FLAGS | |||
- make -C ctest $COMMON_FLAGS | |||
- make -C utest $COMMON_FLAGS | |||
--- | |||
kind: pipeline | |||
name: arm32_gcc_make | |||
platform: | |||
os: linux | |||
arch: arm | |||
steps: | |||
- name: Build and Test | |||
image: ubuntu:18.04 | |||
environment: | |||
CC: gcc | |||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV6 NUM_THREADS=32' | |||
commands: | |||
- echo "MAKE_FLAGS:= $COMMON_FLAGS" | |||
- apt-get update -y | |||
- apt-get install -y make $CC gfortran perl | |||
- $CC --version | |||
- make QUIET_MAKE=1 $COMMON_FLAGS | |||
- make -C test $COMMON_FLAGS | |||
- make -C ctest $COMMON_FLAGS | |||
- make -C utest $COMMON_FLAGS | |||
--- | |||
kind: pipeline | |||
name: arm64_clang_make | |||
platform: | |||
os: linux | |||
arch: arm64 | |||
steps: | |||
- name: Build and Test | |||
image: ubuntu:18.04 | |||
environment: | |||
CC: clang | |||
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32' | |||
commands: | |||
- echo "MAKE_FLAGS:= $COMMON_FLAGS" | |||
- apt-get update -y | |||
- apt-get install -y make $CC gfortran perl | |||
- $CC --version | |||
- make QUIET_MAKE=1 $COMMON_FLAGS | |||
- make -C test $COMMON_FLAGS | |||
- make -C ctest $COMMON_FLAGS | |||
- make -C utest $COMMON_FLAGS | |||
--- | |||
kind: pipeline | |||
name: arm32_clang_cmake | |||
platform: | |||
os: linux | |||
arch: arm | |||
steps: | |||
- name: Build and Test | |||
image: ubuntu:18.04 | |||
environment: | |||
CC: clang | |||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV6 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON' | |||
commands: | |||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS" | |||
- apt-get update -y | |||
- apt-get install -y make $CC g++ perl cmake | |||
- $CC --version | |||
- mkdir build && cd build | |||
- cmake $CMAKE_FLAGS .. | |||
- make -j | |||
- ctest -V | |||
--- | |||
kind: pipeline | |||
name: arm64_gcc_cmake | |||
platform: | |||
os: linux | |||
arch: arm64 | |||
steps: | |||
- name: Build and Test | |||
image: ubuntu:18.04 | |||
environment: | |||
CC: gcc | |||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON' | |||
commands: | |||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS" | |||
- apt-get update -y | |||
- apt-get install -y make $CC g++ perl cmake | |||
- $CC --version | |||
- mkdir build && cd build | |||
- cmake $CMAKE_FLAGS .. | |||
- make -j | |||
- ctest -V | |||
--- | |||
kind: pipeline | |||
name: arm64_clang_cmake | |||
platform: | |||
os: linux | |||
arch: arm64 | |||
steps: | |||
- name: Build and Test | |||
image: ubuntu:18.04 | |||
environment: | |||
CC: clang | |||
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON' | |||
commands: | |||
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS" | |||
- apt-get update -y | |||
- apt-get install -y make $CC g++ perl cmake | |||
- $CC --version | |||
- mkdir build && cd build | |||
- cmake $CMAKE_FLAGS .. | |||
- make -j | |||
- ctest -V | |||
--- | |||
kind: pipeline | |||
name: arm64_native_test | |||
platform: | |||
os: linux | |||
arch: arm64 | |||
steps: | |||
- name: Build and Test | |||
image: ubuntu:18.04 | |||
environment: | |||
CC: gcc | |||
COMMON_FLAGS: 'USE_OPENMP=1' | |||
commands: | |||
- echo "MAKE_FLAGS:= $COMMON_FLAGS" | |||
- apt-get update -y | |||
- apt-get install -y make $CC gfortran perl python g++ | |||
- $CC --version | |||
- make QUIET_MAKE=1 $COMMON_FLAGS | |||
- make -C test $COMMON_FLAGS | |||
- make -C ctest $COMMON_FLAGS | |||
- make -C utest $COMMON_FLAGS | |||
- make -C cpp_thread_test dgemm_tester | |||
--- | |||
kind: pipeline | |||
name: epyc_native_test | |||
platform: | |||
os: linux | |||
arch: amd64 | |||
steps: | |||
- name: Build and Test | |||
image: ubuntu:18.04 | |||
environment: | |||
CC: gcc | |||
COMMON_FLAGS: 'USE_OPENMP=1' | |||
commands: | |||
- echo "MAKE_FLAGS:= $COMMON_FLAGS" | |||
- apt-get update -y | |||
- apt-get install -y make $CC gfortran perl python g++ | |||
- $CC --version | |||
- make QUIET_MAKE=1 $COMMON_FLAGS | |||
- make -C test $COMMON_FLAGS | |||
- make -C ctest $COMMON_FLAGS | |||
- make -C utest $COMMON_FLAGS | |||
- make -C cpp_thread_test dgemm_tester | |||
--- | |||
kind: pipeline | |||
name: arm64_gcc10 | |||
platform: | |||
os: linux | |||
arch: arm64 | |||
steps: | |||
- name: Build and Test | |||
image: ubuntu:20.04 | |||
environment: | |||
CC: gcc-10 | |||
FC: gfortran-10 | |||
COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1' | |||
commands: | |||
- echo "MAKE_FLAGS:= $COMMON_FLAGS" | |||
- apt-get update -y | |||
- apt-get install -y make $CC gfortran-10 perl python g++ | |||
- $CC --version | |||
- make QUIET_MAKE=1 $COMMON_FLAGS | |||
- make -C utest $COMMON_FLAGS | |||
- make -C test $COMMON_FLAGS | |||
@@ -1,156 +0,0 @@ | |||
name: apple m | |||
on: [push, pull_request] | |||
concurrency: | |||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
cancel-in-progress: true | |||
permissions: | |||
contents: read # to fetch code (actions/checkout) | |||
jobs: | |||
build: | |||
if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
runs-on: macos-14 | |||
strategy: | |||
fail-fast: false | |||
matrix: | |||
build: [cmake, make] | |||
fortran: [gfortran] | |||
openmp: [0, 1] | |||
ilp64: [0, 1] | |||
steps: | |||
- name: Checkout repository | |||
uses: actions/checkout@v3 | |||
- name: Print system information | |||
run: | | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
cat /proc/cpuinfo | |||
elif [ "$RUNNER_OS" == "macOS" ]; then | |||
sysctl -a | grep machdep.cpu | |||
else | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
- name: Install Dependencies | |||
run: | | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
sudo apt-get install -y gfortran cmake ccache libtinfo5 | |||
elif [ "$RUNNER_OS" == "macOS" ]; then | |||
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed. | |||
brew reinstall gcc | |||
brew install coreutils ccache | |||
brew install llvm | |||
else | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
- name: Compilation cache | |||
uses: actions/cache@v3 | |||
with: | |||
path: ~/.ccache | |||
# We include the commit sha in the cache key, as new cache entries are | |||
# only created if there is no existing entry for the key yet. | |||
# GNU make and cmake call the compilers differently. It looks like | |||
# that causes the cache to mismatch. Keep the ccache for both build | |||
# tools separate to avoid polluting each other. | |||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} | |||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. | |||
restore-keys: | | |||
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}-${{ github.ref }} | |||
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }} | |||
ccache-${{ runner.os }}-${{ matrix.build }} | |||
# For make builds, put the ccache compiler wrappers (and, on macOS, the Homebrew LLVM bin directory) | |||
# on PATH for later steps via $GITHUB_PATH; then write a size-limited, compressed ccache configuration. | |||
- name: Configure ccache | |||
run: | | |||
if [ "${{ matrix.build }}" = "make" ]; then | |||
# Add ccache to path | |||
if [ "$RUNNER_OS" = "Linux" ]; then | |||
echo "/usr/lib/ccache" >> $GITHUB_PATH | |||
elif [ "$RUNNER_OS" = "macOS" ]; then | |||
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH | |||
echo "/opt/homebrew/opt/llvm/bin" >>$GITHUB_PATH | |||
else | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
fi | |||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). | |||
test -d ~/.ccache || mkdir -p ~/.ccache | |||
echo "max_size = 300M" > ~/.ccache/ccache.conf | |||
echo "compression = true" >> ~/.ccache/ccache.conf | |||
ccache -s | |||
- name: Add gfortran runtime to link path | |||
if: matrix.build == 'make' && runner.os == 'macOS' | |||
run: | | |||
GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname) | |||
echo "Using gfortran runtime in $GFORTRAN_LIBDIR" | |||
echo "LDFLAGS=-L/opt/homebrew/opt/llvm/lib -L$GFORTRAN_LIBDIR" >> $GITHUB_ENV | |||
- name: Build OpenBLAS | |||
run: | | |||
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include" | |||
export CC="/opt/homebrew/opt/llvm/bin/clang" | |||
case "${{ matrix.build }}" in | |||
"make") | |||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=${{matrix.openmp}} INTERFACE64=${{matrix.ilp64}} FC="ccache ${{ matrix.fortran }}" | |||
;; | |||
"cmake") | |||
export LDFLAGS="$LDFLAGS -Wl,-ld_classic" | |||
mkdir build && cd build | |||
cmake -DDYNAMIC_ARCH=1 \ | |||
-DUSE_OPENMP=${{matrix.openmp}} \ | |||
-DOpenMP_Fortran_LIB_NAMES=omp \ | |||
-DINTERFACE64=${{matrix.ilp64}} \ | |||
-DNOFORTRAN=0 \ | |||
-DBUILD_WITHOUT_LAPACK=0 \ | |||
-DCMAKE_VERBOSE_MAKEFILE=ON \ | |||
-DCMAKE_BUILD_TYPE=Release \ | |||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ | |||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \ | |||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ | |||
.. | |||
cmake --build . | |||
;; | |||
*) | |||
echo "::error::Configuration not supported" | |||
exit 1 | |||
;; | |||
esac | |||
- name: Show ccache status | |||
continue-on-error: true | |||
run: ccache -s | |||
# Runs the test suites: for make builds the test, ctest and utest directories in turn (each in its | |||
# own log group); for cmake builds, ctest inside the build directory. | |||
# NOTE(review): the make branch always passes USE_OPENMP=0 and no INTERFACE64, whereas the | |||
# "Build OpenBLAS" step builds with matrix.openmp and matrix.ilp64 - confirm the mismatch is intended. | |||
- name: Run tests | |||
timeout-minutes: 60 | |||
run: | | |||
case "${{ matrix.build }}" in | |||
"make") | |||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0' | |||
echo "::group::Tests in 'test' directory" | |||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
echo "::endgroup::" | |||
echo "::group::Tests in 'ctest' directory" | |||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
echo "::endgroup::" | |||
echo "::group::Tests in 'utest' directory" | |||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
echo "::endgroup::" | |||
;; | |||
"cmake") | |||
cd build && ctest | |||
;; | |||
*) | |||
echo "::error::Configuration not supported" | |||
exit 1 | |||
;; | |||
esac | |||
@@ -1,140 +0,0 @@ | |||
name: arm64 graviton cirun | |||
on: | |||
push: | |||
branches: | |||
- develop | |||
- release-** | |||
pull_request: | |||
branches: | |||
- develop | |||
- release-** | |||
concurrency: | |||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
cancel-in-progress: true | |||
permissions: | |||
contents: read # to fetch code (actions/checkout) | |||
jobs: | |||
build: | |||
if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}" | |||
strategy: | |||
fail-fast: false | |||
matrix: | |||
fortran: [gfortran] | |||
build: [cmake, make] | |||
steps: | |||
- name: Checkout repository | |||
uses: actions/checkout@v3 | |||
- name: Print system information | |||
run: | | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
cat /proc/cpuinfo | |||
else | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
- name: Install Dependencies | |||
run: | | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
sudo apt update | |||
sudo apt-get install -y gfortran cmake ccache libtinfo5 | |||
else | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
- name: Compilation cache | |||
uses: actions/cache@v3 | |||
with: | |||
path: ~/.ccache | |||
# We include the commit sha in the cache key, as new cache entries are | |||
# only created if there is no existing entry for the key yet. | |||
# GNU make and cmake call the compilers differently. It looks like | |||
# that causes the cache to mismatch. Keep the ccache for both build | |||
# tools separate to avoid polluting each other. | |||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} | |||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. | |||
restore-keys: | | |||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }} | |||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }} | |||
ccache-${{ runner.os }}-${{ matrix.build }} | |||
- name: Configure ccache | |||
run: | | |||
if [ "${{ matrix.build }}" = "make" ]; then | |||
# Add ccache to path | |||
if [ "$RUNNER_OS" = "Linux" ]; then | |||
echo "/usr/lib/ccache" >> $GITHUB_PATH | |||
else | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
fi | |||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). | |||
test -d ~/.ccache || mkdir -p ~/.ccache | |||
echo "max_size = 300M" > ~/.ccache/ccache.conf | |||
echo "compression = true" >> ~/.ccache/ccache.conf | |||
ccache -s | |||
- name: Build OpenBLAS | |||
run: | | |||
case "${{ matrix.build }}" in | |||
"make") | |||
make -j$(nproc) DYNAMIC_ARCH=1 BUILD_BFLOAT16=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" | |||
;; | |||
"cmake") | |||
mkdir build && cd build | |||
cmake -DDYNAMIC_ARCH=1 \ | |||
-DNOFORTRAN=0 \ | |||
-DBUILD_WITHOUT_LAPACK=0 \ | |||
-DBUILD_BFLOAT16=1 \ | |||
-DCMAKE_VERBOSE_MAKEFILE=ON \ | |||
-DCMAKE_BUILD_TYPE=Release \ | |||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ | |||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \ | |||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ | |||
.. | |||
cmake --build . | |||
;; | |||
*) | |||
echo "::error::Configuration not supported" | |||
exit 1 | |||
;; | |||
esac | |||
- name: Show ccache status | |||
continue-on-error: true | |||
run: ccache -s | |||
- name: Run tests | |||
timeout-minutes: 60 | |||
run: | | |||
case "${{ matrix.build }}" in | |||
"make") | |||
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0' | |||
echo "::group::Tests in 'test' directory" | |||
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
echo "::endgroup::" | |||
echo "::group::Tests in 'ctest' directory" | |||
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
echo "::endgroup::" | |||
echo "::group::Tests in 'utest' directory" | |||
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}" | |||
echo "::endgroup::" | |||
;; | |||
"cmake") | |||
cd build && ctest | |||
;; | |||
*) | |||
echo "::error::Configuration not supported" | |||
exit 1 | |||
;; | |||
esac |
@@ -1,158 +0,0 @@ | |||
name: c910v qemu test | |||
on: [push, pull_request] | |||
concurrency: | |||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
cancel-in-progress: true | |||
permissions: | |||
contents: read # to fetch code (actions/checkout) | |||
jobs: | |||
TEST: | |||
if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
runs-on: ubuntu-latest | |||
env: | |||
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1698113812618 | |||
toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0-20231018.tar.gz | |||
strategy: | |||
fail-fast: false | |||
matrix: | |||
include: | |||
- target: RISCV64_GENERIC | |||
triple: riscv64-linux-gnu | |||
apt_triple: riscv64-linux-gnu | |||
opts: NO_SHARED=1 TARGET=RISCV64_GENERIC | |||
- target: C910V | |||
triple: riscv64-unknown-linux-gnu | |||
apt_triple: riscv64-linux-gnu | |||
opts: NO_SHARED=1 TARGET=C910V | |||
steps: | |||
- name: Checkout repository | |||
uses: actions/checkout@v4 | |||
- name: install build deps | |||
run: | | |||
sudo apt-get update | |||
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \ | |||
gcc-${{ matrix.apt_triple }} gfortran-${{ matrix.apt_triple }} libgomp1-riscv64-cross libglib2.0-dev | |||
- name: checkout qemu | |||
uses: actions/checkout@v4 | |||
with: | |||
repository: XUANTIE-RV/qemu | |||
path: qemu | |||
ref: e0ace167effcd36d1f82c7ccb4522b3126011479 # xuantie-qemu-9.0 | |||
- name: build qemu | |||
run: | | |||
# Force use c910v qemu-user | |||
wget https://github.com/revyos/qemu/commit/222729c7455784dd855216d7a2bec4bd8f2a6800.patch | |||
cd qemu | |||
patch -p1 < ../222729c7455784dd855216d7a2bec4bd8f2a6800.patch | |||
export CXXFLAGS="-Wno-error"; export CFLAGS="-Wno-error" | |||
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system | |||
make -j$(nproc) | |||
make install | |||
- name: Compilation cache | |||
uses: actions/cache@v3 | |||
with: | |||
path: ~/.ccache | |||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} | |||
restore-keys: | | |||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} | |||
ccache-${{ runner.os }}-${{ matrix.target }} | |||
- name: Configure ccache | |||
run: | | |||
test -d ~/.ccache || mkdir -p ~/.ccache | |||
echo "max_size = 300M" > ~/.ccache/ccache.conf | |||
echo "compression = true" >> ~/.ccache/ccache.conf | |||
ccache -s | |||
- name: build OpenBLAS | |||
run: | | |||
wget ${xuetie_toolchain}/${toolchain_file_name} | |||
tar -xvf ${toolchain_file_name} -C /opt | |||
export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0/bin:$PATH" | |||
make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc) | |||
- name: test | |||
run: | | |||
# Run a command under `timeout`, retrying only when the attempt timed out. | |||
#   $1  command line; expanded unquoted on purpose so it word-splits into program and arguments. | |||
# The first attempt gets 10 seconds and every retry 5 seconds more, for at most 10 attempts. | |||
# Any failure other than a timeout aborts the whole script immediately with that exit code. | |||
run_with_retry() { | |||
local cmd="$1" | |||
local time_out=10 | |||
local retries=10 | |||
# Declared once at function scope so the final `exit` below always sees a defined value. | |||
local exit_code=0 | |||
for ((i=1; i<=retries; i++)); do | |||
# timeout kills with signal 12; --preserve-status then reports the command's own status, 128+12=140. | |||
if timeout -s 12 --preserve-status $time_out $cmd; then | |||
echo "Command succeeded on attempt $i." | |||
return 0 | |||
else | |||
exit_code=$? | |||
if [ $exit_code -eq 140 ]; then | |||
echo "Attempt $i timed out (retrying...)" | |||
time_out=$((time_out + 5)) | |||
else | |||
echo "Attempt $i failed with exit code $exit_code. Aborting workflow." | |||
exit $exit_code | |||
fi | |||
fi | |||
done | |||
echo "All $retries attempts failed, giving up." | |||
echo "Final failure was due to timeout." | |||
echo "Aborting workflow." | |||
exit $exit_code | |||
} | |||
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH | |||
which qemu-riscv64 | |||
export QEMU_BIN=$(which qemu-riscv64) | |||
run_with_retry "$QEMU_BIN ./utest/openblas_utest" | |||
run_with_retry "$QEMU_BIN ./utest/openblas_utest_ext" | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat2 < ./ctest/sin2 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat2 < ./ctest/din2 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat2 < ./ctest/cin2 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat2 < ./ctest/zin2 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat3 < ./ctest/sin3 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat3 < ./ctest/din3 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat3 < ./ctest/cin3 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat3 < ./ctest/zin3 | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat1 | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat1 | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat1 | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat1 | |||
rm -f ./test/?BLAT2.SUMM | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat | |||
rm -f ./test/?BLAT2.SUMM | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat | |||
rm -f ./test/?BLAT3.SUMM | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat | |||
rm -f ./test/?BLAT3.SUMM | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat |
@@ -1,157 +0,0 @@ | |||
name: Run codspeed benchmarks | |||
on: [push, pull_request] | |||
concurrency: | |||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
cancel-in-progress: true | |||
permissions: | |||
contents: read # to fetch code (actions/checkout) | |||
jobs: | |||
benchmarks: | |||
if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
strategy: | |||
fail-fast: false | |||
matrix: | |||
os: [ubuntu-22.04] | |||
fortran: [gfortran] | |||
build: [make] | |||
pyver: ["3.12"] | |||
runs-on: ${{ matrix.os }} | |||
steps: | |||
- uses: actions/checkout@v3 | |||
- uses: actions/setup-python@v3 | |||
with: | |||
python-version: ${{ matrix.pyver }} | |||
- name: Print system information | |||
run: | | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
cat /proc/cpuinfo | |||
fi | |||
- name: Install Dependencies | |||
run: | | |||
if [ "$RUNNER_OS" == "Linux" ]; then | |||
sudo apt-get update | |||
sudo apt-get install -y gfortran cmake ccache libtinfo5 | |||
else | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
- name: Compilation cache | |||
uses: actions/cache@v3 | |||
with: | |||
path: ~/.ccache | |||
# We include the commit sha in the cache key, as new cache entries are | |||
# only created if there is no existing entry for the key yet. | |||
# GNU make and cmake call the compilers differently. It looks like | |||
# that causes the cache to mismatch. Keep the ccache for both build | |||
# tools separate to avoid polluting each other. | |||
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }} | |||
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler. | |||
restore-keys: | | |||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }} | |||
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }} | |||
ccache-${{ runner.os }}-${{ matrix.build }} | |||
# Writes benchmark/pybench/openblas.pc, a pkg-config file pointing at the library built in the workspace root. | |||
# The heredoc delimiter is quoted so the shell leaves ${version} untouched for pkg-config to resolve; | |||
# the ${{ ... }} expressions are substituted by GitHub Actions before the script runs, so they are unaffected. | |||
- name: Write out the .pc | |||
run: | | |||
cd benchmark/pybench | |||
cat > openblas.pc << 'EOF' | |||
libdir=${{ github.workspace }} | |||
includedir= ${{ github.workspace }} | |||
openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64 | |||
version=0.0.99 | |||
extralib=-lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas | |||
Name: openblas | |||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version | |||
Version: ${version} | |||
URL: https://github.com/xianyi/OpenBLAS | |||
Libs: ${{ github.workspace }}/libopenblas.so -Wl,-rpath,${{ github.workspace }} | |||
Libs.private: -lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas | |||
Cflags: -I${{ github.workspace}} | |||
EOF | |||
cat openblas.pc | |||
- name: Configure ccache | |||
run: | | |||
if [ "${{ matrix.build }}" = "make" ]; then | |||
# Add ccache to path | |||
if [ "$RUNNER_OS" = "Linux" ]; then | |||
echo "/usr/lib/ccache" >> $GITHUB_PATH | |||
elif [ "$RUNNER_OS" = "macOS" ]; then | |||
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH | |||
else | |||
echo "::error::$RUNNER_OS not supported" | |||
exit 1 | |||
fi | |||
fi | |||
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB). | |||
test -d ~/.ccache || mkdir -p ~/.ccache | |||
echo "max_size = 300M" > ~/.ccache/ccache.conf | |||
echo "compression = true" >> ~/.ccache/ccache.conf | |||
ccache -s | |||
- name: Build OpenBLAS | |||
run: | | |||
case "${{ matrix.build }}" in | |||
"make") | |||
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}" | |||
;; | |||
"cmake") | |||
mkdir build && cd build | |||
cmake -DDYNAMIC_ARCH=1 \ | |||
-DNOFORTRAN=0 \ | |||
-DBUILD_WITHOUT_LAPACK=0 \ | |||
-DCMAKE_VERBOSE_MAKEFILE=ON \ | |||
-DCMAKE_BUILD_TYPE=Release \ | |||
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \ | |||
-DCMAKE_C_COMPILER_LAUNCHER=ccache \ | |||
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \ | |||
.. | |||
cmake --build . | |||
;; | |||
*) | |||
echo "::error::Configuration not supported" | |||
exit 1 | |||
;; | |||
esac | |||
- name: Show ccache status | |||
continue-on-error: true | |||
run: ccache -s | |||
- name: Install benchmark dependencies | |||
run: pip install meson ninja numpy pytest pytest-codspeed --user | |||
- name: Build the wrapper
  run: |
    cd benchmark/pybench
    pybench_dir=$PWD
    # Point pkg-config at the pybench directory for the meson configure step.
    PKG_CONFIG_PATH=$pybench_dir
    export PKG_CONFIG_PATH
    meson setup build --prefix=$pybench_dir/build-install
    meson install -C build
    # Sanity check: the freshly built extension module must be importable
    # from its build directory.
    cd build/openblas_wrap
    python -c'import _flapack; print(dir(_flapack))'
- name: Run benchmarks under pytest-benchmark
  run: |
    cd benchmark/pybench
    pip install pytest-benchmark
    # Run only the tests selected by -k 'gesdd', single-threaded, against the
    # wrapper installed under build-install.
    PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/ \
    OPENBLAS_NUM_THREADS=1 \
      pytest benchmarks/bench_blas.py -k 'gesdd'
- name: Run benchmarks
  uses: CodSpeedHQ/action@v3
  with:
    token: ${{ secrets.CODSPEED_TOKEN }}
    # Script handed to the CodSpeed action: run the BLAS benchmark file
    # single-threaded with the --codspeed pytest option.
    run: |
      cd benchmark/pybench
      PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/ \
      OPENBLAS_NUM_THREADS=1 \
        pytest benchmarks/bench_blas.py --codspeed
@@ -1,40 +0,0 @@ | |||
# Builds the MkDocs documentation on every push / pull request targeting
# develop, and publishes it to GitHub Pages for pushes to develop only.
name: Publish docs via GitHub Pages

on:
  push:
    branches:
      - develop
  pull_request:
    branches:
      - develop

jobs:
  build:
    name: Deploy docs
    # Only run in the upstream repository, not in forks.
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    runs-on: ubuntu-latest
    # The deploy step pushes the generated site using GITHUB_TOKEN, which
    # needs write access to the repository contents. Request it explicitly
    # so the job does not depend on the repository's default token
    # permissions (and holds no other scopes).
    permissions:
      contents: write
    steps:
      - uses: actions/checkout@v4
        with:
          # Fetch the complete history rather than a shallow clone
          # (presumably needed by the git-revision-date plugin installed
          # below - TODO confirm).
          fetch-depth: 0
      - uses: actions/setup-python@v5
        with:
          python-version: "3.10"
      - name: Install MkDocs and doc theme packages
        run: pip install mkdocs mkdocs-material mkdocs-git-revision-date-localized-plugin mkdocs-mermaid2-plugin
      - name: Build docs site
        run: mkdocs build
      # mkdocs gh-deploy command only builds to the top-level, hence deploying
      # with this action instead.
      # Deploys to http://www.openmathlib.org/OpenBLAS/docs/
      - name: Deploy docs
        uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
        # Publish only for pushes to develop; pull requests just build.
        if: ${{ github.ref == 'refs/heads/develop' }}
        with:
          github_token: ${{ secrets.GITHUB_TOKEN }}
          publish_dir: ./site
          destination_dir: docs/
@@ -1,386 +0,0 @@ | |||
name: continuous build

on:
  - push
  - pull_request

# One concurrency group per workflow and PR head ref (or per run when there is
# no head ref); a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read # to fetch code (actions/checkout)

jobs:
build:
  # Only run in the upstream repository, not in forks.
  if: "github.repository == 'OpenMathLib/OpenBLAS'"
  runs-on: ${{ matrix.os }}

  strategy:
    fail-fast: false
    matrix:
      os:
        - ubuntu-latest
        - macos-latest
      fortran:
        - gfortran
        - flang
      build:
        - cmake
        - make
      # The flang / macOS combination is not built.
      exclude:
        - os: macos-latest
          fortran: flang

  steps:
    - name: Checkout repository
      uses: actions/checkout@v3

    - name: Print system information
      run: |
        case "$RUNNER_OS" in
          Linux)
            cat /proc/cpuinfo
            ;;
          macOS)
            sysctl -a | grep machdep.cpu
            ;;
          *)
            echo "::error::$RUNNER_OS not supported"
            exit 1
            ;;
        esac

    - name: Install Dependencies
      run: |
        case "$RUNNER_OS" in
          Linux)
            sudo apt-get update
            sudo apt-get install -y gfortran cmake ccache
            # libtinfo5 is fetched as a standalone package and installed
            # from the local file.
            wget http://security.ubuntu.com/ubuntu/pool/universe/n/ncurses/libtinfo5_6.3-2ubuntu0.1_amd64.deb
            sudo apt install ./libtinfo5_6.3-2ubuntu0.1_amd64.deb
            ;;
          macOS)
            # It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
            brew reinstall gcc
            brew install coreutils ccache
            ;;
          *)
            echo "::error::$RUNNER_OS not supported"
            exit 1
            ;;
        esac

    - name: Compilation cache
      uses: actions/cache@v3
      with:
        path: ~/.ccache
        # The commit sha is part of the key because a new cache entry is only
        # created when no entry exists for the key yet.
        # GNU make and cmake invoke the compilers differently, which appears
        # to make their cache contents mismatch, so each build tool gets its
        # own cache to avoid polluting the other.
        key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
        # Fallback keys, most specific first: same branch and Fortran
        # compiler, then same Fortran compiler, then same build tool.
        restore-keys: |
          ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
          ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
          ccache-${{ runner.os }}-${{ matrix.build }}

    - name: Configure ccache
      run: |
        # Only the make build needs the ccache directory on PATH; the cmake
        # build selects ccache through its compiler-launcher options.
        if [ "${{ matrix.build }}" = "make" ]; then
          case "$RUNNER_OS" in
            Linux)
              echo "/usr/lib/ccache" >> $GITHUB_PATH
              ;;
            macOS)
              echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
              ;;
            *)
              echo "::error::$RUNNER_OS not supported"
              exit 1
              ;;
          esac
        fi
        # Cap the cache size and enable compression so the cache stays well
        # below the total disk or cache quota (5 GB).
        mkdir -p ~/.ccache
        printf '%s\n' "max_size = 300M" "compression = true" > ~/.ccache/ccache.conf
        ccache -s

    - name: Add gfortran runtime to link path
      if: matrix.build == 'make' && runner.os == 'macOS'
      run: |
        # Ask gfortran where its runtime library lives and keep the directory.
        runtime_dir=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
        echo "Using gfortran runtime in $runtime_dir"
        # Append to, rather than replace, any LDFLAGS already set; the value
        # is exported to later steps through GITHUB_ENV.
        echo "LDFLAGS=${LDFLAGS:+$LDFLAGS }-L$runtime_dir" >> "$GITHUB_ENV"

    - name: Build OpenBLAS
      run: |
        if [ "${{ matrix.fortran }}" = "flang" ]; then
          # download and install classic flang (unpacked directly under /usr)
          flang_tgz=flang-20190329-x86-70.tgz
          cd /usr/
          sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/$flang_tgz
          sudo tar xf $flang_tgz
          sudo rm $flang_tgz
          cd -
        fi
        # Dispatch on the build tool selected by the matrix.
        if [ "${{ matrix.build }}" = "make" ]; then
          make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
        elif [ "${{ matrix.build }}" = "cmake" ]; then
          mkdir build && cd build
          cmake \
            -DDYNAMIC_ARCH=1 \
            -DNOFORTRAN=0 \
            -DBUILD_WITHOUT_LAPACK=0 \
            -DCMAKE_VERBOSE_MAKEFILE=ON \
            -DCMAKE_BUILD_TYPE=Release \
            -DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
            -DCMAKE_C_COMPILER_LAUNCHER=ccache \
            -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
            ..
          cmake --build .
        else
          echo "::error::Configuration not supported"
          exit 1
        fi

    - name: Show ccache status
      # Statistics are informational only; a failure must not fail the job.
      continue-on-error: true
      run: |
        ccache -s

    - name: Run tests
      timeout-minutes: 60
      run: |
        if [ "${{ matrix.build }}" = "make" ]; then
          MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
          # Each test directory gets its own collapsible log group.
          for test_dir in test ctest utest; do
            echo "::group::Tests in '$test_dir' directory"
            make -C $test_dir $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
            echo "::endgroup::"
          done
        elif [ "${{ matrix.build }}" = "cmake" ]; then
          cd build && ctest
        else
          echo "::error::Configuration not supported"
          exit 1
        fi
msys2:
  # Only run in the upstream repository, not in forks.
  if: "github.repository == 'OpenMathLib/OpenBLAS'"
  runs-on: windows-latest

  strategy:
    fail-fast: false
    matrix:
      msystem:
        - UCRT64
        - MINGW32
        - CLANG64
      idx:
        - int32
        - int64
      build-type:
        - Release
      # Per-combination settings: package name prefix, Fortran package and,
      # for int64 builds, the extra cmake flags.
      include:
        - msystem: UCRT64
          idx: int32
          target-prefix: mingw-w64-ucrt-x86_64
          fc-pkg: fc
        - msystem: MINGW32
          idx: int32
          target-prefix: mingw-w64-i686
          fc-pkg: fc
        - msystem: CLANG64
          idx: int32
          target-prefix: mingw-w64-clang-x86_64
          fc-pkg: fc
        - msystem: UCRT64
          idx: int64
          idx64-flags: -DBINARY=64 -DINTERFACE64=1
          target-prefix: mingw-w64-ucrt-x86_64
          fc-pkg: fc
        - msystem: CLANG64
          idx: int64
          idx64-flags: -DBINARY=64 -DINTERFACE64=1
          target-prefix: mingw-w64-clang-x86_64
          fc-pkg: fc
        - msystem: UCRT64
          idx: int32
          target-prefix: mingw-w64-ucrt-x86_64
          fc-pkg: fc
          build-type: None
      exclude:
        - msystem: MINGW32
          idx: int64

  defaults:
    run:
      # Use MSYS2 bash as default shell
      shell: msys2 {0}

  env:
    CHERE_INVOKING: 1

  steps:
    - name: Get CPU name
      shell: pwsh
      run: |
        Get-CIMInstance -Class Win32_Processor | Select-Object -Property Name

    - name: Install build dependencies
      uses: msys2/setup-msys2@v2
      with:
        msystem: ${{ matrix.msystem }}
        update: true
        release: false # Use pre-installed version
        install: >-
          base-devel
          ${{ matrix.target-prefix }}-cc
          ${{ matrix.target-prefix }}-${{ matrix.fc-pkg }}
          ${{ matrix.target-prefix }}-cmake
          ${{ matrix.target-prefix }}-ninja
          ${{ matrix.target-prefix }}-ccache

    - name: Checkout repository
      uses: actions/checkout@v3

    # Publishes two step outputs consumed by the cache restore/save steps:
    # the ccache directory and the cache key.
    - name: Prepare ccache
      id: ccache-prepare
      run: |
        echo "ccachedir=$(cygpath -m $(ccache -k cache_dir))" >> $GITHUB_OUTPUT
        # The commit sha is part of the key because a new cache entry is only
        # created when no entry exists for the key yet.
        echo "key=ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}" >> $GITHUB_OUTPUT

    - name: Restore ccache
      uses: actions/cache/restore@v3
      with:
        path: ${{ steps.ccache-prepare.outputs.ccachedir }}
        key: ${{ steps.ccache-prepare.outputs.key }}
        # Fallback keys: same branch first, then any branch.
        restore-keys: |
          ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}
          ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}

    # Cap the cache size and enable compression to stay below the total disk
    # or cache quota.
    - name: Configure ccache
      run: |
        which ccache
        mkdir -p ${{ steps.ccache-prepare.outputs.ccachedir }}
        printf '%s\n' "max_size = 250M" "compression = true" > ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
        ccache -p
        ccache -s
        echo $HOME
        cygpath -w $HOME

    - name: Configure OpenBLAS
      run: |
        mkdir build && cd build
        cmake \
          -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
          -DBUILD_SHARED_LIBS=ON \
          -DBUILD_STATIC_LIBS=ON \
          -DDYNAMIC_ARCH=ON \
          -DUSE_THREAD=ON \
          -DNUM_THREADS=64 \
          -DTARGET=CORE2 \
          ${{ matrix.idx64-flags }} \
          -DCMAKE_C_COMPILER_LAUNCHER=ccache \
          -DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
          ..

    - name: Build OpenBLAS
      run: |
        cd build && cmake --build .

    - name: Show ccache status
      # Statistics are informational only; a failure must not fail the job.
      continue-on-error: true
      run: |
        ccache -s

    # Saved right after the build step, before the tests run.
    - name: Save ccache
      uses: actions/cache/save@v3
      with:
        path: ${{ steps.ccache-prepare.outputs.ccachedir }}
        key: ${{ steps.ccache-prepare.outputs.key }}

    - name: Run tests
      id: run-ctest
      timeout-minutes: 60
      run: |
        cd build && ctest

    # Diagnostic step: only runs when the ctest step above failed, and never
    # fails on the re-run itself.
    - name: Re-run tests
      if: always() && (steps.run-ctest.outcome == 'failure')
      timeout-minutes: 60
      run: |
        cd build
        echo "::group::Re-run ctest"
        ctest --rerun-failed --output-on-failure || true
        echo "::endgroup::"
        echo "::group::Log from these tests"
        if [ -f Testing/Temporary/LastTest.log ]; then
          cat Testing/Temporary/LastTest.log
        fi
        echo "::endgroup::"
cross_build:
  # Only run in the upstream repository, not in forks.
  if: "github.repository == 'OpenMathLib/OpenBLAS'"
  runs-on: ubuntu-22.04

  strategy:
    fail-fast: false
    matrix:
      # target: value passed as ARCH and used in the cross libgomp package name
      # triple: prefix of the cross gcc / gfortran packages and executables
      # opts:   extra make variables for that target
      include:
        - target: mips64el
          triple: mips64el-linux-gnuabi64
          opts: DYNAMIC_ARCH=1 TARGET=GENERIC
        - target: riscv64
          triple: riscv64-linux-gnu
          opts: TARGET=RISCV64_GENERIC
        - target: mipsel
          triple: mipsel-linux-gnu
          opts: TARGET=MIPS1004K
        - target: alpha
          triple: alpha-linux-gnu
          opts: TARGET=EV4

  steps:
    - name: Checkout repository
      uses: actions/checkout@v3

    - name: Install Dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y \
          ccache \
          gcc-${{ matrix.triple }} \
          gfortran-${{ matrix.triple }} \
          libgomp1-${{ matrix.target }}-cross

    - name: Compilation cache
      uses: actions/cache@v3
      with:
        path: ~/.ccache
        key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
        # Fallback keys: same branch first, then any branch.
        restore-keys: |
          ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
          ccache-${{ runner.os }}-${{ matrix.target }}

    - name: Configure ccache
      run: |
        # Cap the cache size and enable compression so the cache stays well
        # below the total disk or cache quota (5 GB).
        mkdir -p ~/.ccache
        printf '%s\n' "max_size = 300M" "compression = true" > ~/.ccache/ccache.conf
        ccache -s

    - name: Build OpenBLAS
      run: |
        # Host tools use the native gcc; the library itself is built with
        # the cross compilers, all of them through ccache.
        make -j$(nproc) \
          HOSTCC="ccache gcc" \
          CC="ccache ${{ matrix.triple }}-gcc" \
          FC="ccache ${{ matrix.triple }}-gfortran" \
          ARCH=${{ matrix.target }} ${{ matrix.opts }}
# Native build and LAPACK test run on an Arm runner.
neoverse_build:
  # Only run in the upstream repository, not in forks.
  if: "github.repository == 'OpenMathLib/OpenBLAS'"
  runs-on: ubuntu-24.04-arm
  steps:
    - name: Checkout repository
      uses: actions/checkout@v3
    - name: Install Dependencies
      run: |
        sudo apt-get update
        sudo apt-get install -y gcc gfortran make
    - name: Build OpenBLAS
      run: |
        # Use command substitution $(nproc) to get the CPU count. The former
        # ${nproc} expanded an unset shell variable, leaving a bare "-j",
        # which tells make to run an unlimited number of parallel jobs.
        make -j$(nproc)
        make -j$(nproc) lapack-test
@@ -1,37 +0,0 @@ | |||
name: harmonyos

on:
  - push
  - pull_request

# One concurrency group per workflow and PR head ref (or per run when there is
# no head ref); a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read # to fetch code (actions/checkout)

jobs:
  build:
    # Only run in the upstream repository, not in forks.
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    runs-on: ubuntu-latest

    # Both values contain a literal "$GITHUB_WORKSPACE". They are pasted into
    # the build script through the env context expression, so the shell
    # expands the variable when the script runs.
    env:
      OHOS_NDK_CMAKE: $GITHUB_WORKSPACE/ohos-sdk/linux/native/build-tools/cmake/bin/cmake
      COMMON_CMAKE_OPTIONS: |
        -DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/ohos-sdk/linux/native/build/cmake/ohos.toolchain.cmake \
        -DCMAKE_INSTALL_PREFIX=install \
        -DCMAKE_BUILD_TYPE=Release \

    steps:
      - uses: actions/checkout@v4

      - name: ndk-install
        run: |
          # Download the SDK archive, unpack it, then unpack the native
          # toolchain zip inside it.
          sdk_tarball=ohos-sdk-windows_linux-public.tar.gz
          wget https://repo.huaweicloud.com/harmonyos/os/4.1.1-Release/$sdk_tarball
          tar -xf $sdk_tarball
          cd ohos-sdk/linux
          unzip -q native-linux-x64-4.1.7.8-Release.zip
          cd -

      - name: build-armv8
        run: |
          mkdir build && cd build
          ${{ env.OHOS_NDK_CMAKE }} ${{ env.COMMON_CMAKE_OPTIONS }} -DOHOS_ARCH="arm64-v8a" \
          -DTARGET=ARMV8 -DNOFORTRAN=1 ..
          ${{ env.OHOS_NDK_CMAKE }} --build . -j $(nproc)
@@ -1,119 +0,0 @@ | |||
# Cross-builds OpenBLAS for loongarch64 with the distribution's gcc-14 cross
# toolchain and runs the test programs under qemu user-mode emulation.
name: loongarch64 qemu test

on: [push, pull_request]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# Least privilege: this workflow only needs to read the repository, matching
# the other workflows that declare the same permission.
permissions:
  contents: read # to fetch code (actions/checkout)

jobs:
  TEST:
    # Only run in the upstream repository, not in forks.
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    runs-on: ubuntu-24.04
    strategy:
      fail-fast: false
      matrix:
        include:
          - target: LOONGSONGENERIC
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
          - target: LOONGSON3R5
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
          - target: LOONGSON2K1000
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
          - target: LA64_GENERIC
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
          - target: LA464
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
          - target: LA264
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
          - target: DYNAMIC_ARCH
            triple: loongarch64-linux-gnu
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Install APT deps
        run: |
          # -y keeps the installation non-interactive regardless of the
          # runner image's apt configuration.
          sudo apt-get update && \
          sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache qemu-user-static \
          gcc-14-loongarch64-linux-gnu g++-14-loongarch64-linux-gnu gfortran-14-loongarch64-linux-gnu

      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
          # Fallback keys: same branch first, then any branch.
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.target }}

      - name: Configure ccache
        run: |
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      - name: Disable utest dsdot:dsdot_n_1
        run: |
          # Truncate the test source so the dsdot utest is compiled out.
          echo -n > utest/test_dsdot.c
          echo "Due to the current version of qemu causing utest cases to fail,"
          echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."

      - name: Build OpenBLAS
        run: |
          # Statically linked so the binaries run under qemu user-mode
          # emulation without a target sysroot.
          make CC='ccache ${{ matrix.triple }}-gcc-14 -static' FC='ccache ${{ matrix.triple }}-gfortran-14 -static' \
          RANLIB='ccache ${{ matrix.triple }}-gcc-ranlib-14' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

      - name: Test
        run: |
          # Same commands, in the same order, as the former explicit list:
          # "p" iterates over the precision prefixes (s, d, c, z) and "lvl"
          # over the BLAS levels that read an input file.
          QEMU=qemu-loongarch64-static
          $QEMU ./utest/openblas_utest
          $QEMU ./utest/openblas_utest_ext
          # CBLAS tests: level 1 takes no input, levels 2 and 3 read stdin.
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat1
          done
          for lvl in 2 3; do
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat${lvl} < ./ctest/${p}in${lvl}
            done
          done
          # Fortran BLAS tests, first single-threaded and then with 2 threads.
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat1
          done
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat1
          done
          for lvl in 2 3; do
            # Stale summary files are removed before every pass.
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
          done
@@ -1,141 +0,0 @@ | |||
# Cross-builds OpenBLAS for loongarch64 with a downloaded clang toolchain and
# runs the test programs under a qemu built from source.
name: loongarch64 clang qemu test

on: [push, pull_request]

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

# Least privilege: this workflow only needs to read the repository, matching
# the other workflows that declare the same permission.
permissions:
  contents: read # to fetch code (actions/checkout)

jobs:
  TEST:
    # Only run in the upstream repository, not in forks.
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - target: LOONGSONGENERIC
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
          - target: LOONGSON3R5
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
          - target: LOONGSON2K1000
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
          - target: LA64_GENERIC
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
          - target: LA464
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
          - target: LA264
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
          - target: DYNAMIC_ARCH
            opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC
    steps:
      - name: Checkout repository
        uses: actions/checkout@v3

      - name: Install libffi6
        run: |
          wget https://download.nvidia.com/cumulus/apt.cumulusnetworks.com/pool/upstream/libf/libffi/libffi6_3.2.1-9_amd64.deb
          sudo dpkg -i libffi6_3.2.1-9_amd64.deb

      - name: Install APT deps
        run: |
          sudo apt-get update
          # -y keeps the installation non-interactive regardless of the
          # runner image's apt configuration.
          sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache libglib2.0-dev

      - name: Download and install loongarch64-toolchain
        run: |
          wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz
          wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz
          tar -xf clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz -C /opt
          tar -xf loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz -C /opt

      - name: Checkout qemu
        uses: actions/checkout@v3
        with:
          repository: qemu/qemu
          path: qemu
          # NOTE(review): this tracks the moving qemu master branch, so the
          # emulator under test changes between runs - confirm this is
          # intended rather than a pinned commit.
          ref: master

      - name: Install qemu
        run: |
          cd qemu
          ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static
          make -j$(nproc)
          make install

      - name: Set env
        run: |
          # Put both downloaded toolchains on PATH for all later steps.
          echo "PATH=$GITHUB_WORKSPACE:/opt/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10/bin:/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/bin:$PATH" >> $GITHUB_ENV

      - name: Compilation cache
        uses: actions/cache@v3
        with:
          path: ~/.ccache
          key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
          # Fallback keys: same branch first, then any branch.
          restore-keys: |
            ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
            ccache-${{ runner.os }}-${{ matrix.target }}

      - name: Configure ccache
        run: |
          test -d ~/.ccache || mkdir -p ~/.ccache
          echo "max_size = 300M" > ~/.ccache/ccache.conf
          echo "compression = true" >> ~/.ccache/ccache.conf
          ccache -s

      - name: Disable utest dsdot:dsdot_n_1
        run: |
          # Truncate the test source so the dsdot utest is compiled out.
          echo -n > utest/test_dsdot.c
          echo "Due to the qemu versions 7.2 causing utest cases to fail,"
          echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."

      - name: Build OpenBLAS
        run: make CC='ccache clang --target=loongarch64-linux-gnu --sysroot=/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/loongarch64-linux-gnu/sysroot/ -static' FC='ccache loongarch64-linux-gnu-gfortran -static' HOSTCC='ccache clang' CROSS_SUFFIX=llvm- NO_SHARED=1 ${{ matrix.opts }} -j$(nproc)

      - name: Test
        run: |
          # Use the qemu built and installed by the earlier step.
          export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
          # Same commands, in the same order, as the former explicit list:
          # "p" iterates over the precision prefixes (s, d, c, z) and "lvl"
          # over the BLAS levels that read an input file.
          QEMU=qemu-loongarch64
          $QEMU ./utest/openblas_utest
          $QEMU ./utest/openblas_utest_ext
          # CBLAS tests: level 1 takes no input, levels 2 and 3 read stdin.
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat1
          done
          for lvl in 2 3; do
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./ctest/x${p}cblat${lvl} < ./ctest/${p}in${lvl}
            done
          done
          # Fortran BLAS tests, first single-threaded and then with 2 threads.
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat1
          done
          for p in s d c z; do
            OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat1
          done
          for lvl in 2 3; do
            # Stale summary files are removed before every pass.
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
            rm -f ./test/?BLAT${lvl}.SUMM
            for p in s d c z; do
              OPENBLAS_NUM_THREADS=2 $QEMU ./test/${p}blat${lvl} < ./test/${p}blat${lvl}.dat
            done
          done
@@ -1,123 +0,0 @@ | |||
name: mips64 qemu test

on:
  - push
  - pull_request

# One concurrency group per workflow and PR head ref (or per run when there is
# no head ref); a newer run in the same group cancels the one in progress.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

permissions:
  contents: read # to fetch code (actions/checkout)

jobs:
  TEST:
    # Only run in the upstream repository, not in forks.
    if: "github.repository == 'OpenMathLib/OpenBLAS'"
    runs-on: ubuntu-latest

    strategy:
      fail-fast: false
      matrix:
        # triple: prefix of the cross gcc / gfortran packages and executables
        # opts:   make variables for that target
        include:
          - target: MIPS64_GENERIC
            triple: mips64el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=MIPS64_GENERIC
          - target: SICORTEX
            triple: mips64el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=SICORTEX
          - target: I6400
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=I6400
          - target: P6600
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=P6600
          - target: I6500
            triple: mipsisa64r6el-linux-gnuabi64
            opts: NO_SHARED=1 TARGET=I6500

    steps:
      - name: Checkout repository
        uses: actions/checkout@v3
- name: install build deps
  run: |
    sudo apt-get update
    # -y keeps the installation non-interactive regardless of the runner
    # image's apt configuration.
    sudo apt-get install -y autoconf automake autotools-dev ninja-build make ccache \
    gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross libglib2.0-dev
- name: checkout qemu
  uses: actions/checkout@v3
  with:
    repository: qemu/qemu
    # Fixed qemu commit.
    ref: ae35f033b874c627d81d51070187fbf55f0bf1a7
    path: qemu
- name: build qemu
  run: |
    cd qemu
    # User-mode emulation for mips64el only, installed under the workspace.
    ./configure \
      --prefix=$GITHUB_WORKSPACE/qemu-install \
      --target-list=mips64el-linux-user \
      --disable-system
    make -j$(nproc)
    make install
- name: Compilation cache
  uses: actions/cache@v3
  with:
    path: ~/.ccache
    key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
    # Fallback keys: same branch first, then any branch.
    restore-keys: |
      ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
      ccache-${{ runner.os }}-${{ matrix.target }}
- name: Configure ccache
  run: |
    # Write a fresh configuration: 300M size cap, compression enabled.
    mkdir -p ~/.ccache
    printf '%s\n' "max_size = 300M" "compression = true" > ~/.ccache/ccache.conf
    ccache -s
- name: build OpenBLAS
  run: |
    # Cross compilers are invoked through ccache and link statically; host
    # tools use the native gcc.
    make \
      CC='ccache ${{ matrix.triple }}-gcc -static' \
      FC='ccache ${{ matrix.triple }}-gfortran -static' \
      ${{ matrix.opts }} \
      HOSTCC='ccache gcc' \
      -j$(nproc)
- name: test | |||
run: | | |||
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH | |||
qemu-mips64el ./utest/openblas_utest | |||
qemu-mips64el ./utest/openblas_utest_ext | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3 | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1 | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1 | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1 | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1 | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1 | |||
rm -f ./test/?BLAT2.SUMM | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat | |||
rm -f ./test/?BLAT2.SUMM | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat | |||
rm -f ./test/?BLAT3.SUMM | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat | |||
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat | |||
rm -f ./test/?BLAT3.SUMM | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat | |||
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat |
@@ -1,90 +0,0 @@ | |||
# Nightly workflow that builds, bottles and tests OpenBLAS through Homebrew's `openblas` formula. | |||
# Only the "head" branch of the OpenBLAS package is tested | |||
on: | |||
# Pushes trigger the workflow only when this workflow file itself changes. | |||
push: | |||
paths: | |||
- '**/nightly-Homebrew-build.yml' | |||
# Pull requests against `develop` trigger it only when this workflow file changes. | |||
pull_request: | |||
branches: | |||
- develop | |||
paths: | |||
- '**/nightly-Homebrew-build.yml' | |||
schedule: | |||
- cron: 45 7 * * * | |||
# This is 7:45 AM UTC daily, late at night in the USA | |||
# Since push and pull_request will still always be building and testing the `develop` branch, | |||
# it only makes sense to test if this file has been changed | |||
name: Nightly-Homebrew-Build | |||
# One concurrency group per workflow and PR head ref (falling back to the run id when there is no head ref); | |||
# a newer run in the same group cancels the one still in progress. | |||
concurrency: | |||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
cancel-in-progress: true | |||
permissions: | |||
contents: read # to fetch code (actions/checkout) | |||
jobs: | |||
# Single macOS job; the `if` below restricts it to the OpenMathLib/OpenBLAS repository (forks skip it). | |||
build-OpenBLAS-with-Homebrew: | |||
if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
runs-on: macos-latest | |||
env: | |||
# NOTE(review): this pins Xcode 11.4.1 while the runner is `macos-latest` - confirm that Xcode version is still installed on the image. | |||
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer | |||
# Homebrew behaviour switches for CI, all passed as the literal string "ON". | |||
HOMEBREW_DEVELOPER: "ON" | |||
HOMEBREW_DISPLAY_INSTALL_TIMES: "ON" | |||
HOMEBREW_NO_ANALYTICS: "ON" | |||
HOMEBREW_NO_AUTO_UPDATE: "ON" | |||
HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON" | |||
HOMEBREW_NO_INSTALL_CLEANUP: "ON" | |||
HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: "ON" | |||
HOMEBREW_NO_INSTALL_FROM_API: "ON" | |||
steps: | |||
# Scheduled runs only: sleep a random 0-599 seconds before doing any work. | |||
- name: Random delay for cron job | |||
run: | | |||
delay=$(( RANDOM % 600 )) | |||
printf 'Delaying for %s seconds on event %s' ${delay} "${{ github.event_name }}" | |||
sleep ${delay} | |||
if: github.event_name == 'schedule' | |||
- uses: actions/checkout@v2 | |||
# This isn't even needed, technically. Homebrew will get `develop` via git | |||
# Skipped for pull requests; a failing `brew update` is deliberately ignored (`|| true`). | |||
- name: Update Homebrew | |||
if: github.event_name != 'pull_request' | |||
run: brew update || true | |||
# First install only the formula's dependencies, then build openblas itself in bottle mode. | |||
- name: Install prerequisites | |||
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas | |||
- name: Install and bottle OpenBLAS | |||
run: brew install --fetch-HEAD --HEAD --build-bottle --keep-tmp openblas | |||
# The HEAD flags tell Homebrew to build the develop branch, fetched via git | |||
# Package the build as a bottle and collect the resulting tarball(s) in ./bottles for upload. | |||
- name: Create bottle | |||
run: | | |||
brew bottle -v openblas | |||
mkdir bottles | |||
mv *.bottle.tar.gz bottles | |||
# NOTE(review): the artifact name hard-codes "catalina" although the job runs on `macos-latest` - confirm the name is still intended. | |||
- name: Upload bottle | |||
uses: actions/upload-artifact@v4 | |||
with: | |||
name: openblas--HEAD.catalina.bottle.tar.gz | |||
path: bottles | |||
# Post-build checks: linkage report, the formula's own test, and a strict audit followed by a dump of the formula. | |||
- name: Show linkage | |||
run: brew linkage -v openblas | |||
- name: Test openblas | |||
run: brew test --HEAD --verbose openblas | |||
- name: Audit openblas formula | |||
run: | | |||
brew audit --strict openblas | |||
brew cat openblas | |||
# Runs only when an earlier step failed: publishes the Homebrew build logs via `brew gist-logs`. | |||
- name: Post logs on failure | |||
if: failure() | |||
run: brew gist-logs --with-hostname -v openblas |
@@ -1,256 +0,0 @@ | |||
# Cross-builds OpenBLAS for RISC-V vector targets and runs the BLAS test binaries under qemu-riscv64. | |||
# NOTE(review): the name mentions only zvl256b, but the matrix below also covers RISCV64_ZVL128B and a DYNAMIC_ARCH build. | |||
name: riscv64 zvl256b qemu test | |||
on: [push, pull_request] | |||
# One concurrency group per workflow and PR head ref (falling back to the run id); newer runs cancel older ones. | |||
concurrency: | |||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
cancel-in-progress: true | |||
permissions: | |||
contents: read # to fetch code (actions/checkout) | |||
jobs: | |||
TEST: | |||
if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
runs-on: ubuntu-latest | |||
env: | |||
# `triple` prefixes the cross tools (ar, gcc, gfortran, ranlib) and is passed to clang via -target. | |||
triple: riscv64-unknown-linux-gnu | |||
# Base URL + release path are concatenated to form the toolchain tarball download URL. | |||
riscv_gnu_toolchain: https://github.com/riscv-collab/riscv-gnu-toolchain | |||
# Selects the gcc directory handed to clang through --gcc-toolchain. | |||
riscv_gnu_toolchain_version: 13.2.0 | |||
riscv_gnu_toolchain_nightly_download_path: /releases/download/2025.08.29/riscv64-glibc-ubuntu-22.04-llvm-nightly-2025.08.29-nightly.tar.xz | |||
strategy: | |||
# Let the remaining matrix entries finish even if one of them fails. | |||
fail-fast: false | |||
matrix: | |||
# Per entry: `target` feeds the cache key and the TARGET=/-DTARGET= make arguments, | |||
# `opts` is appended to every make command line, `qemu_cpu` is exported as QEMU_CPU for the test runs. | |||
include: | |||
- target: RISCV64_ZVL128B | |||
opts: TARGET=RISCV64_ZVL128B BINARY=64 ARCH=riscv64 | |||
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=128,elen=64 | |||
- target: RISCV64_ZVL256B | |||
opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64 | |||
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64 | |||
# NOTE(review): here `target` is the literal string "DYNAMIC_ARCH=1", so the make steps receive TARGET=DYNAMIC_ARCH=1 | |||
# followed by TARGET=RISCV64_GENERIC from `opts` - presumably the later assignment wins; confirm. | |||
- target: DYNAMIC_ARCH=1 | |||
opts: TARGET=RISCV64_GENERIC BINARY=64 ARCH=riscv64 DYNAMIC_ARCH=1 | |||
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64 | |||
steps: | |||
- name: Checkout repository | |||
uses: actions/checkout@v3 | |||
# Installs host build tools from apt, then downloads the prebuilt toolchain tarball and unpacks it under /opt. | |||
# Later steps put /opt/riscv/bin on PATH, so the tarball presumably unpacks into a riscv/ directory - confirm. | |||
- name: install build deps | |||
run: | | |||
sudo apt-get update | |||
sudo apt-get install autoconf automake autotools-dev ninja-build make \ | |||
libgomp1-riscv64-cross ccache | |||
wget ${riscv_gnu_toolchain}/${riscv_gnu_toolchain_nightly_download_path} | |||
tar -xvf $(basename ${riscv_gnu_toolchain_nightly_download_path}) -C /opt | |||
# Persists ~/.ccache between runs. The key is unique per OS, matrix target, ref and commit; | |||
# restore-keys fall back to the same target on the same ref, then to the same target on any ref. | |||
- name: Compilation cache | |||
uses: actions/cache@v3 | |||
with: | |||
path: ~/.ccache | |||
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }} | |||
restore-keys: | | |||
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }} | |||
ccache-${{ runner.os }}-${{ matrix.target }} | |||
# Creates the cache directory if missing, writes a fresh ccache.conf (300M cap, compression on) and prints statistics. | |||
- name: Configure ccache | |||
run: | | |||
test -d ~/.ccache || mkdir -p ~/.ccache | |||
echo "max_size = 300M" > ~/.ccache/ccache.conf | |||
echo "compression = true" >> ~/.ccache/ccache.conf | |||
ccache -s | |||
# Builds the libraries with clang (through ccache) targeting ${triple}, using the sysroot and gcc toolchain from /opt/riscv. | |||
# The tool variables are single-quoted, so ${triple} and ${riscv_gnu_toolchain_version} reach make unexpanded; | |||
# presumably make resolves them from the job environment - confirm. | |||
- name: build OpenBLAS libs | |||
run: | | |||
export PATH="/opt/riscv/bin:$PATH" | |||
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \ | |||
CC='ccache clang --rtlib=compiler-rt -target ${triple} --sysroot /opt/riscv/sysroot --gcc-toolchain=/opt/riscv/lib/gcc/riscv64-unknown-linux-gnu/${riscv_gnu_toolchain_version}/' \ | |||
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \ | |||
RANLIB='ccache ${triple}-ranlib' \ | |||
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \ | |||
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) | |||
# Builds the `tests` make target; unlike the library build, CC here is the cross gcc (no clang, no ccache). | |||
- name: build OpenBLAS tests | |||
run: | | |||
export PATH="/opt/riscv/bin:$PATH" | |||
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \ | |||
CC='${triple}-gcc' \ | |||
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \ | |||
RANLIB='ccache ${triple}-ranlib' \ | |||
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \ | |||
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) tests | |||
# Builds the listed LIN/ and EIG/ test executables from inside lapack-netlib/TESTING with the same cross gcc setup. | |||
# NOTE(review): the last line of the make command ends with a line-continuation backslash that has nothing after it. | |||
- name: build lapack-netlib tests | |||
working-directory: ./lapack-netlib/TESTING | |||
run: | | |||
export PATH="/opt/riscv/bin:$PATH" | |||
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \ | |||
CC='${triple}-gcc' \ | |||
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \ | |||
RANLIB='ccache ${triple}-ranlib' \ | |||
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \ | |||
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) \ | |||
LIN/xlintsts LIN/xlintstc LIN/xlintstd LIN/xlintstz LIN/xlintstrfs \ | |||
LIN/xlintstrfc LIN/xlintstrfd LIN/xlintstrfz LIN/xlintstds \ | |||
LIN/xlintstzc EIG/xeigtsts EIG/xeigtstc EIG/xeigtstd EIG/xeigtstz \ | |||
# Runs every BLAS test binary from test/ and ctest/ under qemu-riscv64 in parallel, writes one log per binary | |||
# to ./test_out/<dir>.<binary>, and fails the step if any log contains the string "FAIL". | |||
- name: OpenBLAS tests | |||
shell: bash | |||
run: | | |||
: # make a failing qemu run visible through the `| tee` pipeline even if the shell was started without pipefail | |||
set -o pipefail | |||
export PATH="/opt/riscv/bin:$PATH" | |||
export QEMU_CPU=${{ matrix.qemu_cpu }} | |||
rm -rf ./test_out | |||
mkdir -p ./test_out | |||
: # run_test DIR CMD [DATA] - run ./DIR/CMD under qemu, feeding ./DIR/DATA on stdin when DATA is given. | |||
: # The exit status is captured with `|| RV=$?` so that errexit cannot kill the background subshell before | |||
: # the FAIL marker is written, and tee appends (-a) so the header line written first is kept in the log. | |||
run_test() { local DIR=$1; local CMD=$2; local DATA=$3; local OUTPUT="./test_out/$DIR.$CMD"; local RV=0; \ | |||
echo "`pwd`/$DIR/$CMD $DIR/$DATA" >> $OUTPUT; \ | |||
if [[ -z $DATA ]]; then qemu-riscv64 ./$DIR/$CMD |& tee -a $OUTPUT || RV=$? ; \ | |||
else qemu-riscv64 ./$DIR/$CMD < ./$DIR/$DATA |& tee -a $OUTPUT || RV=$? ; fi ; \ | |||
if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi ; \ | |||
} | |||
run_test test cblat1 & | |||
run_test test cblat2 cblat2.dat & | |||
run_test test cblat3 cblat3.dat & | |||
run_test test dblat1 & | |||
run_test test dblat2 dblat2.dat & | |||
run_test test dblat3 dblat3.dat & | |||
run_test test sblat1 & | |||
run_test test sblat2 sblat2.dat & | |||
run_test test sblat3 sblat3.dat & | |||
run_test test zblat1 & | |||
run_test test zblat2 zblat2.dat & | |||
run_test test zblat3 zblat3.dat & | |||
run_test ctest xccblat1 & | |||
run_test ctest xccblat2 cin2 & | |||
run_test ctest xccblat3 cin3 & | |||
run_test ctest xdcblat1 & | |||
run_test ctest xdcblat2 din2 & | |||
run_test ctest xdcblat3 din3 & | |||
run_test ctest xscblat1 & | |||
run_test ctest xscblat2 sin2 & | |||
run_test ctest xscblat3 sin3 & | |||
run_test ctest xzcblat1 & | |||
run_test ctest xzcblat2 zin2 & | |||
run_test ctest xzcblat3 zin3 & | |||
: # wait for all background runs, then print every log that contains FAIL and fail the step if there was one | |||
wait | |||
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*) | |||
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi | |||
# LAPACK (netlib) test suite under qemu-riscv64. Currently disabled: the script exits 0 right after the | |||
# "Skipping" message, so everything below the `exit 0` only runs once that line is commented out. | |||
- name: netlib tests | |||
shell: bash | |||
run: | | |||
: # these take a very long time | |||
echo "Skipping netlib tests in CI" | |||
exit 0 | |||
: # comment out exit above to enable the tests | |||
: # probably we want to identify a subset to run in CI | |||
: # make a failing qemu run visible through the `| tee` pipeline even if the shell was started without pipefail | |||
set -o pipefail | |||
export PATH="/opt/riscv/bin:$PATH" | |||
export QEMU_CPU=${{ matrix.qemu_cpu }} | |||
rm -rf ./test_out | |||
mkdir -p ./test_out | |||
: # run_test OUT IN EXE DESCRIPTION - run lapack-netlib/TESTING/EXE under qemu with TESTING/IN on stdin, | |||
: # logging to ./test_out/OUT, then append "*** FAIL" markers for a nonzero exit status, for any 'fail' | |||
: # in the log, and for any line containing 'rror' that mentions neither "passed" nor "largest error". | |||
: # The exit status is captured with `|| RV=$?` so errexit cannot kill the subshell before the checks run. | |||
: # NOTE: tee truncates the log, so the two header lines echoed first are discarded; left unchanged here | |||
: # because lapack_testing.py parses these files. | |||
run_test() { local OUTPUT="./test_out/$1"; local DATA="./lapack-netlib/TESTING/$2"; local CMD="./lapack-netlib/TESTING/$3"; local RV=0; \ | |||
echo "$4" >> $OUTPUT; \ | |||
echo "$CMD" >> $OUTPUT; \ | |||
qemu-riscv64 $CMD < $DATA |& tee $OUTPUT || RV=$? ; \ | |||
if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi; \ | |||
if grep -q fail $OUTPUT ; then echo "*** FAIL: log contains 'fail'" >> $OUTPUT ; fi ; \ | |||
if [[ -n "$(grep rror $OUTPUT | grep -v "passed" | grep -v "largest error")" ]]; then echo "*** FAIL: log contains 'error'" >> $OUTPUT ; fi ; \ | |||
} | |||
run_test stest.out stest.in LIN/xlintsts "Testing REAL LAPACK linear equation routines" & | |||
run_test ctest.out ctest.in LIN/xlintstc "Testing COMPLEX LAPACK linear equation routines" & | |||
run_test dtest.out dtest.in LIN/xlintstd "Testing DOUBLE PRECISION LAPACK linear equation routines" & | |||
run_test ztest.out ztest.in LIN/xlintstz "Testing COMPLEX16 LAPACK linear equation routines" & | |||
run_test dstest.out dstest.in LIN/xlintstds "Testing SINGLE-DOUBLE PRECISION LAPACK prototype linear equation routines" & | |||
run_test zctest.out zctest.in LIN/xlintstzc "Testing COMPLEX-COMPLEX16 LAPACK prototype linear equation routines" & | |||
run_test stest_rfp.out stest_rfp.in LIN/xlintstrfs "Testing REAL LAPACK RFP prototype linear equation routines" & | |||
run_test dtest_rfp.out dtest_rfp.in LIN/xlintstrfd "Testing DOUBLE PRECISION LAPACK RFP prototype linear equation routines" & | |||
run_test ctest_rfp.out ctest_rfp.in LIN/xlintstrfc "Testing COMPLEX LAPACK RFP prototype linear equation routines" & | |||
run_test ztest_rfp.out ztest_rfp.in LIN/xlintstrfz "Testing COMPLEX16 LAPACK RFP prototype linear equation routines" & | |||
run_test snep.out nep.in EIG/xeigtsts "NEP - Testing Nonsymmetric Eigenvalue Problem routines" & | |||
run_test ssep.out sep.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" & | |||
run_test sse2.out se2.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" & | |||
run_test ssvd.out svd.in EIG/xeigtsts "SVD - Testing Singular Value Decomposition routines" & | |||
run_test sec.out sec.in EIG/xeigtsts "SEC - Testing REAL Eigen Condition Routines" & | |||
run_test sed.out sed.in EIG/xeigtsts "SEV - Testing REAL Nonsymmetric Eigenvalue Driver" & | |||
run_test sgg.out sgg.in EIG/xeigtsts "SGG - Testing REAL Nonsymmetric Generalized Eigenvalue Problem routines" & | |||
run_test sgd.out sgd.in EIG/xeigtsts "SGD - Testing REAL Nonsymmetric Generalized Eigenvalue Problem driver routines" & | |||
run_test ssb.out ssb.in EIG/xeigtsts "SSB - Testing REAL Symmetric Eigenvalue Problem routines" & | |||
run_test ssg.out ssg.in EIG/xeigtsts "SSG - Testing REAL Symmetric Generalized Eigenvalue Problem routines" & | |||
run_test sbal.out sbal.in EIG/xeigtsts "SGEBAL - Testing the balancing of a REAL general matrix" & | |||
run_test sbak.out sbak.in EIG/xeigtsts "SGEBAK - Testing the back transformation of a REAL balanced matrix" & | |||
run_test sgbal.out sgbal.in EIG/xeigtsts "SGGBAL - Testing the balancing of a pair of REAL general matrices" & | |||
run_test sgbak.out sgbak.in EIG/xeigtsts "SGGBAK - Testing the back transformation of a pair of REAL balanced matrices" & | |||
run_test sbb.out sbb.in EIG/xeigtsts "SBB - Testing banded Singular Value Decomposition routines" & | |||
run_test sglm.out glm.in EIG/xeigtsts "GLM - Testing Generalized Linear Regression Model routines" & | |||
run_test sgqr.out gqr.in EIG/xeigtsts "GQR - Testing Generalized QR and RQ factorization routines" & | |||
run_test sgsv.out gsv.in EIG/xeigtsts "GSV - Testing Generalized Singular Value Decomposition routines" & | |||
run_test scsd.out csd.in EIG/xeigtsts "CSD - Testing CS Decomposition routines" & | |||
run_test slse.out lse.in EIG/xeigtsts "LSE - Testing Constrained Linear Least Squares routines" & | |||
run_test cnep.out nep.in EIG/xeigtstc "NEP - Testing Nonsymmetric Eigenvalue Problem routines" & | |||
run_test csep.out sep.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" & | |||
run_test cse2.out se2.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" & | |||
run_test csvd.out svd.in EIG/xeigtstc "SVD - Testing Singular Value Decomposition routines" & | |||
run_test cec.out cec.in EIG/xeigtstc "CEC - Testing COMPLEX Eigen Condition Routines" & | |||
run_test ced.out ced.in EIG/xeigtstc "CES - Testing COMPLEX Nonsymmetric Schur Form Driver" & | |||
run_test cgg.out cgg.in EIG/xeigtstc "CGG - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem routines" & | |||
run_test cgd.out cgd.in EIG/xeigtstc "CGD - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem driver routines" & | |||
run_test csb.out csb.in EIG/xeigtstc "CHB - Testing Hermitian Eigenvalue Problem routines" & | |||
run_test csg.out csg.in EIG/xeigtstc "CSG - Testing Symmetric Generalized Eigenvalue Problem routines" & | |||
run_test cbal.out cbal.in EIG/xeigtstc "CGEBAL - Testing the balancing of a COMPLEX general matrix" & | |||
run_test cbak.out cbak.in EIG/xeigtstc "CGEBAK - Testing the back transformation of a COMPLEX balanced matrix" & | |||
run_test cgbal.out cgbal.in EIG/xeigtstc "CGGBAL - Testing the balancing of a pair of COMPLEX general matrices" & | |||
run_test cgbak.out cgbak.in EIG/xeigtstc "CGGBAK - Testing the back transformation of a pair of COMPLEX balanced matrices" & | |||
run_test cbb.out cbb.in EIG/xeigtstc "CBB - Testing banded Singular Value Decomposition routines" & | |||
run_test cglm.out glm.in EIG/xeigtstc "GLM - Testing Generalized Linear Regression Model routines" & | |||
run_test cgqr.out gqr.in EIG/xeigtstc "GQR - Testing Generalized QR and RQ factorization routines" & | |||
run_test cgsv.out gsv.in EIG/xeigtstc "GSV - Testing Generalized Singular Value Decomposition routines" & | |||
run_test ccsd.out csd.in EIG/xeigtstc "CSD - Testing CS Decomposition routines" & | |||
run_test clse.out lse.in EIG/xeigtstc "LSE - Testing Constrained Linear Least Squares routines" & | |||
run_test dnep.out nep.in EIG/xeigtstd "NEP - Testing Nonsymmetric Eigenvalue Problem routines" & | |||
run_test dsep.out sep.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" & | |||
run_test dse2.out se2.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" & | |||
run_test dsvd.out svd.in EIG/xeigtstd "SVD - Testing Singular Value Decomposition routines" & | |||
run_test dec.out dec.in EIG/xeigtstd "DEC - Testing DOUBLE PRECISION Eigen Condition Routines" & | |||
run_test ded.out ded.in EIG/xeigtstd "DEV - Testing DOUBLE PRECISION Nonsymmetric Eigenvalue Driver" & | |||
run_test dgg.out dgg.in EIG/xeigtstd "DGG - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem routines" & | |||
run_test dgd.out dgd.in EIG/xeigtstd "DGD - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem driver routines" & | |||
run_test dsb.out dsb.in EIG/xeigtstd "DSB - Testing DOUBLE PRECISION Symmetric Eigenvalue Problem routines" & | |||
run_test dsg.out dsg.in EIG/xeigtstd "DSG - Testing DOUBLE PRECISION Symmetric Generalized Eigenvalue Problem routines" & | |||
run_test dbal.out dbal.in EIG/xeigtstd "DGEBAL - Testing the balancing of a DOUBLE PRECISION general matrix" & | |||
run_test dbak.out dbak.in EIG/xeigtstd "DGEBAK - Testing the back transformation of a DOUBLE PRECISION balanced matrix" & | |||
run_test dgbal.out dgbal.in EIG/xeigtstd "DGGBAL - Testing the balancing of a pair of DOUBLE PRECISION general matrices" & | |||
run_test dgbak.out dgbak.in EIG/xeigtstd "DGGBAK - Testing the back transformation of a pair of DOUBLE PRECISION balanced matrices" & | |||
run_test dbb.out dbb.in EIG/xeigtstd "DBB - Testing banded Singular Value Decomposition routines" & | |||
run_test dglm.out glm.in EIG/xeigtstd "GLM - Testing Generalized Linear Regression Model routines" & | |||
run_test dgqr.out gqr.in EIG/xeigtstd "GQR - Testing Generalized QR and RQ factorization routines" & | |||
run_test dgsv.out gsv.in EIG/xeigtstd "GSV - Testing Generalized Singular Value Decomposition routines" & | |||
run_test dcsd.out csd.in EIG/xeigtstd "CSD - Testing CS Decomposition routines" & | |||
run_test dlse.out lse.in EIG/xeigtstd "LSE - Testing Constrained Linear Least Squares routines" & | |||
run_test znep.out nep.in EIG/xeigtstz "NEP - Testing Nonsymmetric Eigenvalue Problem routines" & | |||
run_test zsep.out sep.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" & | |||
run_test zse2.out se2.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" & | |||
run_test zsvd.out svd.in EIG/xeigtstz "SVD - Testing Singular Value Decomposition routines" & | |||
run_test zec.out zec.in EIG/xeigtstz "ZEC - Testing COMPLEX16 Eigen Condition Routines" & | |||
run_test zed.out zed.in EIG/xeigtstz "ZES - Testing COMPLEX16 Nonsymmetric Schur Form Driver" & | |||
run_test zgg.out zgg.in EIG/xeigtstz "ZGG - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem routines" & | |||
run_test zgd.out zgd.in EIG/xeigtstz "ZGD - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem driver routines" & | |||
run_test zsb.out zsb.in EIG/xeigtstz "ZHB - Testing Hermitian Eigenvalue Problem routines" & | |||
run_test zsg.out zsg.in EIG/xeigtstz "ZSG - Testing Symmetric Generalized Eigenvalue Problem routines" & | |||
run_test zbal.out zbal.in EIG/xeigtstz "ZGEBAL - Testing the balancing of a COMPLEX16 general matrix" & | |||
run_test zbak.out zbak.in EIG/xeigtstz "ZGEBAK - Testing the back transformation of a COMPLEX16 balanced matrix" & | |||
run_test zgbal.out zgbal.in EIG/xeigtstz "ZGGBAL - Testing the balancing of a pair of COMPLEX general matrices" & | |||
run_test zgbak.out zgbak.in EIG/xeigtstz "ZGGBAK - Testing the back transformation of a pair of COMPLEX16 balanced matrices" & | |||
run_test zbb.out zbb.in EIG/xeigtstz "ZBB - Testing banded Singular Value Decomposition routines" & | |||
run_test zglm.out glm.in EIG/xeigtstz "GLM - Testing Generalized Linear Regression Model routines" & | |||
run_test zgqr.out gqr.in EIG/xeigtstz "GQR - Testing Generalized QR and RQ factorization routines" & | |||
run_test zgsv.out gsv.in EIG/xeigtstz "GSV - Testing Generalized Singular Value Decomposition routines" & | |||
run_test zcsd.out csd.in EIG/xeigtstz "CSD - Testing CS Decomposition routines" & | |||
run_test zlse.out lse.in EIG/xeigtstz "LSE - Testing Constrained Linear Least Squares routines" & | |||
: # wait for all background runs, then print every log that contains FAIL | |||
wait | |||
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*) | |||
: # summarise the logs with lapack_testing.py and source the two error counts from its "ALL PRECISIONS" line; | |||
: # both counters start at -1 so a missing summary line is treated as a failure | |||
python ./lapack-netlib/lapack_testing.py -d ./test_out -e > netlib_summary | |||
TOTALS="$(grep 'ALL PRECISIONS' netlib_summary)" | |||
NUMERICAL_ERRORS=-1 | |||
OTHER_ERRORS=-1 | |||
. <(awk '/ALL PRECISIONS/{printf "NUMERICAL_ERRORS=%s\nOTHER_ERRORS=%s\n", $5, $7}' netlib_summary) | |||
if (( NUMERICAL_ERRORS != 0 )) || (( OTHER_ERRORS != 0 )) ; then cat netlib_summary ; FAILURES=1 ; fi | |||
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi |
@@ -1,84 +0,0 @@ | |||
# Builds, installs and tests OpenBLAS natively on a Windows ARM64 runner with clang-cl, flang-new, CMake and Ninja. | |||
name: Windows ARM64 CI | |||
# Runs for pushes to `develop` and for pull requests targeting `develop`. | |||
on: | |||
push: | |||
branches: | |||
- develop | |||
pull_request: | |||
branches: | |||
- develop | |||
# One concurrency group per workflow and PR head ref (falling back to the run id); newer runs cancel older ones. | |||
concurrency: | |||
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} | |||
cancel-in-progress: true | |||
permissions: | |||
contents: read # to fetch code (actions/checkout) | |||
jobs: | |||
build: | |||
if: "github.repository == 'OpenMathLib/OpenBLAS'" | |||
runs-on: windows-11-arm | |||
steps: | |||
- name: Checkout repository | |||
uses: actions/checkout@v3 | |||
# Downloads the LLVM 20.1.8 woa64 installer, runs it silently (/S) and appends its bin directory to GITHUB_PATH. | |||
- name: Install LLVM for Win-ARM64 | |||
shell: pwsh | |||
run: | | |||
Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-20.1.8/LLVM-20.1.8-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe | |||
Start-Process -FilePath ".\LLVM-woa64.exe" -ArgumentList "/S" -Wait | |||
echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append | |||
#dir "C:\Program Files\LLVM\include\flang" | |||
#rmdir /Q /S "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Tools/Llvm/ARM64" | |||
# Installs CMake 3.29.4 from its MSI (quiet, no restart) and adds it to GITHUB_PATH; | |||
# Ninja 1.13.1 is unzipped and ninja.exe is copied into C:\Windows\System32. | |||
- name: Install CMake and Ninja for Win-ARM64 | |||
shell: pwsh | |||
run: | | |||
Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v3.29.4/cmake-3.29.4-windows-arm64.msi -OutFile cmake-arm64.msi | |||
Start-Process msiexec.exe -ArgumentList "/i cmake-arm64.msi /quiet /norestart" -Wait | |||
echo "C:\Program Files\CMake\bin" >> $env:GITHUB_PATH | |||
Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.13.1/ninja-winarm64.zip -OutFile ninja-winarm64.zip | |||
Expand-Archive ninja-winarm64.zip -DestinationPath ninja | |||
Copy-Item ninja\ninja.exe -Destination "C:\Windows\System32" | |||
# Calls the Visual Studio 2022 vcvarsarm64.bat script, puts the LLVM bin directory first on PATH, | |||
# then configures a Release, shared-library ARMV8 build in ./build with install prefix C:/opt. | |||
- name: Configure OpenBLAS | |||
shell: cmd | |||
run: | | |||
CALL "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsarm64.bat" | |||
set PATH=C:\Program Files\LLVM\bin;%PATH% | |||
mkdir build | |||
cd build | |||
cmake .. -G Ninja ^ | |||
-DCMAKE_BUILD_TYPE=Release ^ | |||
-DTARGET=ARMV8 ^ | |||
-DBINARY=64 ^ | |||
-DCMAKE_C_COMPILER=clang-cl ^ | |||
-DCMAKE_Fortran_COMPILER=flang-new ^ | |||
-DBUILD_SHARED_LIBS=ON ^ | |||
-DCMAKE_SYSTEM_PROCESSOR=arm64 ^ | |||
-DCMAKE_SYSTEM_NAME=Windows ^ | |||
-DCMAKE_INSTALL_PREFIX=C:/opt | |||
# Builds with a fixed 16 parallel jobs. | |||
- name: Build OpenBLAS | |||
shell: cmd | |||
run: | | |||
cd build | |||
ninja -j16 | |||
- name: Install OpenBLAS | |||
shell: cmd | |||
run: | | |||
cd build | |||
cmake --install . | |||
# Prepends C:\opt\bin to PATH before running ctest - presumably so the tests find the installed OpenBLAS DLL; confirm. | |||
- name: Run ctests | |||
shell: pwsh | |||
run: | | |||
$env:PATH = "C:\opt\bin;$env:PATH" | |||
cd build | |||
ctest | |||
@@ -1,118 +0,0 @@ | |||
*.obj | |||
*.lib | |||
*.dll | |||
*.dylib | |||
*.def | |||
*.o | |||
*.out | |||
*.tmp | |||
lapack-3.1.1 | |||
lapack-3.1.1.tgz | |||
lapack-3.4.1 | |||
lapack-3.4.1.tgz | |||
lapack-3.4.2 | |||
lapack-3.4.2.tgz | |||
lapack-netlib/make.inc | |||
lapack-netlib/SRC/la_constants.mod | |||
lapack-netlib/SRC/la_xisnan.mod | |||
lapack-netlib/TESTING/testing_results.txt | |||
lapack-netlib/INSTALL/test* | |||
lapack-netlib/TESTING/xeigtstc | |||
lapack-netlib/TESTING/xeigtstd | |||
lapack-netlib/TESTING/xeigtsts | |||
lapack-netlib/TESTING/xeigtstz | |||
lapack-netlib/TESTING/xlintstc | |||
lapack-netlib/TESTING/xlintstd | |||
lapack-netlib/TESTING/xlintstds | |||
lapack-netlib/TESTING/xlintstrfc | |||
lapack-netlib/TESTING/xlintstrfd | |||
lapack-netlib/TESTING/xlintstrfs | |||
lapack-netlib/TESTING/xlintstrfz | |||
lapack-netlib/TESTING/xlintsts | |||
lapack-netlib/TESTING/xlintstz | |||
lapack-netlib/TESTING/xlintstzc | |||
*.so | |||
*.so.* | |||
*.a | |||
.svn | |||
*~ | |||
lib.grd | |||
nohup.out | |||
config.h | |||
config_kernel.h | |||
Makefile.conf | |||
Makefile.conf_last | |||
Makefile_kernel.conf | |||
config_last.h | |||
getarch | |||
getarch_2nd | |||
utest/openblas_utest | |||
utest/openblas_utest_ext | |||
ctest/xccblat1 | |||
ctest/xccblat2 | |||
ctest/xccblat3 | |||
ctest/xccblat3_3m | |||
ctest/xdcblat1 | |||
ctest/xdcblat2 | |||
ctest/xdcblat3 | |||
ctest/xdcblat3_3m | |||
ctest/xscblat1 | |||
ctest/xscblat2 | |||
ctest/xscblat3 | |||
ctest/xscblat3_3m | |||
ctest/xzcblat1 | |||
ctest/xzcblat2 | |||
ctest/xzcblat3 | |||
ctest/xzcblat3_3m | |||
exports/linktest.c | |||
exports/linux.def | |||
kernel/setparam_*.c | |||
kernel/kernel_*.h | |||
test/CBLAT2.SUMM | |||
test/CBLAT3.SUMM | |||
test/CBLAT3_3M.SUMM | |||
test/DBLAT2.SUMM | |||
test/DBLAT3.SUMM | |||
test/DBLAT3_3M.SUMM | |||
test/SBLAT2.SUMM | |||
test/SBLAT3.SUMM | |||
test/SBLAT3_3M.SUMM | |||
test/ZBLAT2.SUMM | |||
test/ZBLAT3.SUMM | |||
test/ZBLAT3_3M.SUMM | |||
test/SHBLAT3.SUMM | |||
test/SBBLAT2.SUMM | |||
test/SBBLAT3.SUMM | |||
test/BBLAT2.SUMM | |||
test/BBLAT3.SUMM | |||
test/cblat1 | |||
test/cblat2 | |||
test/cblat3 | |||
test/cblat3_3m | |||
test/dblat1 | |||
test/dblat2 | |||
test/dblat3 | |||
test/dblat3_3m | |||
test/sblat1 | |||
test/sblat2 | |||
test/sblat3 | |||
test/sblat3_3m | |||
test/test_shgemm | |||
test/test_sbgemm | |||
test/test_sbgemv | |||
test/test_bgemm | |||
test/test_bgemv | |||
test/zblat1 | |||
test/zblat2 | |||
test/zblat3 | |||
test/zblat3_3m | |||
build | |||
build.* | |||
*.swp | |||
benchmark/*.goto | |||
benchmark/smallscaling | |||
.vscode | |||
CMakeCache.txt | |||
CMakeFiles/* | |||
.vscode | |||
**/__pycache__ |
@@ -1,320 +0,0 @@ | |||
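# XXX: Precise is already deprecated and Trusty became the Travis default (see the link below); | |||
# this file pins the distribution explicitly to Focal instead. | |||
# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming | |||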
dist: focal | |||
sudo: true | |||
language: c | |||
matrix: | |||
include: | |||
- &test-ubuntu | |||
# os: linux | |||
compiler: gcc | |||
addons: | |||
apt: | |||
packages: | |||
- gfortran | |||
# before_script: &common-before | |||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" | |||
# script: | |||
# - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
# - make -C test $COMMON_FLAGS $BTYPE | |||
# - make -C ctest $COMMON_FLAGS $BTYPE | |||
# - make -C utest $COMMON_FLAGS $BTYPE | |||
# env: | |||
# - TARGET_BOX=LINUX64 | |||
# - BTYPE="BINARY=64" | |||
# | |||
# - <<: *test-ubuntu | |||
os: linux | |||
arch: ppc64le | |||
before_script: &common-before | |||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32" | |||
script: | |||
- travis_wait 50 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
- make -C test $COMMON_FLAGS $BTYPE | |||
- make -C ctest $COMMON_FLAGS $BTYPE | |||
- make -C utest $COMMON_FLAGS $BTYPE | |||
env: | |||
# for matrix annotation only | |||
- TARGET_BOX=PPC64LE_LINUX | |||
- BTYPE="BINARY=64 USE_OPENMP=1" | |||
- <<: *test-ubuntu | |||
os: linux | |||
arch: s390x | |||
before_script: | |||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32" | |||
- sudo apt-get install --only-upgrade binutils | |||
env: | |||
# for matrix annotation only | |||
- TARGET_BOX=IBMZ_LINUX | |||
- BTYPE="BINARY=64 USE_OPENMP=1" | |||
- <<: *test-ubuntu | |||
os: linux | |||
dist: focal | |||
arch: s390x | |||
compiler: clang | |||
before_script: | |||
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32" | |||
- sudo apt-get install --only-upgrade binutils | |||
env: | |||
# for matrix annotation only | |||
- TARGET_BOX=IBMZ_LINUX | |||
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang" | |||
# - <<: *test-ubuntu | |||
# env: | |||
# - TARGET_BOX=LINUX64 | |||
# - BTYPE="BINARY=64 USE_OPENMP=1" | |||
# | |||
# - <<: *test-ubuntu | |||
# env: | |||
# - TARGET_BOX=LINUX64 | |||
# - BTYPE="BINARY=64 INTERFACE64=1" | |||
# | |||
# - <<: *test-ubuntu | |||
# compiler: clang | |||
# env: | |||
# - TARGET_BOX=LINUX64 | |||
# - BTYPE="BINARY=64 CC=clang" | |||
# | |||
# - <<: *test-ubuntu | |||
# compiler: clang | |||
# env: | |||
# - TARGET_BOX=LINUX64 | |||
# - BTYPE="BINARY=64 INTERFACE64=1 CC=clang" | |||
# | |||
# - <<: *test-ubuntu | |||
# addons: | |||
# apt: | |||
# packages: | |||
# - gcc-multilib | |||
# - gfortran-multilib | |||
# env: | |||
# - TARGET_BOX=LINUX32 | |||
# - BTYPE="BINARY=32" | |||
# | |||
- os: linux | |||
arch: ppc64le | |||
dist: bionic | |||
compiler: gcc | |||
before_script: | |||
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y | |||
- sudo apt-get update | |||
- sudo apt-get install gcc-9 gfortran-9 -y | |||
script: | |||
- travis_wait 50 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 | |||
- make -C test $COMMON_FLAGS $BTYPE | |||
- make -C ctest $COMMON_FLAGS $BTYPE | |||
- make -C utest $COMMON_FLAGS $BTYPE | |||
env: | |||
# for matrix annotation only | |||
- TARGET_BOX=PPC64LE_LINUX_P9 | |||
- os: linux | |||
arch: ppc64le | |||
dist: bionic | |||
compiler: gcc | |||
before_script: | |||
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y | |||
- sudo apt-get update | |||
- sudo apt-get install gcc-9 gfortran-9 -y | |||
script: | |||
- travis_wait 50 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9 | |||
- make -C test $COMMON_FLAGS $BTYPE | |||
- make -C ctest $COMMON_FLAGS $BTYPE | |||
- make -C utest $COMMON_FLAGS $BTYPE | |||
env: | |||
# for matrix annotation only | |||
- TARGET_BOX=PPC64LE_LINUX_P9 | |||
# - os: linux | |||
# compiler: gcc | |||
# addons: | |||
# apt: | |||
# packages: | |||
# - binutils-mingw-w64-x86-64 | |||
# - gcc-mingw-w64-x86-64 | |||
# - gfortran-mingw-w64-x86-64 | |||
# before_script: *common-before | |||
# script: | |||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
# env: | |||
# - TARGET_BOX=WIN64 | |||
# - BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran" | |||
# | |||
# Build & test on Alpine Linux inside a chroot, i.e. on a system with musl libc. | |||
# These jobs need sudo, so Travis runs them on VM-based infrastructure | |||
# which is slower than the container-based infrastructure used for jobs | |||
# that don't require sudo. | |||
# - &test-alpine | |||
# os: linux | |||
# dist: trusty | |||
# sudo: true | |||
# language: minimal | |||
# before_install: | |||
# - "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \ | |||
# && echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1" | |||
# - alpine() { /alpine/enter-chroot -u "$USER" "$@"; } | |||
# install: | |||
# - sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers' | |||
# before_script: *common-before | |||
# script: | |||
# # XXX: Disable some warnings for now to avoid exceeding Travis limit for log size. | |||
# - alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
# CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types" | |||
# - alpine make -C test $COMMON_FLAGS $BTYPE | |||
# - alpine make -C ctest $COMMON_FLAGS $BTYPE | |||
# - alpine make -C utest $COMMON_FLAGS $BTYPE | |||
# env: | |||
# - TARGET_BOX=LINUX64_MUSL | |||
# - BTYPE="BINARY=64" | |||
# XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS, | |||
# but only on Travis CI, cannot reproduce it elsewhere. | |||
#- &test-alpine-openmp | |||
# <<: *test-alpine | |||
# env: | |||
# - TARGET_BOX=LINUX64_MUSL | |||
# - BTYPE="BINARY=64 USE_OPENMP=1" | |||
# - <<: *test-alpine | |||
# env: | |||
# - TARGET_BOX=LINUX64_MUSL | |||
# - BTYPE="BINARY=64 INTERFACE64=1" | |||
# | |||
# # Build with the same flags as Alpine do in OpenBLAS package. | |||
# - <<: *test-alpine | |||
# env: | |||
# - TARGET_BOX=LINUX64_MUSL | |||
# - BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2" | |||
# - &test-cmake | |||
# os: linux | |||
# compiler: clang | |||
# addons: | |||
# apt: | |||
# packages: | |||
# - gfortran | |||
# - cmake | |||
# dist: trusty | |||
# sudo: true | |||
# before_script: | |||
# - COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32" | |||
# script: | |||
# - mkdir build | |||
# - CONFIG=Release | |||
# - cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG | |||
# - cmake --build build --config $CONFIG -- -j2 | |||
# env: | |||
# - CMAKE=1 | |||
# - <<: *test-cmake | |||
# env: | |||
# - CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1" | |||
# - <<: *test-cmake | |||
# compiler: gcc | |||
# env: | |||
# - CMAKE=1 | |||
# - &test-macos | |||
# os: osx | |||
# osx_image: xcode11.5 | |||
# before_script: | |||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" | |||
# script: | |||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
# env: | |||
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-9" | |||
# | |||
# - <<: *test-macos | |||
# osx_image: xcode12 | |||
# before_script: | |||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" | |||
# - brew update | |||
# script: | |||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
# env: | |||
# - BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10" | |||
# | |||
# - <<: *test-macos | |||
# osx_image: xcode12 | |||
# before_script: | |||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" | |||
# - brew update | |||
# script: | |||
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | |||
# env: | |||
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10" | |||
# - <<: *test-macos | |||
# osx_image: xcode10 | |||
# env: | |||
# - BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1" | |||
# - <<: *test-macos | |||
# osx_image: xcode11.5 | |||
# before_script: | |||
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" | |||
# - brew update | |||
# env: | |||
# - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0" | |||
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch arm64 -miphoneos-version-min=10.0" | |||
# - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1" | |||
# - <<: *test-macos | |||
# osx_image: xcode11.5 | |||
# env: | |||
## - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||
## - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1" | |||
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||
# - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch armv7 -miphoneos-version-min=5.1" | |||
# - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1" | |||
- &test-neoversen1 | |||
os: linux | |||
arch: arm64 | |||
dist: focal | |||
group: edge | |||
virt: lxd | |||
compiler: gcc | |||
addons: | |||
apt: | |||
packages: | |||
- gfortran | |||
script: | |||
- travis_wait 45 make && make lapack-test | |||
env: | |||
- TARGET_BOX=NEOVERSE_N1 | |||
- &test-neon1-gcc8 | |||
os: linux | |||
arch: arm64 | |||
dist: focal | |||
group: edge | |||
virt: lxd | |||
compiler: gcc | |||
addons: | |||
apt: | |||
packages: | |||
- gcc-8 | |||
- gfortran-8 | |||
script: | |||
- travis_wait 45 make QUIET_MAKE=1 CC=gcc-8 FC=gfortran-8 DYNAMIC_ARCH=1 | |||
env: | |||
- TARGET_BOX=NEOVERSE_N1-GCC8 | |||
# whitelist | |||
branches: | |||
only: | |||
- master | |||
- develop | |||
notifications: | |||
webhooks: | |||
urls: | |||
- https://webhooks.gitter.im/e/8a6e4470a0cebd090344 | |||
on_success: change # options: [always|never|change] default: always | |||
on_failure: always # options: [always|never|change] default: always | |||
on_start: never # options: [always|never|change] default: always |
@@ -1,43 +0,0 @@ | |||
Thank you for the support. | |||
### [2019.12/2021.9] [Chan-Zuckerberg Foundation EOSS Initiative](https://chanzuckerberg.com/eoss/) | |||
Between December 2019 and September 2021, development and maintenance of OpenBLAS was funded in part by the Chan-Zuckerberg Foundation in the context of two grants awarded to the NumPy Foundation and managed by NumFocus (Cycles 1 and 3 of the Essential Open Source Software for Science (EOSS) Initiative of the Chan-Zuckerberg Foundation). | |||
### [2013.8] [Testbed for OpenBLAS project](https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project) | |||
https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project/pledges | |||
In chronological order: | |||
* aeberspaecher | |||
* fmolina | |||
* saullocastro | |||
* xianyi | |||
* cuda | |||
* carter | |||
* StefanKarpinski | |||
* staticfloat | |||
* sebastien-villemot | |||
* JeffBezanson | |||
* ihnorton | |||
* simonp0420 | |||
* andrioni | |||
* Tim Holy | |||
* ivarne | |||
* johnmyleswhite | |||
* traz | |||
* Jean-Francis Roy | |||
* bkalpert | |||
* Anirban | |||
* pgermain | |||
* alexandre.lacoste.18 | |||
* foges | |||
* ssam | |||
* WestleyArgentum | |||
* daniebmariani | |||
* pjpuglia | |||
* albarrentine | |||
* Alexander Vogt | |||
@@ -1,729 +0,0 @@ | |||
## | |||
## Author: Hank Anderson <hank@statease.com> | |||
## | |||
# Minimum CMake release this build description is written for. | |||
cmake_minimum_required(VERSION 3.16.0) | |||
# Recognize the "S" file extension as an assembler source. | |||
set (CMAKE_ASM_SOURCE_FILE_EXTENSIONS "S") | |||
# Only C and ASM are enabled by the project() call itself. | |||
project(OpenBLAS C ASM) | |||
# Version components; they are joined with dots into OpenBLAS_VERSION below. | |||
set(OpenBLAS_MAJOR_VERSION 0) | |||
set(OpenBLAS_MINOR_VERSION 3) | |||
set(OpenBLAS_PATCH_VERSION 30.dev) | |||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | |||
# Adhere to GNU filesystem layout conventions | |||
include(GNUInstallDirs) | |||
include(CMakePackageConfigHelpers) | |||
####### | |||
# User-facing switches that select which LAPACK/CBLAS related parts are built. | |||
# The help strings are shown by cmake-gui/ccmake, so keep them well-formed. | |||
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF) | |||
option(BUILD_WITHOUT_LAPACKE "Do not build the C interface to LAPACK" OFF) | |||
option(BUILD_LAPACK_DEPRECATED "When building LAPACK, include also some older, deprecated routines" ON) | |||
# Free-form cache string rather than a boolean: empty means "use the default type". | |||
set(LAPACK_STRLEN "" CACHE STRING "When building LAPACK, use this type (e.g. \"int\") for character lengths (defaults to size_t)") | |||
option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON) | |||
option(BUILD_BENCHMARKS "Build the collection of BLAS/LAPACK benchmarks" OFF) | |||
option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF) | |||
option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF) | |||
# Runtime CPU dispatch: DYNAMIC_ARCH enables it, DYNAMIC_OLDER extends it to older x86 models. | |||
option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64, ppc or RISCV64-RVV1.0 only)" OFF) | |||
option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF) | |||
option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF) | |||
option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF) | |||
option(USE_PERL "Use the older PERL scripts for build preparation instead of universal shell scripts" OFF) | |||
# Note the default: the startup benchmark is skipped unless NO_WARMUP is turned OFF. | |||
option(NO_WARMUP "Do not run a benchmark on each startup just to find the best location for the memory buffer" ON) | |||
option(FIXED_LIBNAME "Use a non-versioned name for the library and no symbolic linking to variant names" OFF) | |||
# Both strings are empty by default and are combined with "openblas" when OpenBLAS_LIBNAME is computed further down. | |||
set(LIBNAMEPREFIX "" CACHE STRING "Add a prefix to the openblas part of the library name" ) | |||
set(LIBNAMESUFFIX "" CACHE STRING "Add a suffix after the openblas part of the library name" ) | |||
# CPU affinity handling is user-selectable on Linux only; everywhere else it is switched off unconditionally. | |||
if(NOT ${CMAKE_SYSTEM_NAME} MATCHES "Linux") | |||
  set(NO_AFFINITY 1) | |||
else() | |||
  option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON) | |||
endif() | |||
# Optional stress tests that check the thread safety of the built library. | |||
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF) | |||
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF) | |||
# Library flavours. Both default to OFF so that "nothing requested" can be detected below. | |||
option(BUILD_STATIC_LIBS "Build static library" OFF) | |||
option(BUILD_SHARED_LIBS "Build shared library" OFF) | |||
# Nothing requested: fall back to a static library. | |||
if(NOT BUILD_STATIC_LIBS AND NOT BUILD_SHARED_LIBS) | |||
  set(BUILD_STATIC_LIBS ON CACHE BOOL "Build static library" FORCE) | |||
endif() | |||
# With MSVC only one flavour can be built at a time; the static one wins. | |||
if((BUILD_STATIC_LIBS AND BUILD_SHARED_LIBS) AND MSVC) | |||
  message(WARNING "Could not enable both BUILD_STATIC_LIBS and BUILD_SHARED_LIBS with MSVC, Disable BUILD_SHARED_LIBS") | |||
  # Keep the cache docstring in sync with the option declared above (it previously read "Build static library"). | |||
  set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared library" FORCE) | |||
endif() | |||
# Add a prefix or suffix to all exported symbol names in the shared library. | |||
# Avoids conflicts with other BLAS libraries, especially when using | |||
# 64 bit integer interfaces in OpenBLAS. | |||
set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in the shared library to avoid conflicts with other BLAS libraries" ) | |||
set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" ) | |||
# On Windows a shared build with renamed symbols is switched to a static build here; | |||
# this branch only runs when a prefix or suffix was given and a shared library was requested. | |||
if (CMAKE_SYSTEM_NAME MATCHES "Windows" AND BUILD_SHARED_LIBS AND NOT ("${SYMBOLPREFIX}${SYMBOLSUFFIX}" STREQUAL "")) | |||
set (DELETE_STATIC_LIBS "") | |||
if (NOT BUILD_STATIC_LIBS) | |||
message (STATUS "forcing build of a temporary static library for symbol renaming") | |||
# The cache entries are overwritten, so BUILD_SHARED_LIBS reads OFF from here on. | |||
set (BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared library" FORCE) | |||
set (BUILD_STATIC_LIBS ON CACHE BOOL "Build static library" FORCE) | |||
# NOTE(review): OpenBLAS_LIBNAME is only assigned further down in this file, so it is presumably | |||
# still empty when this line is evaluated - confirm the resulting path is the intended one. | |||
# NOTE(review): the post-build command near the end of the file expands ${REMOVE_STATIC_LIB}, | |||
# not DELETE_STATIC_LIBS - confirm which variable name is meant to carry this cleanup step. | |||
set (DELETE_STATIC_LIBS file (REMOVE $<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.lib)) | |||
endif () | |||
endif() | |||
####### | |||
# Translate the user-facing BUILD_WITHOUT_* switches into the internal NO_* variables. | |||
if(BUILD_WITHOUT_LAPACK) | |||
  set(NO_LAPACK 1) | |||
endif() | |||
# LAPACKE is dropped both when asked for directly and when LAPACK itself is dropped. | |||
if(BUILD_WITHOUT_LAPACK OR BUILD_WITHOUT_LAPACKE) | |||
  set(NO_LAPACKE 1) | |||
endif() | |||
if(BUILD_WITHOUT_CBLAS) | |||
  set(NO_CBLAS 1) | |||
endif() | |||
####### | |||
# Link against the static MSVC runtime (/MT) instead of the DLL runtime (/MD) when MSVC_STATIC_CRT is set. | |||
if(MSVC AND MSVC_STATIC_CRT) | |||
  # cmake_minimum_required(VERSION 3.16) turns policy CMP0091 to NEW: CMake then no longer puts /MD | |||
  # into the default flag variables and picks the runtime via CMAKE_MSVC_RUNTIME_LIBRARY instead. | |||
  # Select the static runtime through that abstraction unless the user already chose one. | |||
  if(NOT DEFINED CMAKE_MSVC_RUNTIME_LIBRARY) | |||
    set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>") | |||
  endif() | |||
  # Still rewrite any /MD that was passed in explicitly (command line, toolchain file, ...). | |||
  set(CompilerFlags | |||
    CMAKE_CXX_FLAGS | |||
    CMAKE_CXX_FLAGS_DEBUG | |||
    CMAKE_CXX_FLAGS_RELEASE | |||
    CMAKE_C_FLAGS | |||
    CMAKE_C_FLAGS_DEBUG | |||
    CMAKE_C_FLAGS_RELEASE | |||
  ) | |||
  foreach(CompilerFlag ${CompilerFlags}) | |||
    string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}") | |||
  endforeach() | |||
endif() | |||
# Printed on every configure run. | |||
message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.") | |||
# Project helper scripts; presumably these define SUFFIX64_UNDERSCORE and CORE used below - TODO confirm. | |||
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake") | |||
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake") | |||
# HAVE64 receives the position of SUFFIX64_UNDERSCORE inside the user-supplied LIBNAMESUFFIX, or -1 if absent. | |||
string(FIND "${LIBNAMESUFFIX}" "${SUFFIX64_UNDERSCORE}" HAVE64) | |||
# Append SUFFIX64_UNDERSCORE to the library name only when the user's suffix does not already contain it. | |||
if (${HAVE64} GREATER -1) | |||
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX}) | |||
else () | |||
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX}${SUFFIX64_UNDERSCORE}) | |||
endif () | |||
# Directories that always contribute objects to the BLAS part of the library. | |||
set(BLASDIRS interface driver/level2 driver/level3 driver/others) | |||
# The kernel directory is handled separately further down when DYNAMIC_ARCH is on. | |||
if (NOT DYNAMIC_ARCH) | |||
  list(APPEND BLASDIRS kernel) | |||
endif () | |||
if (DEFINED SANITY_CHECK) | |||
  list(APPEND BLASDIRS reference) | |||
endif () | |||
# SUBDIRS = BLAS directories, then optionally ReLAPACK, then LAPACK (in that order). | |||
set(SUBDIRS ${BLASDIRS}) | |||
if (NOT NO_LAPACK AND BUILD_RELAPACK) | |||
  list(APPEND SUBDIRS relapack/src) | |||
endif () | |||
if (NOT NO_LAPACK) | |||
  list(APPEND SUBDIRS lapack) | |||
endif () | |||
# The reduced-precision variants stay off unless the caller defined them. | |||
if (NOT DEFINED BUILD_BFLOAT16) | |||
  set (BUILD_BFLOAT16 false) | |||
endif () | |||
if (NOT DEFINED BUILD_HFLOAT16) | |||
  set (BUILD_HFLOAT16 false) | |||
endif () | |||
# Select the float types to build for: when none of the four standard ones was defined, enable all four. | |||
if (NOT (DEFINED BUILD_SINGLE OR DEFINED BUILD_DOUBLE OR DEFINED BUILD_COMPLEX OR DEFINED BUILD_COMPLEX16)) | |||
  # set(BUILD_BFLOAT16 true) | |||
  set(BUILD_SINGLE true) | |||
  set(BUILD_DOUBLE true) | |||
  set(BUILD_COMPLEX true) | |||
  set(BUILD_COMPLEX16 true) | |||
endif () | |||
if (NOT DEFINED BUILD_MATGEN) | |||
  set(BUILD_MATGEN true) | |||
endif() | |||
# Build up FLOAT_TYPES, reporting every enabled precision on the way. | |||
set(FLOAT_TYPES "") | |||
if (BUILD_SINGLE) | |||
  message(STATUS "Building Single Precision") | |||
  list(APPEND FLOAT_TYPES "SINGLE") # defines nothing | |||
endif () | |||
if (BUILD_DOUBLE) | |||
  message(STATUS "Building Double Precision") | |||
  list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE | |||
endif () | |||
if (BUILD_COMPLEX) | |||
  message(STATUS "Building Complex Precision") | |||
  list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX | |||
endif () | |||
if (BUILD_COMPLEX16) | |||
  message(STATUS "Building Double Complex Precision") | |||
  list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE | |||
endif () | |||
# bfloat16 is only announced here; it is not added to FLOAT_TYPES. | |||
if (BUILD_BFLOAT16) | |||
  message(STATUS "Building Half Precision") | |||
  # list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing | |||
endif () | |||
# Abort the configure step when no CPU core could be determined. | |||
# The hint uses the CMake spelling (-DTARGET=...), since this message is only ever shown by a CMake run. | |||
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") | |||
  message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. cmake -DTARGET=your_cpu_target. Please read README for details.") | |||
endif () | |||
#Set default output directory | |||
# Shared libraries and archives both default to <build dir>/lib. | |||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) | |||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) | |||
if(MSVC) | |||
# NOTE(review): only the LIBRARY/Debug and ARCHIVE/Release combinations are set here; | |||
# the other two (LIBRARY/Release, ARCHIVE/Debug) are not - confirm this asymmetry is intended. | |||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib/Debug) | |||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib/Release) | |||
endif () | |||
# get obj vars into format that add_library likes: $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) | |||
set(TARGET_OBJS "") | |||
foreach (SUBDIR ${SUBDIRS}) | |||
add_subdirectory(${SUBDIR}) | |||
# The object library of a subdirectory is expected to be named after its path with "/" turned into "_" | |||
# (e.g. driver/level2 -> driver_level2). | |||
string(REPLACE "/" "_" subdir_obj ${SUBDIR}) | |||
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>") | |||
endforeach () | |||
# netlib: | |||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. | |||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. | |||
# LAPACKE is only considered when LAPACK itself is built. | |||
if (NOT NO_LAPACK) | |||
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake") | |||
if (NOT NO_LAPACKE) | |||
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake") | |||
endif () | |||
endif () | |||
# Only generate .def for dll on MSVC and always produce pdb files for debug and release | |||
if(MSVC) | |||
# NOTE(review): this file requires CMake 3.16 at the top, so the "older than 3.4" test below | |||
# cannot be true and OpenBLAS_DEF_FILE is not set by this branch. | |||
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4) | |||
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def") | |||
endif() | |||
# /Zi on the compiler plus /DEBUG on the linker for release builds; /OPT:REF and /OPT:ICF are passed alongside /DEBUG. | |||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi") | |||
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF") | |||
endif() | |||
# With DYNAMIC_ARCH the kernel directory is added here, and one kernel object library per | |||
# entry of DYNAMIC_CORE is appended to the objects of the final library. | |||
if (${DYNAMIC_ARCH}) | |||
  add_subdirectory(kernel) | |||
  foreach(TARGET_CORE ${DYNAMIC_CORE}) | |||
    # Log each core that gets its own kernel set. | |||
    message("${TARGET_CORE}") | |||
    list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:kernel_${TARGET_CORE}>") | |||
  endforeach() | |||
endif () | |||
# Object libraries for the LAPACK and LAPACKE sources; their objects end up in the openblas library. | |||
if(NOT NO_LAPACK) | |||
  add_library(LAPACK_OVERRIDES OBJECT ${LA_SOURCES}) | |||
  # OpenMP is attached to the LAPACK Fortran code everywhere except on Windows. | |||
  if (USE_OPENMP AND (NOT NOFORTRAN) AND (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")) | |||
    target_link_libraries(LAPACK_OVERRIDES OpenMP::OpenMP_Fortran) | |||
  endif() | |||
  list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACK_OVERRIDES>") | |||
endif() | |||
if(NOT NO_LAPACKE) | |||
  add_library(LAPACKE OBJECT ${LAPACKE_SOURCES}) | |||
  list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACKE>") | |||
endif() | |||
#if(BUILD_RELAPACK) | |||
# add_library(RELAPACK OBJECT ${RELA_SOURCES}) | |||
# list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:RELAPACK>") | |||
#endif() | |||
# OpenBLAS_LIBS collects the names of the real library targets (static and/or shared) created below. | |||
set(OpenBLAS_LIBS "") | |||
if(BUILD_STATIC_LIBS) | |||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) | |||
# Consumers of the installed target get include/openblas${SUFFIX64} on their include path. | |||
target_include_directories(${OpenBLAS_LIBNAME}_static INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>) | |||
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_static) | |||
endif() | |||
if(BUILD_SHARED_LIBS) | |||
add_library(${OpenBLAS_LIBNAME}_shared SHARED ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) | |||
target_include_directories(${OpenBLAS_LIBNAME}_shared INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>) | |||
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_shared) | |||
endif() | |||
# The unsuffixed target name is an alias; the static library is preferred when both flavours are built. | |||
if(BUILD_STATIC_LIBS) | |||
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_static) | |||
else() | |||
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_shared) | |||
endif() | |||
# Both flavours share one output base name, i.e. without the _static/_shared target suffix. | |||
set_target_properties(${OpenBLAS_LIBS} PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME}) | |||
# Explicitly link against libm on the systems matched below (Android among them). | |||
if (${CMAKE_SYSTEM_NAME} MATCHES "AIX|Android|Linux|FreeBSD|OpenBSD|NetBSD|DragonFly|Darwin") | |||
if(BUILD_STATIC_LIBS) | |||
target_link_libraries(${OpenBLAS_LIBNAME}_static m) | |||
endif() | |||
if(BUILD_SHARED_LIBS) | |||
target_link_libraries(${OpenBLAS_LIBNAME}_shared m) | |||
endif() | |||
endif() | |||
# Attach the OpenMP runtime to every library flavour that is built. | |||
# The Fortran OpenMP target is only added when a Fortran compiler is in use. | |||
if (USE_OPENMP) | |||
  if(NOFORTRAN) | |||
    if(BUILD_STATIC_LIBS) | |||
      target_link_libraries(${OpenBLAS_LIBNAME}_static OpenMP::OpenMP_C) | |||
    endif() | |||
    if(BUILD_SHARED_LIBS) | |||
      target_link_libraries(${OpenBLAS_LIBNAME}_shared OpenMP::OpenMP_C) | |||
    endif() | |||
  else() | |||
    if(BUILD_STATIC_LIBS) | |||
      target_link_libraries(${OpenBLAS_LIBNAME}_static OpenMP::OpenMP_C OpenMP::OpenMP_Fortran) | |||
    endif() | |||
    if(BUILD_SHARED_LIBS) | |||
      target_link_libraries(${OpenBLAS_LIBNAME}_shared OpenMP::OpenMP_C OpenMP::OpenMP_Fortran) | |||
    endif() | |||
  endif() | |||
endif() | |||
# Fix "Argument list too long" for macOS with POWERPC or Intel CPUs | |||
# Applies to Apple builds whose host processor is not arm64: the archive and link rules are replaced | |||
# with shell commands that read the object list from response files instead of the command line. | |||
if(APPLE AND (NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64")) | |||
# Use response files | |||
set(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1) | |||
# Always build static library first | |||
if(BUILD_STATIC_LIBS) | |||
set(STATIC_PATH "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/lib${OpenBLAS_LIBNAME}.a") | |||
else() | |||
# No static library was requested, so a static target is created here just for the shared link step; | |||
# note that STATIC_PATH is a relative path in this branch. | |||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) | |||
set(STATIC_PATH "lib${OpenBLAS_LIBNAME}.a") | |||
endif() | |||
# Two-step archive rule: feed the object names from the objects*.rsp files to ar in batches of 1024, | |||
# then add xerbla.c.o with "ar -rs". | |||
set(CREATE_STATIC_LIBRARY_COMMAND | |||
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/${OpenBLAS_LIBNAME}_static.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru ${STATIC_PATH} && exit 0' " | |||
"sh -c '${CMAKE_AR} -rs ${STATIC_PATH} ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' ") | |||
if(BUILD_SHARED_LIBS) | |||
# The shared library is linked from the static archive, so the archive has to exist first. | |||
add_dependencies(${OpenBLAS_LIBNAME}_shared ${OpenBLAS_LIBNAME}_static) | |||
# NOTE(review): the file name is hard-coded as "libopenblas" rather than built from ${OpenBLAS_LIBNAME}; | |||
# confirm this is intended when LIBNAMEPREFIX/LIBNAMESUFFIX are in use. | |||
set(SHARED_PATH "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib") | |||
endif() | |||
# OMP_LIB is appended to the custom shared-library link lines below; empty without OpenMP. | |||
if(USE_OPENMP) | |||
get_target_property(OMP_LIB OpenMP::OpenMP_C INTERFACE_LINK_LIBRARIES) | |||
else() | |||
set(OMP_LIB "") | |||
endif() | |||
# With Fortran the Fortran rules are overridden, otherwise the C rules. | |||
if(NOT NOFORTRAN) | |||
set(CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1) | |||
set(CMAKE_Fortran_CREATE_STATIC_LIBRARY ${CREATE_STATIC_LIBRARY_COMMAND}) | |||
if(BUILD_SHARED_LIBS) | |||
# First command compiles an empty source into dummy.o; the second links dummy.o while force-loading the whole static archive. | |||
set(CMAKE_Fortran_CREATE_SHARED_LIBRARY | |||
"sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '" | |||
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,${STATIC_PATH} dummy.o -o ${SHARED_PATH} ${OMP_LIB}'") | |||
endif() | |||
else() | |||
set(CMAKE_C_CREATE_STATIC_LIBRARY ${CREATE_STATIC_LIBRARY_COMMAND}) | |||
if(BUILD_SHARED_LIBS) | |||
# Same force-load link as in the Fortran case, but without the dummy object. | |||
set(CMAKE_C_CREATE_SHARED_LIBRARY | |||
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,${STATIC_PATH} -o ${SHARED_PATH} ${OMP_LIB}'") | |||
endif() | |||
endif() | |||
endif() | |||
# Symbol export handling for MSVC shared builds. | |||
if(MSVC AND BUILD_SHARED_LIBS) | |||
  if (NOT ${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4) | |||
    # Let CMake export everything; this creates a verbose .def file (51KB vs 18KB) | |||
    set_target_properties(${OpenBLAS_LIBNAME}_shared PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true) | |||
  else() | |||
    # CMake releases before 3.4 use the project's own export script. | |||
    include("${PROJECT_SOURCE_DIR}/cmake/export.cmake") | |||
  endif() | |||
endif() | |||
# Output locations and names for the libopenblas targets. | |||
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES | |||
  RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib | |||
  LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d" | |||
  EXPORT_NAME "OpenBLAS" | |||
) | |||
# Multi-config generators: one lib/<CONFIG> directory per configuration, for all three artifact kinds. | |||
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES}) | |||
  string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG ) | |||
  set_target_properties( ${OpenBLAS_LIBS} PROPERTIES | |||
    RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} | |||
    LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} | |||
    ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} | |||
  ) | |||
endforeach() | |||
# Turn on CTest support before any test directory is added. | |||
enable_testing() | |||
# Threaded builds: locate the platform thread library and attach it to each library flavour. | |||
if (USE_THREAD) | |||
  find_package(Threads) | |||
  # Pass -pthread both when compiling the library and to its consumers, if the compiler wants it. | |||
  if (THREADS_HAVE_PTHREAD_ARG) | |||
    set_target_properties(${OpenBLAS_LIBS} PROPERTIES | |||
      COMPILE_OPTIONS "-pthread" | |||
      INTERFACE_COMPILE_OPTIONS "-pthread" | |||
    ) | |||
  endif() | |||
  if(BUILD_STATIC_LIBS) | |||
    target_link_libraries(${OpenBLAS_LIBNAME}_static ${CMAKE_THREAD_LIBS_INIT}) | |||
  endif() | |||
  if(BUILD_SHARED_LIBS) | |||
    target_link_libraries(${OpenBLAS_LIBNAME}_shared ${CMAKE_THREAD_LIBS_INIT}) | |||
  endif() | |||
endif() | |||
#if (MSVC OR NOT NOFORTRAN) | |||
# utest needs CBLAS and is skipped for CBLAS-only builds (broken without fortran on unix). | |||
if (NOT (NO_CBLAS OR ONLY_CBLAS)) | |||
  add_subdirectory(utest) | |||
endif() | |||
# Fortran-based tests: the BLAS tests and, optionally, the LAPACK testsuite. | |||
if (NOT NOFORTRAN) | |||
  if (NOT ONLY_CBLAS) | |||
    # Build test and ctest | |||
    add_subdirectory(test) | |||
  endif() | |||
  if (BUILD_TESTING AND NOT BUILD_WITHOUT_LAPACK) | |||
    add_subdirectory(lapack-netlib/TESTING) | |||
  endif() | |||
endif() | |||
# CBLAS tests, under the same conditions as utest. | |||
if (NOT (NO_CBLAS OR ONLY_CBLAS)) | |||
  add_subdirectory(ctest) | |||
endif() | |||
# Either thread-safety option pulls in the C++ stress test directory. | |||
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV) | |||
  add_subdirectory(cpp_thread_test) | |||
endif() | |||
# Versioned library names (major.minor, SOVERSION = major) unless a fixed name was requested. | |||
if (NOT FIXED_LIBNAME) | |||
  set_target_properties(${OpenBLAS_LIBS} PROPERTIES | |||
    VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION} | |||
    SOVERSION ${OpenBLAS_MAJOR_VERSION} | |||
  ) | |||
endif() | |||
# A shared library built with ReLAPACK is linked with the linker told to accept multiply defined symbols. | |||
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK) | |||
  if (MSVC) | |||
    set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE") | |||
  else() | |||
    target_link_libraries(${OpenBLAS_LIBNAME}_shared "-Wl,-allow-multiple-definition") | |||
  endif() | |||
endif() | |||
if (BUILD_SHARED_LIBS OR DELETE_STATIC_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "") | |||
if (NOT DEFINED ARCH) | |||
set(ARCH_IN "x86_64") | |||
else() | |||
set(ARCH_IN ${ARCH}) | |||
endif() | |||
if (${CORE} STREQUAL "generic") | |||
set(ARCH_IN "GENERIC") | |||
endif () | |||
if (NOT DEFINED EXPRECISION) | |||
set(EXPRECISION_IN 0) | |||
else() | |||
set(EXPRECISION_IN ${EXPRECISION}) | |||
endif() | |||
if (NOT DEFINED NO_CBLAS) | |||
set(NO_CBLAS_IN 0) | |||
else() | |||
set(NO_CBLAS_IN ${NO_CBLAS}) | |||
endif() | |||
if (NOT DEFINED NO_LAPACK) | |||
set(NO_LAPACK_IN 0) | |||
else() | |||
set(NO_LAPACK_IN ${NO_LAPACK}) | |||
endif() | |||
if (NOT DEFINED NO_LAPACKE) | |||
set(NO_LAPACKE_IN 0) | |||
else() | |||
set(NO_LAPACKE_IN ${NO_LAPACKE}) | |||
endif() | |||
if (NOT DEFINED NEED2UNDERSCORES) | |||
set(NEED2UNDERSCORES_IN 0) | |||
else() | |||
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES}) | |||
endif() | |||
if (NOT DEFINED ONLY_CBLAS) | |||
set(ONLY_CBLAS_IN 0) | |||
else() | |||
set(ONLY_CBLAS_IN ${ONLY_CBLAS}) | |||
endif() | |||
if (NOT DEFINED BU) | |||
set(BU _) | |||
endif() | |||
if (NOT ${SYMBOLPREFIX} STREQUAL "") | |||
message(STATUS "adding prefix ${SYMBOLPREFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}") | |||
endif() | |||
if (NOT ${SYMBOLSUFFIX} STREQUAL "") | |||
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}") | |||
endif() | |||
if (${BUILD_LAPACK_DEPRECATED}) | |||
set (BLD 1) | |||
else () | |||
set (BLD 0) | |||
endif() | |||
if (${BUILD_BFLOAT16}) | |||
set (BBF16 1) | |||
else () | |||
set (BBF16 0) | |||
endif() | |||
if (${BUILD_SINGLE}) | |||
set (BS 1) | |||
else () | |||
set (BS 0) | |||
endif() | |||
if (${BUILD_DOUBLE}) | |||
set (BD 1) | |||
else () | |||
set (BD 0) | |||
endif() | |||
if (${BUILD_COMPLEX}) | |||
set (BC 1) | |||
else () | |||
set (BC 0) | |||
endif() | |||
if (${BUILD_COMPLEX16}) | |||
set (BZ 1) | |||
else () | |||
set (BZ 0) | |||
endif() | |||
# Post-build symbol renaming.
#
# Windows: generate renamesyms.def with gensymbol.pl, compile dllinit.c and
# link the static archive into a DLL with lld-link using that .def file.
# Elsewhere: generate objcopy.def with gensymbol (shell or perl variant,
# selected by USE_PERL) and apply it to the shared library with
# `objcopy --redefine-syms`.
if (CMAKE_SYSTEM_NAME MATCHES "Windows")
  set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
  set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
  # Debug builds use the debug CRT and additionally ask lld-link for a PDB.
  if (CMAKE_BUILD_TYPE MATCHES "Debug")
    set (CRTLIB msvcrtd)
    set (PDBOPT -debug -pdb:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.pdb)
    set (PDB_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
  else ()
    set (CRTLIB msvcrt)
    set (PDBOPT "")
  endif()
  #if (USE_PERL)
  message(STATUS "adding postbuild instruction to rename syms")
  add_custom_command(TARGET ${OpenBLAS_LIBNAME}_static POST_BUILD
    # Step 1: write the export definition file.
    COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "win2k" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/renamesyms.def
    # Step 2: compile the DLL entry point.
    COMMAND ${CMAKE_C_COMPILER} ${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} -c -o ${PROJECT_BINARY_DIR}/dllinit.o ${PROJECT_SOURCE_DIR}/exports/dllinit.c
    # Step 3: link the DLL and its import library from the static archive.
    COMMAND lld-link -nodefaultlib:libcmt -defaultlib:${CRTLIB} ${CMAKE_LINKER_FLAGS} -errorlimit:0 -def:${PROJECT_BINARY_DIR}/renamesyms.def ${PROJECT_BINARY_DIR}/dllinit.o $<TARGET_FILE:${OpenBLAS_LIBNAME}_static> -wholearchive:$<TARGET_FILE:${OpenBLAS_LIBNAME}_static> -dll -out:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.dll -implib:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.dll.a ${PDBOPT}
    #COMMAND lld-link -nodefaultlib:libcmt -defaultlib:msvcrt ${CMAKE_LINKER_FLAGS} -errorlimit:0 -def:${PROJECT_BINARY_DIR}/renamesyms.def ${PROJECT_BINARY_DIR}/dllinit.o $<TARGET_FILE:${OpenBLAS_LIBNAME}_static> -wholearchive:$<TARGET_FILE:${OpenBLAS_LIBNAME}_static> -dll -out:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.dll -implib:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.dll.a
    ${REMOVE_STATIC_LIB} VERBATIM
  )
  #endif ()
else ()
  if (NOT USE_PERL)
    add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
      COMMAND sh ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
      # Operate on lib<name>.so, the same file the USE_PERL branch rewrites.
      # This path previously lacked the "lib" file-name prefix.
      COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
      COMMENT "renaming symbols"
    )
  else()
    add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
      COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
      COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
      COMMENT "renaming symbols"
    )
  endif()
endif()
endif() | |||
# Optional benchmark executables: one target per benchmark/*.c source and per
# precision variant, each linked against the OpenBLAS library target.
if (BUILD_BENCHMARKS)
  #find_package(OpenMP REQUIRED)
  file(GLOB SOURCES "benchmark/*.c")

  # smallscaling.c is only kept when OpenMP is in use.
  if (NOT USE_OPENMP)
    file(GLOB REMFILE "benchmark/smallscaling.c")
    list(REMOVE_ITEM SOURCES ${REMFILE})
  endif()

  # Benchmarks dropped when the library is built without LAPACK.
  if (BUILD_WITHOUT_LAPACK)
    foreach(bench_stem cholesky geev gesv getri potrf spmv symv linpack)
      file(GLOB REMFILE "benchmark/${bench_stem}.c")
      list(REMOVE_ITEM SOURCES ${REMFILE})
    endforeach()
  endif()

  if (NOT USE_GEMM3M)
    file(GLOB REMFILE "benchmark/gemm3m.c")
    list(REMOVE_ITEM SOURCES ${REMFILE})
  endif()

  foreach(bench_src ${SOURCES})
    get_filename_component(name ${bench_src} NAME_WE)
    if ((NOT ${name} STREQUAL "zdot-intel") AND (NOT ${name} STREQUAL "cula_wrapper"))
      # Four variants per source: no extra define, COMPLEX, DOUBLE, and both.
      set(defines DEFAULT COMPLEX DOUBLE "COMPLEX\;DOUBLE")
      foreach(variant ${defines})
        if ("${variant}" STREQUAL "DEFAULT")
          set(target_name "benchmark_${name}")
        else()
          string(JOIN "_" define_str ${variant})
          set(target_name "benchmark_${name}_${define_str}")
        endif()

        # imax/imin/max/min get no COMPLEX or COMPLEX_DOUBLE executable.
        if (NOT "${target_name}" MATCHES "^benchmark_i?(max|min)_COMPLEX(_DOUBLE)?$")
          add_executable(${target_name} ${bench_src})
          target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
          target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} )
          # target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} OpenMP::OpenMP_C)
          if (NOT "${variant}" STREQUAL "DEFAULT")
            target_compile_definitions(${target_name} PRIVATE ${variant})
          endif()
        endif()
      endforeach()
    endif()
  endforeach()
endif()
# Install project
# Install libraries
if(NOT (BUILD_SHARED_LIBS AND BUILD_STATIC_LIBS))
  # Only one flavour is built: install whatever OpenBLAS_LIBS names and
  # record it in the export set.
  install(TARGETS ${OpenBLAS_LIBS}
    EXPORT "OpenBLAS${SUFFIX64}Targets"
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
else()
  # Both flavours are built: only the shared target goes into the export
  # set; the static target is installed alongside it.
  install(TARGETS ${OpenBLAS_LIBNAME}_shared
    EXPORT "OpenBLAS${SUFFIX64}Targets"
    RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
  install(TARGETS ${OpenBLAS_LIBNAME}_static
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
endif()
# Install headers
# From here on headers are installed into an openblas${SUFFIX64} subdirectory.
set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
set(CMAKE_INSTALL_FULL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}")
set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h)

# Opening include guard.
file(WRITE ${OPENBLAS_CONFIG_H}
  "#ifndef OPENBLAS_CONFIG_H\n"
  "#define OPENBLAS_CONFIG_H\n")

# Re-emit every line of the generated config.h with an OPENBLAS_ prefix on
# the macro name.
file(STRINGS ${PROJECT_BINARY_DIR}/config.h __lines)
foreach(cfg_line ${__lines})
  string(REPLACE "#define " "" cfg_line ${cfg_line})
  file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_${cfg_line}\n")
endforeach()

# Version macro, the static template text, and the closing guard.
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS)
file(APPEND ${OPENBLAS_CONFIG_H}
  "#define OPENBLAS_VERSION \"OpenBLAS ${OpenBLAS_VERSION}\"\n"
  "${OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS}\n"
  "#endif /* OPENBLAS_CONFIG_H */\n")

install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
# f77blas.h: common_interface.h wrapped in an include guard, with
# openblas_config.h included first. Skipped when NOFORTRAN is set.
if(NOT NOFORTRAN)
  message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
  set(F77BLAS_H ${CMAKE_BINARY_DIR}/generated/f77blas.h)
  file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS)
  file(WRITE ${F77BLAS_H}
    "#ifndef OPENBLAS_F77BLAS_H\n"
    "#define OPENBLAS_F77BLAS_H\n"
    "#include \"openblas_config.h\"\n"
    "${COMMON_INTERFACE_H_CONTENTS}\n"
    "#endif")
  install (FILES ${F77BLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
# cblas.h: copy of the source-tree header with "common" replaced by
# "openblas_config" and, when requested, the symbol prefix/suffix applied to
# the cblas*, openblas* and goto* names. The text is rewritten in place in
# CBLAS_H_CONTENTS_NEW, one substitution at a time, in a fixed order.
if(NOT NO_CBLAS)
  message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
  set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
  file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
  string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")

  if (NOT ${SYMBOLPREFIX} STREQUAL "")
    string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS_NEW}")
    string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS_NEW}")
    # Undo the previous step for names starting with openblas_complex.
    string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS_NEW}")
    string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS_NEW}")
  endif()

  if (NOT ${SYMBOLSUFFIX} STREQUAL "")
    string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS_NEW}")
    string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS_NEW}")
    # Undo the previous step for names starting with openblas_complex.
    string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS_NEW}")
    string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS_NEW}")
  endif()

  file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
  install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
# LAPACKE headers: install every header found under lapack-netlib/LAPACKE,
# plus a lapacke_mangling.h that the genlapacke target copies into the build
# tree from lapacke_mangling_with_flags.h.in.
if(NOT NO_LAPACKE)
  message (STATUS "Copying LAPACKE header files to ${CMAKE_INSTALL_INCLUDEDIR}")

  add_custom_target(genlapacke
    COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
  )

  # Each library flavour that is built depends on genlapacke.
  if(BUILD_STATIC_LIBS)
    add_dependencies( ${OpenBLAS_LIBNAME}_static genlapacke)
  endif()
  if(BUILD_SHARED_LIBS)
    add_dependencies( ${OpenBLAS_LIBNAME}_shared genlapacke)
  endif()

  file(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
  install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
  install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()
# Install pkg-config files
set(openblas_pc_file ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc)
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${openblas_pc_file} @ONLY)
install (FILES ${openblas_pc_file} DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)

# CMake package files: <PN><SUFFIX64>Config.cmake, the matching
# ConfigVersion file, and the exported targets, all installed into
# CMAKECONFIG_INSTALL_DIR.
set(PN OpenBLAS)
set(CMAKECONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PN}${SUFFIX64}")
set(pkg_config_file "${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake")
set(pkg_version_file ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake)

configure_package_config_file(cmake/${PN}Config.cmake.in
  "${pkg_config_file}"
  INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})
write_basic_package_version_file(${pkg_version_file}
  VERSION ${${PN}_VERSION}
  COMPATIBILITY AnyNewerVersion)

install(FILES ${pkg_config_file}
  DESTINATION ${CMAKECONFIG_INSTALL_DIR})
# The version file is generated without the suffix and renamed on install.
install(FILES ${pkg_version_file}
  RENAME ${PN}${SUFFIX64}ConfigVersion.cmake
  DESTINATION ${CMAKECONFIG_INSTALL_DIR})
install(EXPORT "${PN}${SUFFIX64}Targets"
  NAMESPACE "${PN}${SUFFIX64}::"
  DESTINATION ${CMAKECONFIG_INSTALL_DIR})
@@ -0,0 +1 @@ | |||
openmathlib.org |
@@ -1,269 +0,0 @@ | |||
# Contributions to the OpenBLAS project | |||
## Creator & Maintainer | |||
* Zhang Xianyi <traits.zhang@gmail.com> | |||
## Active Developers | |||
* Wang Qian <traz0824@gmail.com> | |||
* Optimize BLAS3 on ICT Loongson 3A. | |||
* Optimize BLAS3 on Intel Sandy Bridge. | |||
* Werner Saar <wernsaar@googlemail.com> | |||
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer | |||
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer | |||
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer | |||
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer | |||
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer | |||
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer | |||
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer | |||
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer | |||
* Porting and Optimization on ARM Cortex-A9 | |||
* Optimization on AMD Piledriver | |||
* Optimization on Intel Haswell | |||
* Chris Sidebottom <chris.sidebottom@arm.com> | |||
* Optimizations and other improvements targeting AArch64 | |||
* Annop Wongwathanarat <annop.wongwathanarat@arm.com> | |||
* Optimizations and other improvements targeting AArch64 | |||
## Previous Developers | |||
* Zaheer Chothia <zaheer.chothia@gmail.com> | |||
* Improve the compatibility about complex number | |||
* Build LAPACKE: C interface to LAPACK | |||
* Improve the windows build. | |||
* Chen Shaohu <huhumartinwar@gmail.com> | |||
* Optimize GEMV on the Loongson 3A processor. | |||
* Luo Wen | |||
* Intern. Test Level-2 BLAS. | |||
## Contributors | |||
In chronological order: | |||
* pipping <http://page.mi.fu-berlin.de/pipping> | |||
* [2011-06-11] Make USE_OPENMP=0 disable openmp. | |||
* Stefan Karpinski <stefan@karpinski.org> | |||
* [2011-12-28] Fix a bug about SystemStubs on Mac OS X. | |||
* Alexander Eberspächer <https://github.com/aeberspaecher> | |||
* [2012-05-02] Add note on patch for segfaults on Linux kernel 2.6.32. | |||
* Mike Nolta <mike@nolta.net> | |||
* [2012-05-19] Fix building bug on FreeBSD and NetBSD. | |||
* Sylvestre Ledru <https://github.com/sylvestre> | |||
* [2012-07-01] Improve the detection of sparc. Fix building bug under | |||
Hurd and kfreebsd. | |||
* Jameson Nash <https://github.com/vtjnash> | |||
* [2012-08-20] Provide support for passing CFLAGS, FFLAGS, PFLAGS, FPFLAGS to | |||
make on the command line. | |||
* Alexander Nasonov <alnsn@yandex.ru> | |||
* [2012-11-10] Fix NetBSD build. | |||
* Sébastien Villemot <sebastien@debian.org> | |||
* [2012-11-14] Fix compilation with TARGET=GENERIC. Patch applied to Debian package. | |||
* [2013-08-28] Avoid failure on qemu guests declaring an Athlon CPU without 3dnow! | |||
* Kang-Che Sung <Explorer09@gmail.com> | |||
* [2013-05-17] Fix typo in the document. Re-order the architecture list in getarch.c. | |||
* Kenneth Hoste <kenneth.hoste@gmail.com> | |||
* [2013-05-22] Adjust Makefile about downloading LAPACK source files. | |||
* Lei WANG <https://github.com/wlbksy> | |||
* [2013-05-22] Fix a bug about wget. | |||
* Dan Luu <http://www.linkedin.com/in/danluu> | |||
* [2013-06-30] Add Intel Haswell support (using sandybridge optimizations). | |||
* grisuthedragon <https://github.com/grisuthedragon> | |||
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization | |||
model is used by OpenBLAS. | |||
* Elliot Saba <staticfloat@gmail.com> | |||
* [2013-07-22] Add in return value for `interface/trtri.c` | |||
* Sébastien Fabbro <bicatali@gentoo.org> | |||
* [2013-07-24] Modify makefile to respect user's LDFLAGS | |||
* [2013-07-24] Add stack markings for GNU as arch-independent for assembler files | |||
* Viral B. Shah <viral@mayin.org> | |||
* [2013-08-21] Patch LAPACK XLASD4.f as discussed in JuliaLang/julia#2340 | |||
* Lars Buitinck <https://github.com/larsmans> | |||
* [2013-08-28] get rid of the generated cblas_noconst.h file | |||
* [2013-08-28] Missing threshold in gemm.c | |||
* [2013-08-28] fix default prefix handling in makefiles | |||
* yieldthought <https://github.com/yieldthought> | |||
* [2013-10-08] Remove -Wl,--retain-symbols-file from dynamic link line to fix tool support | |||
* Keno Fischer <https://github.com/loladiro> | |||
* [2013-10-23] Use FC instead of CC to link the dynamic library on OS X | |||
* Christopher Meng <cickumqt@gmail.com> | |||
* [2013-12-09] Add DESTDIR support for easier building on RPM based distros. | |||
Use install command instead of cp to install files with permissions control. | |||
* Lucas Beyer <lucasb.eyer.be@gmail.com> | |||
* [2013-12-10] Added support for NO_SHARED in make install. | |||
* carlkl <https://github.com/carlkl> | |||
* [2013-12-13] Fixed LAPACKE building bug on Windows | |||
* Isaac Dunham <https://github.com/idunham> | |||
* [2014-08-03] Fixed link error on Linux/musl | |||
* Dave Nuechterlein | |||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). | |||
ARMv8 support. | |||
* Jerome Robert <jeromerobert@gmx.com> | |||
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478) | |||
* [2015-12-23] `stack_check` in `gemv.c` (bug #722) | |||
* [2015-12-28] Allow to force the number of parallel make job | |||
* [2015-12-28] Fix detection of AMD E2-3200 | |||
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what expected | |||
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731) | |||
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742) | |||
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760) | |||
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727) | |||
* Dan Kortschak | |||
* [2015-01-07] Added test for drotmg bug #484. | |||
* Ton van den Heuvel <https://github.com/ton> | |||
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity(). | |||
* Martin Koehler <https://github.com/grisuthedragon/> | |||
* [2015-09-07] Improved imatcopy | |||
* Ashwin Sekhar T K <https://github.com/ashwinyes/> | |||
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8) | |||
* [2015-11-20] lapack-test fixes for Cortex-A57 | |||
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57 | |||
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57 | |||
* theoractice <https://github.com/theoractice/> | |||
* [2016-03-20] Fix compiler error in VisualStudio with CMake | |||
* [2016-03-22] Fix access violation on Windows while static linking | |||
* Paul Mustière <https://github.com/buffer51/> | |||
* [2016-02-04] Fix Android build on ARMV7 | |||
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8 | |||
* Shivraj Patil <https://github.com/sva-img/> | |||
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA | |||
* Kaustubh Raste <https://github.com/ksraste/> | |||
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA | |||
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA | |||
* Abdelrauf <https://github.com/quickwritereader> | |||
* [2017-01-01] dgemm and dtrmm kernels for IBM z13 | |||
* [2017-02-26] ztrmm kernel for IBM z13 | |||
* [2017-03-13] strmm and ctrmm kernel for IBM z13 | |||
* [2017-09-01] initial Blas Level-1,2 (double precision) for IBM z13 | |||
* [2018-03-07] added missing Blas Level 1-2 (double precision) simd codes | |||
* [2019-02-01] added missing Blas Level-1,2 (single precision) simd codes | |||
* [2019-03-14] power9 dgemm/dtrmm kernel | |||
* [2019-04-29] power9 sgemm/strmm kernel | |||
* Jiachen Wang <https://github.com/wjc404> | |||
* [2019-07-29] optimize AVX2 DGEMM | |||
* [2019-10-20] AVX512 DGEMM kernel (4x8) | |||
* [2019-11-06] optimize AVX512 SGEMM | |||
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels | |||
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM | |||
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels | |||
* [2020-01-07] optimize AVX2 SGEMM and STRMM | |||
* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR> | |||
* [2020-04-15] Half-precision GEMM for bfloat16 | |||
* Marius Hillenbrand <https://github.com/mhillenibm> | |||
* [2020-05-12] Revise dynamic architecture detection for IBM z | |||
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14 | |||
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support | |||
* Danfeng Zhang <https://github.com/craft-zhang> | |||
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53 | |||
* PingTouGe Semiconductor Co., Ltd. | |||
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910 | |||
* Jake Arkinstall <https://github.com/jake-arkinstall> | |||
* [2021-02-10] Remove in-source configure_file to enable builds in read-only contexts (issue #3100, PR #3101) | |||
* River Dillon <oss@outerpassage.net> | |||
* [2021-07-10] fix compilation with musl libc | |||
* Bine Brank <https://github.com/binebrank> | |||
* [2021-10-27] Add vector-length-agnostic DGEMM kernels for Arm SVE | |||
* [2021-11-20] Vector-length-agnostic Arm SVE copy routines for DGEMM, DTRMM, DSYMM | |||
* [2021-11-12] SVE kernels for SGEMM, STRMM and corresponding SVE copy functions | |||
* [2022-01-06] SVE kernels for CGEMM, ZGEMM, CTRMM, ZTRMM and corresponding SVE copy functions | |||
* [2022-01-18] SVE kernels and copy functions for TRSM | |||
* Ilya Kurdyukov <https://github.com/ilyakurdyukov> | |||
* [2021-02-21] Add basic support for the Elbrus E2000 architecture | |||
* PLCT Lab, Institute of Software Chinese Academy of Sciences | |||
* [2022-03] Support RISC-V Vector Intrinsic 1.0 version. | |||
* Pablo Romero <https://github.com/pablorcum> | |||
* [2022-08] Fix building from sources for QNX | |||
* Mark Seminatore <https://github.com/mseminatore> | |||
* [2023-11-09] Improve Windows threading performance scaling | |||
* [2024-02-09] Introduce MT_TRACE facility and improve code consistency | |||
* Dirreke <https://github.com/Dirreke> | |||
* [2024-01-16] Add basic support for the CSKY architecture | |||
* Christopher Daley <https://github.com/cdaley> | |||
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems | |||
* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32> | |||
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE | |||
* Annop Wongwathanarat <annop.wongwathanarat@arm.com> | |||
* [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1 | |||
* [2025-01-21] Optimize gemv_t_sve_v1x3 kernel | |||
* [2025-02-26] Add sbgemv_t_bfdot kernel | |||
* [2025-03-12] Fix aarch64 sbgemv_t compilation error for GCC < 13 | |||
* [2025-03-12] Optimize aarch64 sgemm_ncopy | |||
* Marek Michalowski <marek.michalowski@arm.com> | |||
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1` | |||
* [2025-02-18] Add thread throttling profile for SGEMM on `NEOVERSEV2` | |||
* [2025-02-19] Add thread throttling profile for SGEMV on `NEOVERSEV2` | |||
* Ye Tao <ye.tao@arm.com> | |||
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1 | |||
* [2025-02-27] Add sbgemv_n_neon kernel | |||
* [2025-05-17] Implement prototype of BGEMM interface | |||
* Abhishek Kumar <https://github.com/abhishek-iitmadras> | |||
* [2025-04-22] Optimise dot kernel for NEOVERSE V1 | |||
* [2025-07-23] ARM64-Enable bfloat16 kernels by default | |||
* Sharif Inamdar <sharif.inamdar@arm.com> | |||
* [2025-06-05] Optimize gemv_n_sve_v1x3 kernel | |||
* Guoyuan Li <https://github.com/guoyuanplct> | |||
* [2025-04-11] Optimise gemv kernel for RISCV64_ZVL256B | |||
* [2025-05-01] Optimise zgemv kernel for RISCV64_ZVL256B | |||
* [2025-05-17] Optimise omatcopy/zomatcopy kernel for RISCV64_ZVL256B | |||
* [2025-05-29] Optimise axpby kernel for RISCV64_ZVL256B | |||
* [2025-06-05] Optimise hbmv kernel for RISCV64_ZVL256B | |||
@@ -1,32 +0,0 @@ | |||
Copyright 2009, 2010 The University of Texas at Austin. | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT AUSTIN ``AS IS'' | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, | |||
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | |||
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT | |||
AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED | |||
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | |||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | |||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | |||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | |||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
The views and conclusions contained in the software and documentation | |||
are those of the authors and should not be interpreted as representing | |||
official policies, either expressed or implied, of The University of | |||
Texas at Austin. |
@@ -1,93 +0,0 @@ | |||
Optimized GotoBLAS2 libraries version 1.13 | |||
By Kazushige Goto <kgoto@tacc.utexas.edu> | |||
# This is the last update and done on 5th Feb. 2010. | |||
0. License | |||
See 00TACC_Research_License.txt. | |||
1. Supported OS | |||
Linux | |||
FreeBSD(Also it may work on NetBSD) | |||
OSX | |||
Solaris | |||
Windows 2k, XP, Server 2003 and 2008(both 32bit and 64bit) | |||
AIX | |||
Tru64 UNIX | |||
2. Supported Architecture | |||
X86 : Pentium3 Katmai | |||
Coppermine | |||
Athlon (not well optimized, though) | |||
PentiumM Banias, Yonah | |||
Pentium4 Northwood | |||
Nocona (Prescott) | |||
Core 2 Woodcrest | |||
Core 2 Penryn | |||
Nehalem-EP Corei{3,5,7} | |||
Atom | |||
AMD Opteron | |||
AMD Barcelona, Shanghai, Istanbul | |||
VIA NANO | |||
X86_64: Pentium4 Nocona | |||
Core 2 Woodcrest | |||
Core 2 Penryn | |||
Nehalem | |||
Atom | |||
AMD Opteron | |||
AMD Barcelona, Shanghai, Istanbul | |||
VIA NANO | |||
IA64 : Itanium2 | |||
Alpha : EV4, EV5, EV6 | |||
POWER : POWER4 | |||
PPC970/PPC970FX | |||
PPC970MP | |||
CELL (PPU only) | |||
POWER5 | |||
PPC440 (QCDOC) | |||
PPC440FP2(BG/L) | |||
POWERPC G4(PPC7450) | |||
POWER6 | |||
SPARC : SPARC IV | |||
SPARC VI, VII (Fujitsu chip) | |||
MIPS64/32: Sicortex | |||
3. Supported compiler | |||
C compiler : GNU CC | |||
Cygwin, MinGW | |||
Other commercial compiler(especially for x86/x86_64) | |||
Fortran Compiler : GNU G77, GFORTRAN | |||
G95 | |||
Open64 | |||
Compaq | |||
F2C | |||
IBM | |||
Intel | |||
PathScale | |||
PGI | |||
SUN | |||
Fujitsu | |||
4. Supported precision | |||
The x86/x86_64 versions now support 80bit FP precision in addition to | |||
normal double precision and single precision. Currently only | |||
gfortran supports 80bit FP with "REAL*10". | |||
5. How to build library? | |||
Please see 02QuickInstall.txt or just type "make". | |||
@@ -1,118 +0,0 @@ | |||
Quick installation for GotoBLAS2 | |||
*************************************************************************** | |||
*************************************************************************** | |||
** ** | |||
** ** | |||
** Just type "make" <<return>>. ** | |||
** ** | |||
** If you're not satisfied with this library, ** | |||
** please read following instruction and customize it. ** | |||
** ** | |||
** ** | |||
*************************************************************************** | |||
*************************************************************************** | |||
1. REALLY REALLY quick way to build library | |||
Type "make" or "gmake". | |||
$shell> make | |||
The script will detect Fortran compiler, number of cores and | |||
architecture which you're using. If default gcc binary type is | |||
64bit, 64 bit library will be created. Otherwise 32 bit library | |||
will be created. | |||
After finishing compile, you'll find various information about | |||
generated library. | |||
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= | |||
GotoBLAS2 build complete. | |||
OS ... Linux | |||
Architecture ... x86_64 | |||
BINARY ... 64bit | |||
C compiler ... GCC (command line : gcc) | |||
Fortran compiler ... PATHSCALE (command line : pathf90) | |||
Library Name ... libgoto_barcelonap-r1.27.a (Multi threaded; Max | |||
num-threads is 16) | |||
=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= | |||
2. Specifying 32bit or 64bit library | |||
If you need 32bit binary, | |||
$shell> make BINARY=32 | |||
If you need 64bit binary, | |||
$shell> make BINARY=64 | |||
3. Specifying target architecture | |||
If you need library for different architecture, you can use TARGET | |||
option. You can find current available options in top of getarch.c. | |||
For example, if you need library for Intel core2 architecture, | |||
you'll find FORCE_CORE2 option in getarch.c. Therefore you can | |||
specify TARGET=CORE2 (get rid of FORCE_) with make. | |||
$shell> make TARGET=CORE2 | |||
Also if you want GotoBLAS2 to support multiple architecture, | |||
$shell> make DYNAMIC_ARCH=1 | |||
All kernels will be included in the library, and the best one for the | |||
architecture is selected dynamically at run time. | |||
4. Specifying for enabling multi-threaded | |||
Script will detect number of cores and will enable multi threaded | |||
library if number of cores is more than two. If you still want to | |||
create single threaded library, | |||
$shell> make USE_THREAD=0 | |||
Or if you need threaded library by force, | |||
$shell> make USE_THREAD=1 | |||
5. Specifying target OS | |||
Target architecture will be determined by the CC. If you | |||
specify cross compiler for MIPS, you can create library for | |||
MIPS architecture. | |||
$shell> make CC=mips64el-linux-gcc TARGET=SICORTEX | |||
Or you can specify your favorite C compiler with absolute path. | |||
$shell> make CC=/opt/intel/cc/32/10.0.026/bin/icc TARGET=BARCELONA | |||
Binary type (32bit/64bit) is determined by checking CC, you | |||
can control binary type with this option. | |||
$shell> make CC="pathcc -m32" | |||
In this case, 32bit library will be created. | |||
6. Specifying Fortran compiler | |||
If you need to support other Fortran compiler, you can specify with | |||
FC option. | |||
$shell> make FC=gfortran | |||
7. Other useful options | |||
You'll find other useful options in Makefile.rule. |
@@ -1,128 +0,0 @@ | |||
GotoBLAS2 FAQ | |||
1. General | |||
1.1 Q Can I find useful paper about GotoBLAS2? | |||
A You may check following URL. | |||
http://www.cs.utexas.edu/users/flame/Publications/index.htm | |||
11. Kazushige Goto and Robert A. van de Geijn, " Anatomy of | |||
High-Performance Matrix Multiplication," ACM Transactions on | |||
Mathematical Software, accepted. | |||
15. Kazushige Goto and Robert van de Geijn, "High-Performance | |||
Implementation of the Level-3 BLAS." ACM Transactions on | |||
Mathematical Software, submitted. | |||
1.2 Q Does GotoBLAS2 work with Hyperthread (SMT)? | |||
A Yes, it will work. GotoBLAS2 detects Hyperthreading and | |||
avoids scheduling threads on the same core. | |||
1.3 Q When I type "make", the following error occurred. What's wrong? | |||
$shell> make | |||
"./Makefile.rule", line 58: Missing dependency operator | |||
"./Makefile.rule", line 61: Need an operator | |||
... | |||
A This error occurs because you didn't use GNU make. Some binary | |||
packages install GNU make as "gmake", so it's worth trying that. | |||
1.4 Q Function "xxx" is slow. Why? | |||
A Generally GotoBLAS2 has many well-optimized functions, but it's | |||
far from perfect. In particular, Level 1/2 function | |||
performance depends on how you call BLAS. You should understand | |||
what happens between your function and GotoBLAS2 by using the | |||
profile-enabled version or hardware performance counters. Again, please | |||
don't regard GotoBLAS2 as a black box. | |||
1.5 Q I have a commercial C compiler and want to compile GotoBLAS2 with | |||
it. Is it possible? | |||
A All functions that affect performance are written in assembler, | |||
and C code is used only for wrappers around assembler functions or for | |||
complicated functions. I also use many inline assembler functions; | |||
unfortunately, most commercial compilers can't handle inline | |||
assembler. Therefore you should use gcc. | |||
1.6 Q I use OpenMP compiler. How can I use GotoBLAS2 with it? | |||
A Please understand that OpenMP is a compromised method to use | |||
thread. If you want to use OpenMP based code with GotoBLAS2, you | |||
should enable "USE_OPENMP=1" in Makefile.rule. | |||
1.7 Q Could you tell me how to use profiled library? | |||
A You need to build and link your application with -pg | |||
option. After executing your application, "gmon.out" is | |||
generated in your current directory. | |||
$shell> gprof <your application name> gmon.out | |||
Each sample counts as 0.01 seconds. | |||
% cumulative self self total | |||
time seconds seconds calls Ks/call Ks/call name | |||
89.86 975.02 975.02 79317 0.00 0.00 .dgemm_kernel | |||
4.19 1020.47 45.45 40 0.00 0.00 .dlaswp00N | |||
2.28 1045.16 24.69 2539 0.00 0.00 .dtrsm_kernel_LT | |||
1.19 1058.03 12.87 79317 0.00 0.00 .dgemm_otcopy | |||
1.05 1069.40 11.37 4999 0.00 0.00 .dgemm_oncopy | |||
.... | |||
I think profiled BLAS library is really useful for your | |||
research. Please find bottleneck of your application and | |||
improve it. | |||
1.8 Q Is number of thread limited? | |||
A Basically, there is no limit on the number of threads. You | |||
can specify as many threads as you want, but a larger | |||
number of threads will consume extra resources. I recommend | |||
specifying the minimum number of threads you need. | |||
1.9 Q I have segfaults when I compile with USE_OPENMP=1. What's wrong? | |||
A This may be related to a bug in the Linux kernel 2.6.32. Try applying | |||
the patch segfaults.patch using | |||
patch < segfaults.patch | |||
and see if the crashes persist. Note that this patch will lead to many | |||
compiler warnings. | |||
2. Architecture Specific issue or Implementation | |||
2.1 Q GotoBLAS2 seems to support any combination with OS and | |||
architecture. Is it possible? | |||
A Combination is limited by current OS and architecture. For | |||
example, the combination of OSX with SPARC is impossible. But it | |||
will be possible with slight modification if such a combination | |||
appears in front of us. | |||
2.2 Q I have POWER architecture systems. Do I need extra work? | |||
A Although POWER architecture defined special instruction | |||
like CPUID to detect correct architecture, it's privileged | |||
and can't be accessed by user process. So you have to set | |||
the architecture that you have manually in getarch.c. | |||
2.3 Q I can't create DLL on Cygwin (Error 53). What's wrong? | |||
A You have to make sure if lib.exe and mspdb80.dll are in Microsoft | |||
Studio PATH. The easiest way is to use 'which' command. | |||
$shell> which lib.exe | |||
/cygdrive/c/Program Files/Microsoft Visual Studio/VC98/bin/lib.exe |
@@ -1,13 +0,0 @@ | |||
Quick guide to build library for Windows 64bit. | |||
1. What you need | |||
a. Windows Server 2003 or later | |||
b. Cygwin environment(make, gcc, g77, perl, sed, wget) | |||
c. MinGW64 compiler | |||
d. Microsoft Visual Studio (lib.exe and mspdb80.dll are required to create dll) | |||
2. Do ./quickbuild.win64 | |||
Good luck |
@@ -1,53 +0,0 @@ | |||
To enhance performance, I'd recommend that you enable large pages on | |||
your OS (root account is required). | |||
A) Linux | |||
x86 32bit ... (number of core) * 4 pages | |||
x86 64bit ... (number of core) * 8 pages | |||
POWER 32/64bit ... (number of core) * 1 pages | |||
If you want to allocate 64 large pages, | |||
$shell> echo 0 > /proc/sys/vm/nr_hugepages # need to be reset | |||
$shell> echo 65 > /proc/sys/vm/nr_hugepages # add 1 extra page | |||
$shell> echo 3355443200 > /proc/sys/kernel/shmmax # just large number | |||
$shell> echo 3355443200 > /proc/sys/kernel/shmall | |||
Also may add a few lines into /etc/security/limits.conf file. | |||
* hard memlock unlimited | |||
* soft memlock unlimited | |||
Then restart sshd (/etc/init.d/sshd restart). | |||
B) Solaris | |||
You don't have to set up. | |||
C) Windows (Windows Server 2003 or later, XP 64bit) | |||
You have to assign memory lock operation to your account. | |||
Control Panel -> Administrative Tools -> Local Security Policy -> | |||
Local Policies -> User Rights Assignment -> Lock pages in memory | |||
D) AIX | |||
Ask your administrator | |||
E) Tru64 UNIX | |||
Assign shared memory at boot time. | |||
F) Other architectures which don't have Large TLB enhancement | |||
If you have root permission, please install the device driver which is | |||
located in drivers/mapper. | |||
$shell> cd drivers/mapper | |||
$shell> make | |||
$shell> insmod mapper.ko | |||
$shell> ./device_setup | |||
Then enable DEVICEDRIVER_ALLOCATION = 1 in Makefile.rule. |
@@ -1,22 +0,0 @@ | |||
Weird Performance | |||
1. If you see serious performance loss (extremely low performance), | |||
you have probably created too many threads or processes. Basically GotoBLAS | |||
assumes that the available cores that you specify are used exclusively for | |||
BLAS computation. If even one small thread/process conflicts with the BLAS | |||
threads, performance will become worse. | |||
The best solution is to reduce your number of threads or insert | |||
some synchronization mechanism and suspend your threads until BLAS | |||
operation is finished. | |||
2. A similar problem may happen under a virtual machine. If the supervisor | |||
allocates different cores for each scheduling, BLAS performance | |||
will be bad. This is because BLAS also utilizes all of the cache, so an | |||
unexpected re-schedule onto a different core may result in heavy | |||
performance loss. | |||
Anyway, if you see any weird performance loss, it means your code or | |||
algorithm is not optimal. |
@@ -1,14 +0,0 @@ | |||
// Declarative Jenkins pipeline: builds the project inside the | |||
// 'osuosl/ubuntu-s390x' Docker image. | |||
pipeline { | |||
agent { | |||
docker { | |||
image 'osuosl/ubuntu-s390x' | |||
} | |||
} | |||
stages { | |||
// Single stage: run 'make clean', then the default make target if that succeeds. | |||
stage('Build') { | |||
steps { | |||
sh 'make clean && make' | |||
} | |||
} | |||
} | |||
} |
@@ -1,16 +0,0 @@ | |||
// Declarative Jenkins pipeline: builds the project inside the | |||
// 'osuosl/ubuntu-ppc64le:18.04' Docker image. | |||
pipeline { | |||
agent { | |||
docker { | |||
image 'osuosl/ubuntu-ppc64le:18.04' | |||
} | |||
} | |||
stages { | |||
stage('Build') { | |||
steps { | |||
// Refresh the package index and install gfortran before building. | |||
sh 'sudo apt update' | |||
sh 'sudo apt install gfortran -y' | |||
// Run 'make clean', then the default make target if that succeeds. | |||
sh 'make clean && make' | |||
} | |||
} | |||
} | |||
} |
@@ -1,29 +0,0 @@ | |||
Copyright (c) 2011-2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written | |||
permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
@@ -1,459 +0,0 @@ | |||
# Top-level build configuration: pulls in Makefile.system, then derives the | |||
# lists of subdirectories to build and the LAPACK flag sets used by the targets below. | |||
TOPDIR = . | |||
include ./Makefile.system | |||
# Command used to create library symlinks; replaced by the no-op 'true' | |||
# when FIXED_LIBNAME is 1. | |||
LNCMD = ln -fs | |||
ifeq ($(FIXED_LIBNAME), 1) | |||
LNCMD = true | |||
endif | |||
# Directories that make up the BLAS part of the library. 'kernel' is left out | |||
# here for DYNAMIC_ARCH=1 builds (the libs target builds it separately then). | |||
BLASDIRS = interface driver/level2 driver/level3 driver/others | |||
ifneq ($(DYNAMIC_ARCH), 1) | |||
BLASDIRS += kernel | |||
endif | |||
ifdef SANITY_CHECK | |||
BLASDIRS += reference | |||
endif | |||
# SUBDIRS = BLAS directories plus 'lapack' unless NO_LAPACK is 1. | |||
SUBDIRS = $(BLASDIRS) | |||
ifneq ($(NO_LAPACK), 1) | |||
SUBDIRS += lapack | |||
endif | |||
# RELA names the extra prerequisite target (re_lapack) when BUILD_RELAPACK is 1; | |||
# otherwise it is empty. | |||
RELA = | |||
ifeq ($(BUILD_RELAPACK), 1) | |||
RELA = re_lapack | |||
endif | |||
# With NO_FORTRAN=1: define NOFORTRAN, define C_LAPACK unless NO_LAPACK is 1, | |||
# and export NOFORTRAN, NO_LAPACK and C_LAPACK to sub-makes. | |||
ifeq ($(NO_FORTRAN), 1) | |||
define NOFORTRAN | |||
1 | |||
endef | |||
ifneq ($(NO_LAPACK), 1) | |||
define C_LAPACK | |||
1 | |||
endef | |||
endif | |||
export NOFORTRAN | |||
export NO_LAPACK | |||
export C_LAPACK | |||
endif | |||
# LAPACK_NOOPT = LAPACK_FFLAGS with the optimization-level flags removed. | |||
# The CRAY branch does not filter out the bare -O flag; the other branch does. | |||
ifeq ($(F_COMPILER),CRAY) | |||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS)) | |||
else | |||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS)) | |||
endif | |||
# NOTE: this append comes after LAPACK_NOOPT was computed with ':=', so the | |||
# -DLAPACK_STRLEN define is not part of LAPACK_NOOPT. | |||
ifdef LAPACK_STRLEN | |||
LAPACK_FFLAGS += -DLAPACK_STRLEN=$(LAPACK_STRLEN) | |||
endif | |||
# Every directory visited by the clean target. | |||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test | |||
.PHONY : all libs netlib $(RELA) test ctest shared install | |||
.NOTPARALLEL : shared | |||
# Default target: after 'tests' has been made, print a summary of the build | |||
# configuration (OS, architecture, word size, compilers, library name, | |||
# threading mode) followed by installation hints. | |||
all :: tests | |||
@echo | |||
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))" | |||
@echo | |||
@echo " OS ... $(OSNAME) " | |||
@echo " Architecture ... $(ARCH) " | |||
ifndef BINARY64 | |||
@echo " BINARY ... 32bit " | |||
else | |||
@echo " BINARY ... 64bit " | |||
endif | |||
ifdef INTERFACE64 | |||
ifneq ($(INTERFACE64), 0) | |||
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) " | |||
endif | |||
endif | |||
# Report the C compiler: if '$(CC) --version' succeeds, show the first line of | |||
# its output (or the second line when the first is empty); otherwise show the | |||
# raw command line. | |||
@$(CC) --version > /dev/null 2>&1;\ | |||
if [ $$? -eq 0 ]; then \ | |||
cverinfo=`$(CC) --version | sed -n '1p'`; \ | |||
if [ -z "$${cverinfo}" ]; then \ | |||
cverinfo=`$(CC) --version | sed -n '2p'`; \ | |||
fi; \ | |||
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\ | |||
else \ | |||
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\ | |||
fi | |||
# Same report for the Fortran compiler. The condition holds when NOFORTRAN is | |||
# empty or exactly 0 ($(filter 0,...) yields "0" or nothing). | |||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
@$(FC) --version > /dev/null 2>&1;\ | |||
if [ $$? -eq 0 ]; then \ | |||
fverinfo=`$(FC) --version | sed -n '1p'`; \ | |||
if [ -z "$${fverinfo}" ]; then \ | |||
fverinfo=`$(FC) --version | sed -n '2p'`; \ | |||
fi; \ | |||
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\ | |||
else \ | |||
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\ | |||
fi | |||
endif | |||
# On WINNT, (re)create the generic library link; the leading '-' ignores failure. | |||
ifeq ($(OSNAME), WINNT) | |||
@-$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | |||
endif | |||
# 'echo -n' leaves the cursor on the library-name line so the threading note | |||
# below is appended to it; AIX uses a plain echo instead (presumably because | |||
# its echo lacks -n -- TODO confirm). | |||
ifneq ($(OSNAME), AIX) | |||
@echo -n " Library Name ... $(LIBNAME)" | |||
else | |||
@echo " Library Name ... $(LIBNAME)" | |||
endif | |||
ifndef SMP | |||
@echo " (Single-threading) " | |||
else | |||
@echo " (Multi-threading; Max num-threads is $(NUM_THREADS))" | |||
endif | |||
ifeq ($(DYNAMIC_ARCH), 1) | |||
@echo " Supporting multiple $(ARCH) cpu models with minimum requirement for the common code being $(CORE)" | |||
endif | |||
ifeq ($(USE_OPENMP), 1) | |||
@echo | |||
@echo " Use OpenMP in the multithreading. Because of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, " | |||
@echo " you should use OMP_NUM_THREADS environment variable to control the number of threads." | |||
@echo | |||
endif | |||
# Darwin-only warning about installing/relocating the dynamic library. | |||
ifeq ($(OSNAME), Darwin) | |||
@echo "WARNING: If you plan to use the dynamic library $(LIBDYNNAME), you must run:" | |||
@echo | |||
@echo "\"make PREFIX=/your_installation_path/ install\"." | |||
@echo | |||
@echo "(or set PREFIX in Makefile.rule and run make install." | |||
@echo | |||
@echo "Note that any flags passed to make during build should also be passed to make install" | |||
@echo "to circumvent any install errors." | |||
@echo | |||
@echo "If you want to move the .dylib to a new location later, make sure you change" | |||
@echo "the internal name of the dylib with:" | |||
@echo | |||
@echo "install_name_tool -id /new/absolute/path/to/$(LIBDYNNAME) $(LIBDYNNAME)" | |||
endif | |||
# Generic installation hint printed on every platform. | |||
@echo | |||
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"." | |||
@echo | |||
@echo "Note that any flags passed to make during build should also be passed to make install" | |||
@echo "to circumvent any install errors." | |||
@echo | |||
# Build the shared library (skipped entirely when NO_SHARED is 1) after libs, | |||
# netlib and the optional $(RELA) target. The 'exports' sub-make target and the | |||
# symlinks created depend on OSNAME. | |||
shared : libs netlib $(RELA) | |||
ifneq ($(NO_SHARED), 1) | |||
# ELF-style platforms: build the .so and link both <prefix>.so and | |||
# <prefix>.so.$(MAJOR_VERSION) to it. | |||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly)) | |||
@$(MAKE) -C exports so | |||
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so | |||
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | |||
endif | |||
# OpenBSD/NetBSD: only the unversioned <prefix>.so link is created. | |||
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD)) | |||
@$(MAKE) -C exports so | |||
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so | |||
endif | |||
# Darwin: build the dylib and link the unversioned and major-versioned names. | |||
ifeq ($(OSNAME), Darwin) | |||
@$(MAKE) -C exports dyn | |||
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib | |||
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib | |||
endif | |||
# Windows flavours build a DLL; no symlinks are created. | |||
ifeq ($(OSNAME), WINNT) | |||
@$(MAKE) -C exports dll | |||
endif | |||
ifeq ($(OSNAME), CYGWIN_NT) | |||
@$(MAKE) -C exports dll | |||
endif | |||
# AIX: build the shared object only; no symlinks are created. | |||
ifeq ($(OSNAME), AIX) | |||
@$(MAKE) -C exports so | |||
endif | |||
endif | |||
# Build the test suites once the shared target is done. | |||
tests : shared | |||
# Fortran BLAS tests: only when NOFORTRAN is empty or 0, and NO_FBLAS is not defined. | |||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
touch $(LIBNAME) | |||
ifndef NO_FBLAS | |||
$(MAKE) -C test all | |||
endif | |||
endif | |||
# utest is built unless ONLY_CBLAS is 1. | |||
ifneq ($(ONLY_CBLAS), 1) | |||
$(MAKE) -C utest all | |||
endif | |||
# CBLAS-dependent tests: ctest (unless ONLY_CBLAS is 1) and, when | |||
# CPP_THREAD_SAFETY_TEST is 1, cpp_thread_test. | |||
ifneq ($(NO_CBLAS), 1) | |||
ifneq ($(ONLY_CBLAS), 1) | |||
$(MAKE) -C ctest all | |||
endif | |||
ifeq ($(CPP_THREAD_SAFETY_TEST), 1) | |||
$(MAKE) -C cpp_thread_test all | |||
endif | |||
endif | |||
# Build the static library pieces in every $(SUBDIRS) directory, then record | |||
# the effective configuration in Makefile.conf_last / config_last.h. | |||
libs : | |||
# Abort when CPU detection failed (CORE is UNKNOWN). | |||
ifeq ($(CORE), UNKNOWN) | |||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) | |||
endif | |||
ifeq ($(NOFORTRAN), 1) | |||
$(info OpenBLAS: Detecting fortran compiler failed. Can only compile BLAS and f2c-converted LAPACK.) | |||
endif | |||
# Abort when both static and shared outputs are disabled. | |||
ifeq ($(NO_STATIC), 1) | |||
ifeq ($(NO_SHARED), 1) | |||
$(error OpenBLAS: neither static nor shared are enabled.) | |||
endif | |||
endif | |||
# Run this same target name ($(@F), i.e. 'libs') in each existing subdirectory; | |||
# stop at the first failure. | |||
@for d in $(SUBDIRS) ; \ | |||
do if test -d $$d; then \ | |||
$(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
fi; \ | |||
done | |||
#Save the config files for installation | |||
@cp Makefile.conf Makefile.conf_last | |||
@cp config.h config_last.h | |||
ifdef QUAD_PRECISION | |||
@echo "#define QUAD_PRECISION">> config_last.h | |||
endif | |||
ifeq ($(EXPRECISION), 1) | |||
@echo "#define EXPRECISION">> config_last.h | |||
endif | |||
## | |||
# DYNAMIC_ARCH builds: build the common kernel objects, then one kernel set per | |||
# entry in $(DYNAMIC_CORE), and record the setting. | |||
ifeq ($(DYNAMIC_ARCH), 1) | |||
@$(MAKE) -C kernel commonlibs || exit 1 | |||
@for d in $(DYNAMIC_CORE) ; \ | |||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ | |||
done | |||
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last | |||
ifeq ($(DYNAMIC_OLDER), 1) | |||
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last | |||
endif | |||
endif | |||
# Append the remaining build settings to Makefile.conf_last. | |||
@echo TARGET=$(CORE) >> Makefile.conf_last | |||
ifdef USE_THREAD | |||
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last | |||
endif | |||
# For SMP builds record NUM_THREADS, falling back to NUM_CORES when it is not defined. | |||
ifdef SMP | |||
ifdef NUM_THREADS | |||
@echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last | |||
else | |||
@echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last | |||
endif | |||
endif | |||
ifeq ($(USE_OPENMP),1) | |||
@echo USE_OPENMP=1 >> Makefile.conf_last | |||
endif | |||
ifeq ($(INTERFACE64),1) | |||
@echo INTERFACE64=1 >> Makefile.conf_last | |||
endif | |||
@echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last | |||
@echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last | |||
# Create the generic library link (failure ignored) and touch the lib.grd marker file. | |||
@-$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | |||
@touch lib.grd | |||
# 'prof' builds the profiled BLAS and the profiled LAPACK. | |||
prof : prof_blas prof_lapack | |||
# Link the profiled library name, then run the 'prof' target in every existing | |||
# $(SUBDIRS) directory, stopping at the first failure. | |||
prof_blas : | |||
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX) | |||
for d in $(SUBDIRS) ; \ | |||
do if test -d $$d; then \ | |||
$(MAKE) -C $$d prof || exit 1 ; \ | |||
fi; \ | |||
done | |||
# DYNAMIC_ARCH builds also need the common profiled kernel objects. | |||
ifeq ($(DYNAMIC_ARCH), 1) | |||
$(MAKE) -C kernel commonprof || exit 1 | |||
endif | |||
# Build only the BLAS part: link the library name, then run 'libs' in every | |||
# existing $(BLASDIRS) directory, stopping at the first failure. | |||
blas : | |||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | |||
for d in $(BLASDIRS) ; \ | |||
do if test -d $$d; then \ | |||
$(MAKE) -C $$d libs || exit 1 ; \ | |||
fi; \ | |||
done | |||
# Run the 'hpl' target ($(@F)) in the BLAS directories plus ../laswp and | |||
# exports, skipping directories that do not exist. | |||
hpl : | |||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | |||
for d in $(BLASDIRS) ../laswp exports ; \ | |||
do if test -d $$d; then \ | |||
$(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
fi; \ | |||
done | |||
# DYNAMIC_ARCH builds: also build the common kernel objects and one kernel set | |||
# per entry in $(DYNAMIC_CORE). | |||
ifeq ($(DYNAMIC_ARCH), 1) | |||
$(MAKE) -C kernel commonlibs || exit 1 | |||
for d in $(DYNAMIC_CORE) ; \ | |||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ | |||
done | |||
endif | |||
# Profiled counterpart of 'hpl': links the profiled library name and runs the | |||
# 'hpl_p' target ($(@F)) in $(SUBDIRS), ../laswp and exports where they exist. | |||
hpl_p : | |||
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX) | |||
for d in $(SUBDIRS) ../laswp exports ; \ | |||
do if test -d $$d; then \ | |||
$(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
fi; \ | |||
done | |||
# Build the components in $(NETLIB_LAPACK_DIR) after lapack_prebuild has run: | |||
# lapacklib and tmglib unless NO_LAPACK is 1, lapackelib unless NO_LAPACKE is 1. | |||
netlib : lapack_prebuild | |||
ifneq ($(NO_LAPACK), 1) | |||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib | |||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib | |||
endif | |||
ifneq ($(NO_LAPACKE), 1) | |||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib | |||
endif | |||
# re_lapack is always defined as a target, but it only runs the relapack | |||
# sub-make when NO_LAPACK is not 1; with NO_LAPACK=1 the rule has no recipe. | |||
ifeq ($(NO_LAPACK), 1) | |||
re_lapack : | |||
else | |||
re_lapack : | |||
@$(MAKE) -C relapack | |||
endif | |||
# Build the 'lapack_prof' target in $(NETLIB_LAPACK_DIR) once lapack_prebuild has run. | |||
prof_lapack : lapack_prebuild | |||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof | |||
# Generate $(NETLIB_LAPACK_DIR)/make.inc from the current build settings. | |||
# The whole recipe is active only when NO_LAPACK is empty or 0. The first echo | |||
# truncates the file ('>'); every later line appends ('>>'). All commands are | |||
# prefixed with '-' so a failing echo does not abort the build. | |||
lapack_prebuild : | |||
ifeq ($(NO_LAPACK), $(filter 0,$(NO_LAPACK))) | |||
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | |||
# gfortran additionally gets -fno-tree-vectorize in FFLAGS. | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
-@echo "override FFLAGS = $(LAPACK_FFLAGS) -fno-tree-vectorize" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
else | |||
-@echo "override FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
# LDFLAGS gets -lomp when the C compiler is CLANG, the Fortran compiler is | |||
# GFORTRAN or IBM, and USE_OPENMP is 1 (the three values are concatenated and | |||
# compared as one string). | |||
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1) | |||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
else | |||
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGIBM1) | |||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
else | |||
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
endif | |||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
# Every library variable written here is given a relative path ending in | |||
# $(LIBNAME) (or $(LIBNAME_P) for the profiled one). | |||
-@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
# TIMER and LOADER: gfortran uses INT_ETIME and, for SMP builds on anything | |||
# other than WINNT or Haiku, adds -pthread to the loader command. Other | |||
# Fortran compilers get TIMER = NONE and the plain $(FC) loader. | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
ifdef SMP | |||
ifeq ($(OSNAME), WINNT) | |||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
else ifeq ($(OSNAME), Haiku) | |||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
else | |||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
else | |||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
else | |||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
# Forward the optional BUILD_* switches that are set to 1. | |||
ifeq ($(BUILD_LAPACK_DEPRECATED), 1) | |||
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
ifeq ($(BUILD_SINGLE), 1) | |||
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
ifeq ($(BUILD_DOUBLE), 1) | |||
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
ifeq ($(BUILD_COMPLEX), 1) | |||
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
ifeq ($(BUILD_COMPLEX16), 1) | |||
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
# Finally append the contents of the make.inc in the current directory. | |||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
# Download the LAPACK "large" timing data set when it is not already present. | |||
# Active only when NOFORTRAN is empty or 0. | |||
# | |||
# The if/then/fi is joined into ONE shell command with line continuations: | |||
# make runs every recipe line in its own shell, so an 'if' spread over | |||
# separate lines is a shell syntax error. The existence test uses $@ (this | |||
# target's file name); $< would be empty because the rule has no | |||
# prerequisites. '-e' is the portable "file exists" test. The leading '-' | |||
# keeps a failed download from aborting the build, as the original '-wget' did. | |||
large.tgz : | |||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
	-if [ ! -e $@ ]; then \ | |||
		wget http://www.netlib.org/lapack/timing/large.tgz; \ | |||
	fi | |||
endif | |||
# Download the LAPACK timing sources when they are not already present. | |||
# Active only when NOFORTRAN is empty or 0. | |||
# | |||
# The if/then/fi is joined into ONE shell command with line continuations: | |||
# make runs every recipe line in its own shell, so an 'if' spread over | |||
# separate lines is a shell syntax error. The existence test uses $@ (this | |||
# target's file name); $< would be empty because the rule has no | |||
# prerequisites. '-e' is the portable "file exists" test. The leading '-' | |||
# keeps a failed download from aborting the build, as the original '-wget' did. | |||
timing.tgz : | |||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
	-if [ ! -e $@ ]; then \ | |||
		wget http://www.netlib.org/lapack/timing/timing.tgz; \ | |||
	fi | |||
endif | |||
# Unpack the two timing archives into $(NETLIB_LAPACK_DIR) and build the TIMING | |||
# directory. Active only when NOFORTRAN is empty or 0. | |||
lapack-timing : large.tgz timing.tgz | |||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
# Extract only the TIMING member of timing.tgz, then unpack large.tgz inside TIMING. | |||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING) | |||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz ) | |||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING | |||
endif | |||
# Build the LAPACK test programs and, unless CROSS is 1, run them. | |||
lapack-test : | |||
# Remove stale test executables (x*) and previous outputs. | |||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) | |||
# The test drivers are built with -j 1, i.e. without parallel jobs. | |||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz | |||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc | |||
# Running the binaries is skipped when CROSS is 1. | |||
ifneq ($(CROSS), 1) | |||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; $(MAKE) all; ./testlsame; ./testslamch; ./testdlamch; \ | |||
./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING) | |||
endif | |||
# After lapack-test, run the INSTALL test binaries and lapack_testing.py. | |||
# Unlike lapack-test, this does not rebuild INSTALL and is not guarded by CROSS. | |||
lapack-runtest: lapack-test | |||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ | |||
./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING ) | |||
# Run the BLAS tests in $(NETLIB_LAPACK_DIR): clear old executables and outputs, | |||
# build/run 'blas_testing' without parallel jobs, then print every *.out file. | |||
blas-test: | |||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out) | |||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing | |||
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out) | |||
# Empty target with no prerequisites and no recipe. | |||
dummy : | |||
# Installation is delegated to Makefile.install. | |||
install : | |||
$(MAKE) -f Makefile.install install | |||
install_tests : | |||
$(MAKE) -f Makefile.install install_tests | |||
# Remove build products: run 'clean' in every existing $(SUBDIRS_ALL) directory, | |||
# then delete generated libraries, configuration files and LAPACK test outputs. | |||
clean :: | |||
@for d in $(SUBDIRS_ALL) ; \ | |||
do if test -d $$d; then \ | |||
$(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
fi; \ | |||
done | |||
# NOTE: the '#ifdef DYNAMIC_ARCH' / '#endif' lines below are make comments, | |||
# not conditionals, so the kernel directory is always cleaned. | |||
#ifdef DYNAMIC_ARCH | |||
@$(MAKE) -C kernel clean | |||
#endif | |||
@$(MAKE) -C reference clean | |||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0 | |||
ifeq ($(OSNAME), Darwin) | |||
@rm -rf getarch.dSYM getarch_2nd.dSYM | |||
endif | |||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib | |||
@rm -f cblas.tmp cblas.tmp2 | |||
# make.inc is touched before the LAPACK sub-make runs and removed right after | |||
# (presumably because that makefile includes it -- TODO confirm). | |||
@touch $(NETLIB_LAPACK_DIR)/make.inc | |||
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean | |||
@rm -f $(NETLIB_LAPACK_DIR)/make.inc | |||
@$(MAKE) -C relapack clean | |||
@rm -f *.grd Makefile.conf_last config_last.h | |||
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt) | |||
@echo Done. |
@@ -1,39 +0,0 @@ | |||
# Compiler and reference-library settings for Alpha (EV4/EV5/EV6) builds. | |||
ifeq ($(COMPILER), NATIVE) | |||
# Native (Compaq) compiler: -tune/-arch flags for the selected core. | |||
ifeq ($(CORE), EV4) | |||
CCOMMON_OPT += -tune ev4 -arch ev4 | |||
else ifeq ($(CORE), EV5) | |||
CCOMMON_OPT += -tune ev5 -arch ev5 | |||
else ifeq ($(CORE), EV6) | |||
CCOMMON_OPT += -tune ev6 -arch ev6 | |||
endif | |||
else | |||
# Any other compiler (GCC): -mcpu flag for the selected core. | |||
ifeq ($(CORE), EV4) | |||
CCOMMON_OPT += -mcpu=ev4 | |||
else ifeq ($(CORE), EV5) | |||
CCOMMON_OPT += -mcpu=ev5 | |||
else ifeq ($(CORE), EV6) | |||
CCOMMON_OPT += -mcpu=ev6 | |||
endif | |||
endif | |||
# Both gfortran and g77 get -mieee added to the Fortran flags. | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
FCOMMON_OPT += -mieee | |||
else ifeq ($(F_COMPILER), G77) | |||
FCOMMON_OPT += -mieee | |||
endif | |||
# Link lines for the CXML and ATLAS libraries: one set when SMP is defined, | |||
# another when it is not. | |||
ifdef SMP | |||
LIBCXML = -lcxmlp -lots -lm | |||
LIBATLAS = -L/usr/lib/atlas3.7.8p -llapack -lptcblas -lptf77blas -latlas -lpthread -lm | |||
else | |||
LIBCXML = -lcxml -lots -lm | |||
LIBATLAS = -L/usr/lib/atlas3.7.8 -lf77blas -latlas -lm | |||
endif |
@@ -1,19 +0,0 @@ | |||
# Floating-point unit and architecture flags for 32-bit ARM cores. | |||
# The same options are appended to both the C and the Fortran flag sets. | |||
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15)) | |||
# ARMv7-class cores: VFPv3 by default, NEON when building for Android. | |||
ifneq ($(OSNAME), Android) | |||
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | |||
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | |||
else | |||
CCOMMON_OPT += -mfpu=neon -march=armv7-a | |||
FCOMMON_OPT += -mfpu=neon -march=armv7-a | |||
endif | |||
endif | |||
# ARMv6 cores use the plain VFP unit. | |||
ifeq ($(CORE), ARMV6) | |||
CCOMMON_OPT += -mfpu=vfp | |||
FCOMMON_OPT += -mfpu=vfp | |||
endif | |||
# When HAVE_NEON is defined, -mfpu=neon is appended after any flag chosen above. | |||
ifdef HAVE_NEON | |||
CCOMMON_OPT += -mfpu=neon | |||
FCOMMON_OPT += -mfpu=neon | |||
endif |
@@ -1,429 +0,0 @@ | |||
############################################################################### | |||
# Copyright (c) 2025, The OpenBLAS Project | |||
# All rights reserved. | |||
# Redistribution and use in source and binary forms, with or without | |||
# modification, are permitted provided that the following conditions are | |||
# met: | |||
# 1. Redistributions of source code must retain the above copyright | |||
# notice, this list of conditions and the following disclaimer. | |||
# 2. Redistributions in binary form must reproduce the above copyright | |||
# notice, this list of conditions and the following disclaimer in | |||
# the documentation and/or other materials provided with the | |||
# distribution. | |||
# 3. Neither the name of the OpenBLAS project nor the names of | |||
# its contributors may be used to endorse or promote products | |||
# derived from this software without specific prior written permission. | |||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
# POSSIBILITY OF SUCH DAMAGE. | |||
############################################################################### | |||
ifneq ($(C_COMPILER), PGI) | |||
# ISCLANG is set to 1 for the CLANG and FUJITSU C compilers and left unset | |||
# for every other compiler; later version checks accept ISCLANG=1 in place of | |||
# a GCC version test. | |||
ifeq ($(C_COMPILER), CLANG) | |||
ISCLANG=1 | |||
else ifeq ($(C_COMPILER), FUJITSU) | |||
ISCLANG=1 | |||
endif | |||
ifneq (1, $(filter 1,$(GCCVERSIONGT4) $(ISCLANG))) | |||
CCOMMON_OPT += -march=armv8-a | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a | |||
endif | |||
else | |||
# Per-core flags for cores that need no compiler-version check. | |||
# This section is reached when GCCVERSIONGT4 or ISCLANG is 1. | |||
# Pattern used throughout: the C flags are always appended; the Fortran flags | |||
# are appended unless the Fortran compiler is NAG. | |||
ifeq ($(CORE), ARMV8) | |||
CCOMMON_OPT += -march=armv8-a | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a | |||
endif | |||
endif | |||
ifeq ($(CORE), ARMV8SVE) | |||
CCOMMON_OPT += -march=armv8-a+sve | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a+sve | |||
endif | |||
endif | |||
# ARMV9SME: only the C flags carry +sme; Fortran stops at +sve2. | |||
# NOTE(review): unlike the other cores, the Fortran flags here are not | |||
# guarded against the NAG compiler - confirm this is intentional. | |||
ifeq ($(CORE), ARMV9SME) | |||
CCOMMON_OPT += -march=armv9-a+sve2+sme | |||
FCOMMON_OPT += -march=armv9-a+sve2 | |||
endif | |||
ifeq ($(CORE), CORTEXA53) | |||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||
endif | |||
endif | |||
ifeq ($(CORE), CORTEXA57) | |||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57 | |||
endif | |||
endif | |||
ifeq ($(CORE), CORTEXA72) | |||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
ifeq ($(CORE), CORTEXA73) | |||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73 | |||
endif | |||
endif | |||
ifeq ($(CORE), CORTEXA76) | |||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76 | |||
endif | |||
endif | |||
# FT2000 uses the same flags as CORTEXA72. | |||
ifeq ($(CORE), FT2000) | |||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
# Use a72 tunings because Neoverse-N1 is only available | |||
# in GCC>=9 | |||
# Three tiers, best first ("clang" means ISCLANG=1): | |||
#   GCC >= 9 or clang : armv8.2-a, tuned for neoverse-n1 | |||
#   GCC 7 or 8        : armv8.2-a, tuned for cortex-a72 | |||
#   anything older    : armv8-a,   tuned for cortex-a72 | |||
# Fortran flags mirror the C flags and are skipped for the NAG compiler. | |||
ifeq ($(CORE), NEOVERSEN1) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG))) | |||
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1 | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
endif | |||
# Use a72 tunings because Neoverse-V1 is only available | |||
# in GCC>=10.4 | |||
# Four tiers, best first ("clang" means ISCLANG=1): | |||
#   GCC >= 10 with (minor >= 4 or GCC >= 11), or clang : armv8.4-a+sve+bf16 for C, | |||
#       tuned for neoverse-v1 (cortex-x1 when ISCLANG=1) | |||
#   other GCC 10 : armv8.4-a+sve+bf16 for C, -mtune=native unless CROSS is 1 | |||
#   GCC 7 to 9   : armv8.2-a+sve for C, tuned for cortex-a72 | |||
#   older        : armv8-a+sve for C, tuned for cortex-a72 | |||
# In every tier the Fortran -march omits the +sve/+bf16 extensions, and the | |||
# Fortran flags are skipped entirely for the NAG compiler. | |||
ifeq ($(CORE), NEOVERSEV1) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG))) | |||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
CCOMMON_OPT += -march=armv8.4-a+sve+bf16 | |||
ifeq (1, $(ISCLANG)) | |||
CCOMMON_OPT += -mtune=cortex-x1 | |||
else | |||
CCOMMON_OPT += -mtune=neoverse-v1 | |||
endif | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1 | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8.4-a+sve+bf16 | |||
# -mtune=native is only added when not cross-compiling. | |||
ifneq ($(CROSS), 1) | |||
CCOMMON_OPT += -mtune=native | |||
endif | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.4-a | |||
ifneq ($(CROSS), 1) | |||
FCOMMON_OPT += -mtune=native | |||
endif | |||
endif | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
endif | |||
# Use a72 tunings because Neoverse-N2 is only available | |||
# in GCC>=10.4 | |||
# Same four-tier layout as NEOVERSEV1 ("clang" means ISCLANG=1): | |||
#   GCC >= 10 with (minor >= 4 or GCC >= 11), or clang : armv8.5-a+sve+sve2+bf16, | |||
#       tuned for neoverse-n2; on Darwin the C flags instead use | |||
#       armv8.2-a+sve+bf16 tuned for cortex-a72 (the Fortran flags do not change) | |||
#   other GCC 10 : armv8.5-a+sve+bf16 for C, -mtune=native unless CROSS is 1 | |||
#   GCC 7 to 9   : armv8.2-a+sve+bf16 for C, tuned for cortex-a72 | |||
#   older        : armv8-a+sve+bf16 for C, tuned for cortex-a72 | |||
# Fortran flags are skipped for the NAG compiler. | |||
ifeq ($(CORE), NEOVERSEN2) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG))) | |||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
ifneq ($(OSNAME), Darwin) | |||
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2 | |||
else | |||
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72 | |||
endif | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2 | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8.5-a+sve+bf16 | |||
ifneq ($(CROSS), 1) | |||
CCOMMON_OPT += -mtune=native | |||
endif | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.5-a | |||
ifneq ($(CROSS), 1) | |||
FCOMMON_OPT += -mtune=native | |||
endif | |||
endif | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8-a+sve+bf16 -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
endif | |||
# Detect ARM Neoverse V2. | |||
# GCC >= 13 or ISCLANG=1: select the core directly with -mcpu=neoverse-v2. | |||
# Otherwise fall back to armv8.2-a (+sve+bf16 for C only) tuned for neoverse-n1. | |||
# Fortran flags are skipped for the NAG compiler. | |||
ifeq ($(CORE), NEOVERSEV2) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ13) $(ISCLANG))) | |||
CCOMMON_OPT += -mcpu=neoverse-v2 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -mcpu=neoverse-v2 | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=neoverse-n1 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1 | |||
endif | |||
endif | |||
endif | |||
# Detect Ampere AmpereOne(ampere1,ampere1a) processors. | |||
# Flags are added only with GCC >= 12 or ISCLANG=1; this block has no else | |||
# branch, so older compilers get no AMPERE1-specific flags from it. | |||
# Fortran flags are skipped for the NAG compiler. | |||
ifeq ($(CORE), AMPERE1) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG))) | |||
CCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng | |||
endif | |||
endif | |||
endif | |||
# Use a53 tunings because a55 is only available in GCC>=8.1 | |||
# Three tiers, best first ("clang" means ISCLANG=1): | |||
#   GCC >= 8 or clang : armv8.2-a, tuned for cortex-a55 | |||
#   GCC 7             : armv8.2-a, tuned for cortex-a53 | |||
#   older             : armv8-a,   tuned for cortex-a53 | |||
# Fortran flags mirror the C flags and are skipped for the NAG compiler. | |||
ifeq ($(CORE), CORTEXA55) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG))) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ8) $(ISCLANG))) | |||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55 | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53 | |||
endif | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53 | |||
endif | |||
endif | |||
endif | |||
# THUNDERX, FALKOR and THUNDERX2T99 need no compiler-version check: each sets | |||
# -march plus the matching -mtune for C, and the same for Fortran unless the | |||
# Fortran compiler is NAG. | |||
ifeq ($(CORE), THUNDERX) | |||
CCOMMON_OPT += -march=armv8-a -mtune=thunderx | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=thunderx | |||
endif | |||
endif | |||
ifeq ($(CORE), FALKOR) | |||
CCOMMON_OPT += -march=armv8-a -mtune=falkor | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=falkor | |||
endif | |||
endif | |||
ifeq ($(CORE), THUNDERX2T99) | |||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
endif | |||
endif | |||
# THUNDERX3T110: with GCC >= 10 or ISCLANG=1 build for armv8.3-a; otherwise | |||
# fall back to the THUNDERX2T99 settings. | |||
# Fortran flags are skipped for the NAG compiler. | |||
ifeq ($(CORE), THUNDERX3T110) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG))) | |||
CCOMMON_OPT += -march=armv8.3-a | |||
# ISCLANG is either 1 or unset, never 0, so the GCC case must be detected | |||
# as "not 1". Compilers with ISCLANG=1 keep the thunderx2t99 tuning. | |||
ifneq (1, $(ISCLANG)) | |||
CCOMMON_OPT += -mtune=thunderx3t110 | |||
else | |||
CCOMMON_OPT += -mtune=thunderx2t99 | |||
endif | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110 | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 | |||
endif | |||
endif | |||
endif | |||
# VORTEX: armv8.3-a with no -mtune, for C and (unless NAG) Fortran. | |||
ifeq ($(CORE), VORTEX) | |||
CCOMMON_OPT += -march=armv8.3-a | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.3-a | |||
endif | |||
endif | |||
# TSV110: flags are added only with GCC >= 9 or ISCLANG=1; there is no | |||
# fallback branch for older compilers in this block. | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG))) | |||
ifeq ($(CORE), TSV110) | |||
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 | |||
endif | |||
endif | |||
endif | |||
# EMAG8180: flags are added only with GCC >= 9 or ISCLANG=1. | |||
# Fortran flags are skipped for the NAG compiler. | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG))) | |||
ifeq ($(CORE), EMAG8180) | |||
CCOMMON_OPT += -march=armv8-a | |||
# ISCLANG is either 1 or unset, never 0, so the GCC case must be detected | |||
# as "not 1". Compilers with ISCLANG=1 get no -mtune for C. | |||
ifneq (1, $(ISCLANG)) | |||
CCOMMON_OPT += -mtune=emag | |||
endif | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8-a -mtune=emag | |||
endif | |||
endif | |||
endif | |||
# A64FX: flags are added only with GCC >= 10 or ISCLANG=1. | |||
#   (minor >= 3 or GCC >= 11), or ISCLANG=1 : armv8.2-a+sve tuned for a64fx | |||
#   other GCC 10 : armv8.4-a (+sve for C only) tuned for neoverse-n1 | |||
# There is no branch for older compilers. Fortran flags are skipped for NAG. | |||
ifeq ($(CORE), A64FX) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG))) | |||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ3) $(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx | |||
endif | |||
else | |||
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-n1 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-n1 | |||
endif | |||
endif | |||
endif | |||
endif | |||
# CORTEXX1, CORTEXX2, CORTEXA510 and CORTEXA710 are all gated on GCC >= 11 or | |||
# ISCLANG=1; with an older GCC none of these blocks adds any flags. | |||
# Fortran flags are skipped for the NAG compiler. | |||
# CORTEXX1: armv8.2-a; tuned for cortex-x1 when (minor >= 4 or GCC >= 12 or | |||
# ISCLANG=1), otherwise tuned for cortex-a72. | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
ifeq ($(CORE), CORTEXX1) | |||
CCOMMON_OPT += -march=armv8.2-a | |||
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ12) $(ISCLANG))) | |||
CCOMMON_OPT += -mtune=cortex-x1 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-x1 | |||
endif | |||
else | |||
CCOMMON_OPT += -mtune=cortex-a72 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72 | |||
endif | |||
endif | |||
endif | |||
endif | |||
# CORTEXX2: armv8.4-a+sve; -mtune=cortex-x2 is added only with GCC >= 12 or | |||
# ISCLANG=1. | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
ifeq ($(CORE), CORTEXX2) | |||
CCOMMON_OPT += -march=armv8.4-a+sve | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.4-a+sve | |||
endif | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG))) | |||
CCOMMON_OPT += -mtune=cortex-x2 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -mtune=cortex-x2 | |||
endif | |||
endif | |||
endif | |||
endif | |||
# CORTEXA510: armv8.4-a+sve with no -mtune. | |||
#ifeq (1, $(filter 1,$(ISCLANG))) | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
ifeq ($(CORE), CORTEXA510) | |||
CCOMMON_OPT += -march=armv8.4-a+sve | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.4-a+sve | |||
endif | |||
endif | |||
endif | |||
# CORTEXA710: armv8.4-a+sve; -mtune=cortex-a710 is added only with GCC >= 12 | |||
# or ISCLANG=1. | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG))) | |||
ifeq ($(CORE), CORTEXA710) | |||
CCOMMON_OPT += -march=armv8.4-a+sve | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=armv8.4-a+sve | |||
endif | |||
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG))) | |||
CCOMMON_OPT += -mtune=cortex-a710 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -mtune=cortex-a710 | |||
endif | |||
endif | |||
endif | |||
endif | |||
endif | |||
else | |||
# NVIDIA HPC options necessary to enable SVE in the compiler | |||
# This branch is taken when C_COMPILER is PGI. The target processor is | |||
# selected with -tp, identically for C and Fortran. CORE is compared against | |||
# one name at a time, so the tests form a single else-if chain. | |||
ifeq ($(CORE), THUNDERX2T99) | |||
CCOMMON_OPT += -tp=thunderx2t99 | |||
FCOMMON_OPT += -tp=thunderx2t99 | |||
else ifeq ($(CORE), NEOVERSEN1) | |||
CCOMMON_OPT += -tp=neoverse-n1 | |||
FCOMMON_OPT += -tp=neoverse-n1 | |||
else ifeq ($(CORE), NEOVERSEV1) | |||
CCOMMON_OPT += -tp=neoverse-v1 | |||
FCOMMON_OPT += -tp=neoverse-v1 | |||
else ifeq ($(CORE), NEOVERSEV2) | |||
CCOMMON_OPT += -tp=neoverse-v2 | |||
FCOMMON_OPT += -tp=neoverse-v2 | |||
else ifeq ($(CORE), ARMV8SVE) | |||
# The generic SVE target is built with the neoverse-v2 processor setting. | |||
CCOMMON_OPT += -tp=neoverse-v2 | |||
FCOMMON_OPT += -tp=neoverse-v2 | |||
else ifeq ($(CORE), ARMV9SVE) | |||
# NOTE(review): the non-PGI branch of this file tests ARMV9SME, not | |||
# ARMV9SVE - confirm which core name is intended here. | |||
CCOMMON_OPT += -tp=neoverse-v2 | |||
FCOMMON_OPT += -tp=neoverse-v2 | |||
endif | |||
endif |
@@ -1,4 +0,0 @@ | |||
# C-SKY CK860FV: hard-float ck860 flags for both C and Fortran. | |||
# The Fortran options additionally carry -static. | |||
ifeq ($(CORE), CK860FV) | |||
CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float | |||
FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static | |||
endif |
@@ -1 +0,0 @@ | |||
# Default C options for this target: all warnings, -O2. The -DGEMMTEST define | |||
# is present only as a trailing comment, i.e. disabled. | |||
COPT = -Wall -O2 # -DGEMMTEST |
@@ -1 +0,0 @@ | |||
# Default C options for this target: all warnings, -O2. The -DGEMMTEST define | |||
# is present only as a trailing comment, i.e. disabled. | |||
COPT = -Wall -O2 # -DGEMMTEST |
@@ -1,22 +0,0 @@ | |||
# IA-64 settings: link lines for the external libraries that can be linked | |||
# against (MKL, FLAME, MLIB, SCSL, ATLAS). | |||
# The first assignment appends nothing: everything after '#' is a comment. | |||
# NOTE(review): the variable is spelled CCOMMON_COPT, not CCOMMON_OPT - confirm. | |||
CCOMMON_COPT += # -DUSE64BITINT # -DGEMMTEST | |||
# CCOMMON_OPT += -DPARAMTEST | |||
FLAMEPATH = $(HOME)/flame/lib/ia64 | |||
# Libraries whose link line does not depend on threading. | |||
LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame $(TOPDIR)/$(LIBNAME) -lgfortran -lpthread -lm | |||
LIBMLIB = ../../level1/others/libmisc.a -L/opt/intel/fc/ia64/9.1.040/lib -L/opt/mlib/lib \ | |||
-llapack -lguide -lifcore -lm -lpthread | |||
LIBSCSL = -L/opt/scsl/1.4.1.0/lib -Wl,-rpath,/opt/scsl/1.4.1.0/lib -lscs | |||
# MKL and ATLAS: threaded variants when SMP is set, serial ones otherwise. | |||
ifdef SMP | |||
LIBMKL = -L$(MKLPATH)/64 -Wl,-rpath,$(MKLPATH)/64 -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lguide -lpthread -lm | |||
LIBATLAS = -L$(HOME)/misc/lib -L/usr/lib/atlas3.6.0p -llapack -lptcblas -lptf77blas -latlas -lpthread -lm | |||
else | |||
LIBMKL = -L$(MKLPATH)/64 -Wl,-rpath,$(MKLPATH)/64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lguide -lpthread -lm | |||
LIBATLAS = -L/usr/lib/atlas3.6.0 -lf77blas -latlas -lm | |||
endif |
@@ -1,345 +0,0 @@ | |||
# Installation makefile: settings shared by the install and install_tests | |||
# targets, followed by the lib.grd guard rule. | |||
TOPDIR = . | |||
export GOTOBLAS_MAKEFILE = 1 | |||
# The leading '-' makes this include optional: no error if the file is absent. | |||
-include $(TOPDIR)/Makefile.conf_last | |||
include ./Makefile.system | |||
# Command used to create the library symlinks in the install directory. | |||
LNCMD = ln -fs | |||
# Caller-supplied names replace LIBNAME and LIBSONAME. | |||
ifdef THELIBNAME | |||
LIBNAME=$(THELIBNAME) | |||
LIBSONAME=$(THELIBSONAME) | |||
endif | |||
# With FIXED_LIBNAME=1 the link command becomes the no-op 'true', so no | |||
# symlinks are created. | |||
ifeq ($(FIXED_LIBNAME), 1) | |||
LNCMD = true | |||
endif | |||
ifeq ($(INTERFACE64),1) | |||
USE_64BITINT=1 | |||
endif | |||
# OpenMP flag that gets recorded in the generated pkg-config file. | |||
ifeq ($(USE_OPENMP),1) | |||
FOMP_OPT:= -fopenmp | |||
endif | |||
# Install layout; every directory is derived from PREFIX. | |||
PREFIX ?= /opt/OpenBLAS | |||
OPENBLAS_INCLUDE_DIR := $(PREFIX)/include | |||
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib | |||
OPENBLAS_BINARY_DIR := $(PREFIX)/bin | |||
OPENBLAS_BUILD_DIR := $(CURDIR) | |||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/$(LIBSONAMEBASE) | |||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake | |||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake | |||
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig | |||
PKG_EXTRALIB := $(EXTRALIB) | |||
# 64-bit-integer builds get a "64" suffix on the pkg-config file name. | |||
ifeq ($(INTERFACE64),1) | |||
SUFFIX64=64 | |||
endif | |||
# The value of PKGFILE itself contains double quotes; the recipes wrap | |||
# $(PKGFILE) in a second pair of quotes when they use it. | |||
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc" | |||
# OpenMP runtime recorded as an extra library: -lomp for the PGI compiler, | |||
# -lgomp for every other compiler. | |||
ifeq ($(USE_OPENMP), 1) | |||
ifeq ($(C_COMPILER), PGI) | |||
PKG_EXTRALIB += -lomp | |||
else | |||
PKG_EXTRALIB += -lgomp | |||
endif | |||
endif | |||
.PHONY : install | |||
.NOTPARALLEL : install | |||
# Guard rule: lib.grd has no prerequisites, so this recipe only runs when the | |||
# file does not exist, and it then aborts with the error below. | |||
# (presumably lib.grd is created by a completed build - verify) | |||
lib.grd : | |||
$(error OpenBLAS: Please run "make" firstly) | |||
# install: create the directory tree under $(DESTDIR)$(PREFIX), generate the | |||
# public headers, copy the static/shared libraries, and write the pkg-config | |||
# and CMake package files. Depends on lib.grd. | |||
install : lib.grd | |||
# Directory creation is best-effort ('-' ignores failures). | |||
@-mkdir -p "$(DESTDIR)$(PREFIX)" | |||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)" | |||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)" | |||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)" | |||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" | |||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | |||
#for inc | |||
# openblas_config.h = include guard + config_last.h with OPENBLAS_ prefixed to | |||
# the second field of every non-empty line + version string + template. | |||
@echo \#ifndef OPENBLAS_CONFIG_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||
@echo \#define OPENBLAS_CONFIG_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||
@cat openblas_config_template.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||
# f77blas.h = include guard + #include of openblas_config.h + common_interface.h. | |||
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | |||
@echo \#ifndef OPENBLAS_F77BLAS_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||
@echo \#define OPENBLAS_F77BLAS_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||
@echo \#include \"openblas_config.h\" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||
@cat common_interface.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||
@echo \#endif >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||
# cblas.h: copied through the scratch files cblas.tmp/cblas.tmp2 so that | |||
# SYMBOLPREFIX/SYMBOLSUFFIX can be applied to cblas*, openblas* and goto* | |||
# identifiers; finally "common" is replaced by "openblas_config". | |||
ifneq ($(NO_CBLAS),1) | |||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | |||
@cp cblas.h cblas.tmp | |||
ifdef SYMBOLPREFIX | |||
@sed 's/cblas[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp > cblas.tmp2 | |||
@sed 's/openblas[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp | |||
#change back any openblas_complex_float and double that got hit | |||
@sed 's/$(SYMBOLPREFIX)openblas_complex_/openblas_complex_/g' cblas.tmp > cblas.tmp2 | |||
@sed 's/goto[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp | |||
endif | |||
ifdef SYMBOLSUFFIX | |||
@sed 's/cblas[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp > cblas.tmp2 | |||
@sed 's/openblas[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp | |||
#change back any openblas_complex_float and double that got hit | |||
@sed 's/\(openblas_complex_\)\([^ ]*\)$(SYMBOLSUFFIX)/\1\2 /g' cblas.tmp > cblas.tmp2 | |||
@sed 's/goto[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp | |||
endif | |||
@sed 's/common/openblas_config/g' cblas.tmp > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h" | |||
endif | |||
# Library and LAPACKE header installation. Non-AIX systems use install(1); | |||
# the AIX branch further down repeats the same steps with installbsd. | |||
ifneq ($(OSNAME), AIX) | |||
ifneq ($(NO_LAPACKE), 1) | |||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | |||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h" | |||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" | |||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h" | |||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h" | |||
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h" | |||
endif | |||
#for install static library | |||
ifneq ($(NO_STATIC),1) | |||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||
@install -m644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | |||
endif | |||
#for install shared library | |||
ifneq ($(NO_SHARED),1) | |||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||
# ELF-style systems: versioned .so plus .so and .so.<major> links. | |||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly)) | |||
@install -m755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \ | |||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | |||
endif | |||
# OpenBSD/NetBSD: plain copy and only the unversioned .so link. | |||
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD)) | |||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so | |||
endif | |||
# Darwin: the install name is rewritten to the final (non-DESTDIR) library | |||
# path before the .dylib links are created. | |||
ifeq ($(OSNAME), Darwin) | |||
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||
@-install_name_tool -id "$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).$(MAJOR_VERSION).dylib" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" | |||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib ; \ | |||
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib | |||
endif | |||
# Windows targets: the DLL goes to the binary dir, the import library to the | |||
# library dir. | |||
ifeq ($(OSNAME), WINNT) | |||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)" | |||
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||
endif | |||
ifeq ($(OSNAME), CYGWIN_NT) | |||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)" | |||
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||
endif | |||
endif | |||
else | |||
#install on AIX has different options syntax | |||
ifneq ($(NO_LAPACKE), 1) | |||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | |||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h" | |||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" | |||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h" | |||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h" | |||
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h" | |||
endif | |||
#for install static library | |||
ifneq ($(NO_STATIC),1) | |||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | |||
endif | |||
#for install shared library | |||
ifneq ($(NO_SHARED),1) | |||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \ | |||
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | |||
endif | |||
endif | |||
#Generating openblas.pc | |||
# NOTE(review): SUFFIX64 and PKGFILE below repeat, with identical values, the | |||
# definitions made before the install target - looks redundant; confirm. | |||
ifeq ($(INTERFACE64),1) | |||
SUFFIX64=64 | |||
endif | |||
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc" | |||
# The .pc file is a block of variable definitions followed by the contents of | |||
# openblas.pc.in. | |||
@echo Generating $(LIBSONAMEBASE)$(SUFFIX64).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" | |||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(PKGFILE)" | |||
@echo 'libprefix='$(LIBNAMEPREFIX) >> "$(PKGFILE)" | |||
@echo 'libnamesuffix='$(LIBNAMESUFFIX) >> "$(PKGFILE)" | |||
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)" | |||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)" | |||
@echo 'omp_opt='$(FOMP_OPT) >> "$(PKGFILE)" | |||
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(TARGET) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)" | |||
@echo 'version='$(VERSION) >> "$(PKGFILE)" | |||
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)" | |||
@cat openblas.pc.in >> "$(PKGFILE)" | |||
#Generating OpenBLASConfig.cmake | |||
# NOTE(review): $(origin ...) expands to the bare word undefined, while the | |||
# right-hand side here contains literal quote characters, so the two can never | |||
# compare equal: this ifneq looks always true and the else branch further | |||
# down unreachable - confirm the intent. | |||
# First branch: relocatable config, paths derived from the config file's own | |||
# location via _OpenBLAS_ROOT_DIR. | |||
ifneq ($(origin _OpenBLAS_ROOT_DIR),"undefined") | |||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | |||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
@echo "file(REAL_PATH \"../../..\" _OpenBLAS_ROOT_DIR BASE_DIRECTORY \$${CMAKE_CURRENT_LIST_DIR} )" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
@echo "SET(OpenBLAS_INCLUDE_DIRS \$${_OpenBLAS_ROOT_DIR}/include)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
ifneq ($(NO_SHARED),1) | |||
#ifeq logical or | |||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly)) | |||
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
endif | |||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT)) | |||
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/bin/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
endif | |||
ifeq ($(OSNAME), Darwin) | |||
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
endif | |||
# NOTE(review): the echo commands from here to the 'else' have no output | |||
# redirection, so the imported-target lines are printed to the terminal | |||
# and not written into the config file - confirm whether that is intended. | |||
@echo "add_library(OpenBLAS::OpenBLAS SHARED IMPORTED)" | |||
@echo "target_include_directories(OpenBLAS::OpenBLAS INTERFACE \$${OpenBLAS_INCLUDE_DIRS})" | |||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT)) | |||
@echo "set_property(TARGET OpenBLAS::OpenBLAS PROPERTY IMPORTED_LOCATION \$${OpenBLAS_LIBRARIES})" | |||
@echo "set_property(TARGET OpenBLAS::OpenBLAS PROPERTY IMPORTED_IMPLIB \$${_OpenBLAS_ROOT_DIR}/lib/libopenblas.lib)" | |||
endif | |||
else | |||
#only static | |||
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
endif | |||
else | |||
# Second branch: config with absolute install paths. | |||
# NOTE(review): this first echo lacks the '@' that the other commands carry. | |||
echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | |||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
ifneq ($(NO_SHARED),1) | |||
#ifeq logical or | |||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly)) | |||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
endif | |||
# NOTE(review): there is no endif between the WINNT/CYGWIN_NT test and the | |||
# Darwin test, so the Darwin test nests inside it and the following | |||
# else ("only static") pairs with the WINNT/CYGWIN_NT test rather than with | |||
# NO_SHARED - compare with the first branch and confirm. | |||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT)) | |||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
ifeq ($(OSNAME), Darwin) | |||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
endif | |||
else | |||
#only static | |||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||
endif | |||
endif | |||
endif | |||
#Generating OpenBLASConfigVersion.cmake | |||
# The version file marks the package compatible when its version is not less | |||
# than the requested one, and exact when the two strings are equal. | |||
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | |||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||
@echo "else ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||
@echo Install OK! | |||
install_tests : lib.grd | |||
ifneq ($(ONLY_CBLAS), 1) | |||
@install -m 666 utest/openblas_utest $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 utest/openblas_utest_ext $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
ifndef NO_FBLAS | |||
ifeq ($(BUILD_BFLOAT16),1) | |||
@install -m 666 test/test_sbgemm $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
ifeq ($(BUILD_SINGLE),1) | |||
@install -m 666 test/sblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/sblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/sblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/sblat2.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/sblat3.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
@install -m 666 test/dblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/dblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/dblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/dblat2.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/dblat3.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
ifeq ($(BUILD_COMPLEX),1) | |||
@install -m 666 test/cblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/cblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/cblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/cblat2.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/cblat3.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
ifeq ($(ARCH), filter($(ARCH), x86 x86_64 ia64 MIPS)) | |||
@install -m 666 test/cblat3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/cblat3_3m.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
@install -m 666 test/zblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/zblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/zblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/zblat2.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/zblat3.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
ifeq ($(ARCH), filter($(ARCH), x86 x86_64 ia64 MIPS)) | |||
@install -m 666 test/zblat3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 test/zblat3_3m.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
endif | |||
endif | |||
endif | |||
# Install the ctest drivers (x?cblat1..3) and their input files (?in2, ?in3) | |||
# into $(DESTDIR)$(OPENBLAS_BINARY_DIR), one group per enabled precision. | |||
# Skipped entirely when ONLY_CBLAS is 1. | |||
ifneq ($(ONLY_CBLAS), 1) | |||
ifeq ($(BUILD_SINGLE),1) | |||
@install -m 666 ctest/xscblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/xscblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/xscblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/sin2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/sin3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
ifeq ($(BUILD_DOUBLE),1) | |||
@install -m 666 ctest/xdcblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/xdcblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/xdcblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/din2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/din3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
ifeq ($(BUILD_COMPLEX),1) | |||
@install -m 666 ctest/xccblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/xccblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/xccblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/cin2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/cin3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
# 3M variant only when ARCH is x86, x86_64, ia64 or MIPS. $(filter ...) replaces | |||
# the former literal "filter(...)" text, which never matched $(ARCH). | |||
ifneq ($(filter x86 x86_64 ia64 MIPS,$(ARCH)),) | |||
@install -m 666 ctest/xccblat3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/cin3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
endif | |||
ifeq ($(BUILD_COMPLEX16),1) | |||
@install -m 666 ctest/xzcblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/xzcblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/xzcblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/zin2 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/zin3 $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
# Same architecture gate as for the single-complex 3M files. | |||
ifneq ($(filter x86 x86_64 ia64 MIPS,$(ARCH)),) | |||
@install -m 666 ctest/xzcblat3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 ctest/zin3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
endif | |||
endif | |||
# When CPP_THREAD_SAFETY_TEST is 1, also install the two C++ thread-safety | |||
# testers (dgemm_tester, dgemv_tester) into $(DESTDIR)$(OPENBLAS_BINARY_DIR). | |||
# NOTE(review): mode 666 carries no execute bit — confirm this is intended. | |||
ifeq ($(CPP_THREAD_SAFETY_TEST), 1) | |||
@install -m 666 cpp_thread_test/dgemm_tester $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
@install -m 666 cpp_thread_test/dgemv_tester $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||
endif | |||
endif | |||
@@ -1,3 +0,0 @@ | |||
# Placeholder conditional: neither the BINARY64 branch nor the else branch | |||
# currently sets anything. | |||
ifdef BINARY64 | |||
else | |||
endif |
@@ -1,4 +0,0 @@ | |||
# Compiler options collected in MSA_FLAGS; this fragment only defines the | |||
# variable and does not add it to any flag list itself. | |||
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs | |||
# Placeholder conditional: both branches are empty. | |||
ifdef BINARY64 | |||
else | |||
endif |
@@ -1,4 +0,0 @@ | |||
# Compiler options collected in MSA_FLAGS; this fragment only defines the | |||
# variable and does not add it to any flag list itself. | |||
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs | |||
# Placeholder conditional: both branches are empty. | |||
ifdef BINARY64 | |||
else | |||
endif |
@@ -1,224 +0,0 @@ | |||
# OpenMP is enabled unless threading was explicitly switched off with | |||
# USE_THREAD = 0 (an unset or any other USE_THREAD value leaves it at 1). | |||
USE_OPENMP = 1 | |||
ifeq ($(USE_THREAD), 0) | |||
USE_OPENMP = 0 | |||
endif | |||
# Compiler flags for CORE = POWER10. | |||
# Everything below is skipped when the C compiler is PGI; note that the | |||
# Fortran flags are nested inside that same non-PGI branch. | |||
ifeq ($(CORE), POWER10) | |||
ifneq ($(C_COMPILER), PGI) | |||
ifeq ($(C_COMPILER), GCC) | |||
# GCC: pick the newest -mcpu level the compiler version flags allow. | |||
ifeq ($(GCCVERSIONGTEQ10), 1) | |||
CCOMMON_OPT += -O3 -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math | |||
else ifneq ($(GCCVERSIONGT4), 1) | |||
# GCCVERSIONGT4 not set: fall back to power8 code generation. | |||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) | |||
CCOMMON_OPT += -O3 -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math | |||
else | |||
# GCCVERSIONGT4 set but GCCVERSIONGTEQ10 not: fall back to power9 code generation. | |||
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended) | |||
CCOMMON_OPT += -O3 -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math | |||
endif | |||
else | |||
# Any other non-PGI C compiler gets the power10 flags unconditionally. | |||
CCOMMON_OPT += -O3 -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math | |||
endif | |||
# Fortran: F_COMPILER = IBM uses -q style options, everything else -f/-m style. | |||
ifeq ($(F_COMPILER), IBM) | |||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize | |||
else | |||
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math | |||
endif | |||
endif | |||
endif | |||
# Compiler flags for CORE = POWER9. | |||
ifeq ($(CORE), POWER9) | |||
# --- C compiler --- | |||
ifneq ($(C_COMPILER), PGI) | |||
CCOMMON_OPT += -O3 -mvsx -fno-fast-math | |||
ifeq ($(C_COMPILER), GCC) | |||
# GCC without GCCVERSIONGT4 set falls back to power8 code generation. | |||
ifneq ($(GCCVERSIONGT4), 1) | |||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) | |||
CCOMMON_OPT += -mcpu=power8 -mtune=power8 | |||
else | |||
CCOMMON_OPT += -mcpu=power9 -mtune=power9 | |||
endif | |||
else | |||
CCOMMON_OPT += -mcpu=power9 -mtune=power9 | |||
endif | |||
else | |||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align | |||
endif | |||
# --- Fortran compiler --- | |||
# The -mcpu/-mtune pair is chosen exactly once per compiler. Previously | |||
# power9 was appended for every non-IBM compiler before the gfortran version | |||
# check, so an old gfortran still received -mcpu=power9 ahead of its power8 | |||
# fallback and a new gfortran received the power9 pair twice. | |||
ifeq ($(F_COMPILER), PGI) | |||
FCOMMON_OPT += -O2 -Mrecursive | |||
else ifeq ($(F_COMPILER), IBM) | |||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize | |||
else | |||
FCOMMON_OPT += -O2 -frecursive -fno-fast-math | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
ifneq ($(GCCVERSIONGT4), 1) | |||
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended) | |||
FCOMMON_OPT += -mcpu=power8 -mtune=power8 | |||
else | |||
FCOMMON_OPT += -mcpu=power9 -mtune=power9 | |||
endif | |||
else | |||
FCOMMON_OPT += -mcpu=power9 -mtune=power9 | |||
endif | |||
endif | |||
endif | |||
# Compiler flags for CORE = POWER8. | |||
ifeq ($(CORE), POWER8) | |||
# C compiler: PGI has its own option set, everything else gets the GCC-style one. | |||
ifeq ($(C_COMPILER), PGI) | |||
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align | |||
else | |||
CCOMMON_OPT += -O3 -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math | |||
endif | |||
# Fortran compiler: PGI, then IBM (same flags on every OS), then the rest, | |||
# where AIX uses -O1 and all other systems use -O2. | |||
ifeq ($(F_COMPILER), PGI) | |||
FCOMMON_OPT += -O2 -Mrecursive | |||
else ifeq ($(F_COMPILER), IBM) | |||
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize | |||
else ifeq ($(OSNAME), AIX) | |||
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math | |||
else | |||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math | |||
endif | |||
endif | |||
# Add the OpenMP define and the compiler-specific OpenMP switch when | |||
# USE_OPENMP is 1. | |||
ifeq ($(USE_OPENMP), 1) | |||
# C: PGI uses -mp, every other compiler -fopenmp. | |||
ifeq ($(C_COMPILER), PGI) | |||
CCOMMON_OPT += -DUSE_OPENMP -mp | |||
else | |||
CCOMMON_OPT += -DUSE_OPENMP -fopenmp | |||
endif | |||
# Fortran: IBM gets only the define, PGI -mp, every other compiler -fopenmp. | |||
ifeq ($(F_COMPILER), IBM) | |||
FCOMMON_OPT += -DUSE_OPENMP | |||
else ifeq ($(F_COMPILER), PGI) | |||
FCOMMON_OPT += -DUSE_OPENMP -mp | |||
else | |||
FCOMMON_OPT += -DUSE_OPENMP -fopenmp | |||
endif | |||
endif | |||
# Clang builds get -fno-integrated-as appended to the C flags. | |||
ifeq ($(C_COMPILER), CLANG) | |||
CCOMMON_OPT += -fno-integrated-as | |||
endif | |||
# workaround for C->FORTRAN ABI violation in LAPACKE | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
FCOMMON_OPT += -fno-optimize-sibling-calls | |||
endif | |||
# Directory searched for the libflame libraries referenced by LIBFLAME. | |||
FLAMEPATH = $(HOME)/flame/lib | |||
# Disabled CELL settings, kept for reference only. | |||
#ifeq ($(CORE), CELL) | |||
#CELL_SDK_ROOT = /opt/IBM/cell-sdk-1.1/sysroot/usr | |||
#SPU_CC = spu-gcc | |||
#EXTRALIB += -lspe | |||
#endif | |||
# On Linux both branches are currently empty; the COMPILER_PREFIX | |||
# assignments are commented out. | |||
ifeq ($(OSNAME), Linux) | |||
ifdef BINARY64 | |||
# COMPILER_PREFIX = powerpc64-linux- | |||
else | |||
# COMPILER_PREFIX = powerpc-linux- | |||
endif | |||
endif | |||
#Either uncomment below line or run make with `USE_MASS=1` to enable support of MASS library | |||
#USE_MASS = 1 | |||
# With USE_MASS = 1: add the MASS-related options to COMMON_OPT and link the | |||
# MASS libraries from $(MASSPATH). | |||
ifeq ($(USE_MASS), 1) | |||
# Path to MASS libs, change it if the libs are installed at any other location | |||
MASSPATH = /opt/ibm/xlmass/8.1.5/lib | |||
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS | |||
EXTRALIB += -L$(MASSPATH) -lmass -lmassvp8 -lmass_simdp8 | |||
endif | |||
# Bitness-specific settings. Apart from the two $(error) checks, everything | |||
# here applies only when OSNAME is AIX. | |||
ifdef BINARY64 | |||
# Abort on compiler/OS combinations declared unsupported. The test | |||
# concatenates C_COMPILER, F_COMPILER and OSNAME into a single word. Note that | |||
# these checks are only evaluated when BINARY64 is defined. | |||
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX) | |||
$(error Using GCC and XLF on AIX is not a supported combination.) | |||
endif | |||
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX) | |||
$(error Using Clang and gFortran on AIX is not a supported combination.) | |||
endif | |||
# 64-bit AIX: select 64-bit mode for the C compiler, Fortran compiler, | |||
# archiver (ARFLAGS) and assembler (ASFLAGS). | |||
ifeq ($(OSNAME), AIX) | |||
ifeq ($(C_COMPILER), GCC) | |||
CCOMMON_OPT += -mpowerpc64 -maix64 | |||
else | |||
CCOMMON_OPT += -m64 | |||
endif | |||
# The Fortran flag depends on which of COMPILER_F77 / F_COMPILER matches. | |||
ifeq ($(COMPILER_F77), g77) | |||
FCOMMON_OPT += -mpowerpc64 -maix64 | |||
endif | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
FCOMMON_OPT += -mpowerpc64 -maix64 | |||
endif | |||
ifeq ($(COMPILER_F77), xlf) | |||
FCOMMON_OPT += -q64 | |||
endif | |||
ARFLAGS = -X 64 | |||
ASFLAGS = -a64 | |||
endif | |||
else | |||
# BINARY64 not defined, AIX: 32-bit assembler and archiver settings. | |||
ifeq ($(OSNAME), AIX) | |||
CCOMMON_OPT += -Wa,-a32 | |||
ARFLAGS = -X 32 | |||
ASFLAGS = -a32 | |||
endif | |||
endif | |||
# CCOMMON_OPT += -maltivec -mabi=altivec | |||
# Link line for libflame, searched for in $(FLAMEPATH). | |||
LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame-lapack -lflame-base $(LIBS) | |||
ifeq ($(OSNAME), Darwin) | |||
CCOMMON_OPT += -force_cpusubtype_ALL | |||
endif | |||
# Link lines for the ESSL, ATLAS and vecLib libraries, in a variant for when | |||
# BINARY64 is not defined (first branch) and one for when it is (else branch). | |||
# Within each, SMP selects the threaded library names. | |||
ifndef BINARY64 | |||
ifeq ($(OSNAME), Linux) | |||
ESSLPATH = -L/opt/ibmcmp/lib -L/opt/ibmcmp/xlf/11.1/lib -Wl,-rpath,/opt/ibmcmp/lib -Wl,-rpath,/opt/ibmcmp/xlf/11.1/lib -lxlf90_r -lxlomp_ser -lxlfmath -lxl -lpthread | |||
else | |||
ESSLPATH = -lxlf90_r | |||
endif | |||
LIBVECLIB = -framework VecLib | |||
ifndef SMP | |||
LIBATLAS = -L/usr/lib/atlas3.7.11 -lf77blas -latlas -lg2c -lm | |||
LIBESSL = -lessl $(ESSLPATH) ../../level1/others/libmisc.a -lm | |||
else | |||
LIBATLAS = -L/usr/lib/atlas3.7.11p -lptf77blas -latlas -lm -lpthread | |||
LIBESSL = -lesslsmp $(ESSLPATH) ../../level1/others/libmisc.a -lm | |||
endif | |||
else | |||
# BINARY64 defined: lib64 directories, and vecLib referenced by full path. | |||
ifeq ($(OSNAME), Linux) | |||
ESSLPATH = -L/opt/ibmcmp/lib64 -Wl,-rpath,/opt/ibmcmp/lib64 -L/opt/ibmcmp/xlf/11.1/lib64 -Wl,-rpath,/opt/ibmcmp/xlf/11.1/lib64 -lxlf90_r -lxlomp_ser | |||
else | |||
ESSLPATH = -lxlf90_r | |||
endif | |||
LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib | |||
ifndef SMP | |||
LIBATLAS = -L/usr/lib64/atlas3.7.11 -lf77blas -latlas -lg2c -lm | |||
LIBESSL = -lessl $(ESSLPATH) -lm | |||
else | |||
LIBATLAS = -L/usr/lib64/atlas3.7.11p -lptf77blas -latlas -lm -lpthread | |||
LIBESSL = -lesslsmp $(ESSLPATH) -lxlsmp -lm | |||
endif | |||
endif |
@@ -1,113 +0,0 @@ | |||
# This is triggered by Makefile.system and runs before any of the code is built. | |||
# BINARY and USE_OPENMP are exported into the environment of the commands | |||
# run by this makefile. | |||
export BINARY | |||
export USE_OPENMP | |||
# Pass -DDYNAMIC_ARCH to the host compiler when DYNAMIC_ARCH is defined. | |||
# "override" makes the append take effect even if HOST_CFLAGS was given on | |||
# the make command line. | |||
ifdef DYNAMIC_ARCH | |||
override HOST_CFLAGS += -DDYNAMIC_ARCH | |||
endif | |||
# Names of the generated makefile fragment and header: the *_kernel variants | |||
# are used when TARGET_CORE is defined, the plain ones otherwise. | |||
ifdef TARGET_CORE | |||
TARGET_MAKE = Makefile_kernel.conf | |||
TARGET_CONF = config_kernel.h | |||
else | |||
TARGET_MAKE = Makefile.conf | |||
TARGET_CONF = config.h | |||
endif | |||
# Suffix appended to the c_check / f_check script names: ".pl" when USE_PERL | |||
# is defined, empty otherwise. | |||
ifdef USE_PERL | |||
SCRIPTSUFFIX = .pl | |||
else | |||
SCRIPTSUFFIX = | |||
endif | |||
# CPUIDEMU = ../../cpuid/table.o | |||
# When CPUIDEMU is set, its value becomes an extra prerequisite and input of | |||
# the getarch build, and these defines are added to that compile. | |||
ifdef CPUIDEMU | |||
EXFLAGS = -DCPUIDEMU -DVENDOR=99 | |||
endif | |||
# Extra compiler flags passed to c_check / f_check for specific TARGET values. | |||
# TARGET can match at most one name, so the cases form a single chain; | |||
# TARGET_FLAGS stays unset for any other TARGET. | |||
ifeq ($(TARGET), MIPS24K) | |||
TARGET_FLAGS = -mips32r2 | |||
else ifeq ($(TARGET), MIPS1004K) | |||
TARGET_FLAGS = -mips32r2 | |||
else ifeq ($(TARGET), P5600) | |||
TARGET_FLAGS = -mips32r5 | |||
else ifeq ($(TARGET), I6400) | |||
TARGET_FLAGS = -mips64r6 | |||
else ifeq ($(TARGET), P6600) | |||
TARGET_FLAGS = -mips64r6 | |||
else ifeq ($(TARGET), I6500) | |||
TARGET_FLAGS = -mips64r6 | |||
else ifeq ($(TARGET), C910V) | |||
TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d | |||
else ifeq ($(TARGET), CK860FV) | |||
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float | |||
else ifeq ($(TARGET), x280) | |||
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d | |||
else ifeq ($(TARGET), RISCV64_ZVL256B) | |||
TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||
else ifeq ($(TARGET), RISCV64_ZVL128B) | |||
TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||
else ifeq ($(TARGET), RISCV64_GENERIC) | |||
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d | |||
endif | |||
# Default goal: after getarch_2nd is built, append its mode-0 output to | |||
# $(TARGET_MAKE) and its mode-1 output to $(TARGET_CONF). | |||
all: getarch_2nd | |||
./getarch_2nd 0 >> $(TARGET_MAKE) | |||
./getarch_2nd 1 >> $(TARGET_CONF) | |||
# Generate $(TARGET_CONF), and $(TARGET_MAKE) alongside it: | |||
#   1. c_check is run with both file names, the C compiler and its flags; | |||
#   2. f_check is run likewise for the Fortran compiler, unless ONLY_CBLAS is 1, | |||
#      in which case a fixed set of settings is appended instead; | |||
#   3. the output of "getarch 0" / "getarch 1" is appended to the two files. | |||
$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch | |||
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | |||
ifneq ($(ONLY_CBLAS), 1) | |||
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" $(TARGET_FLAGS) | |||
else | |||
#When we only build CBLAS, we set NOFORTRAN=2 | |||
echo "NOFORTRAN=2" >> $(TARGET_MAKE) | |||
echo "NO_FBLAS=1" >> $(TARGET_MAKE) | |||
echo "F_COMPILER=GFORTRAN" >> $(TARGET_MAKE) | |||
echo "BU=_" >> $(TARGET_MAKE) | |||
echo "#define BUNDERSCORE _" >> $(TARGET_CONF) | |||
echo "#define NEEDBUNDERSCORE 1" >> $(TARGET_CONF) | |||
endif | |||
./getarch 0 >> $(TARGET_MAKE) | |||
./getarch 1 >> $(TARGET_CONF) | |||
# Build the getarch helper with the host compiler. | |||
# c_check is run twice with "- -" in place of the two file names, and its | |||
# output is grepped for NO_AVX512 and NO_RV64GV. Each matching line, if any, | |||
# is passed to the compile as a -D define; the ${var:+...} expansion adds | |||
# nothing when grep found no match. | |||
getarch : getarch.c cpuid.S dummy $(CPUIDEMU) | |||
avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \ | |||
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \ | |||
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU) | |||
# Build the getarch_2nd helper with the host compiler, after $(TARGET_CONF) | |||
# has been generated. -DBUILD_KERNEL is added when TARGET_CORE is defined. | |||
getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy | |||
ifdef TARGET_CORE | |||
$(HOSTCC) -I. $(HOST_CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c | |||
else | |||
$(HOSTCC) -I. $(HOST_CFLAGS) -o $(@F) getarch_2nd.c | |||
endif | |||
# Empty phony target. Listing it as a prerequisite of getarch and getarch_2nd | |||
# makes those targets always out of date, so they are rebuilt on every run. | |||
dummy: | |||
.PHONY: dummy |
@@ -1,20 +0,0 @@ | |||
# Per-core -march / -mabi options for the C and Fortran compilers. | |||
# CORE can match at most one name, so the cases form a single chain; | |||
# nothing is appended for any other CORE. | |||
ifeq ($(CORE), C910V) | |||
CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 | |||
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static | |||
else ifeq ($(CORE), x280) | |||
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d | |||
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static | |||
else ifeq ($(CORE), RISCV64_ZVL256B) | |||
CCOMMON_OPT += -march=rv64imafdcv_zvl256b_zvfh_zfh -mabi=lp64d | |||
FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||
else ifeq ($(CORE), RISCV64_ZVL128B) | |||
CCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||
FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d | |||
else ifeq ($(CORE), RISCV64_GENERIC) | |||
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d | |||
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d | |||
endif |
@@ -1,337 +0,0 @@ | |||
# | |||
# Beginning of user configuration | |||
# | |||
# Version string of this library. | |||
# NOTE(review): the ".dev" suffix presumably marks a development snapshot — confirm. | |||
VERSION = 0.3.30.dev | |||
# If you set this prefix, the library name will be lib$(LIBNAMEPREFIX)openblas.a | |||
# and lib$(LIBNAMEPREFIX)openblas.so, with a matching soname in the shared library | |||
# | |||
# LIBNAMEPREFIX = scipy | |||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | |||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | |||
# is libopenblas_$(LIBNAMESUFFIX).so.0. | |||
# LIBNAMESUFFIX = omp | |||
# You can specify the target architecture, otherwise it's | |||
# automatically detected. | |||
# TARGET = PENRYN | |||
# If you want to support multiple architecture in one binary | |||
# DYNAMIC_ARCH = 1 | |||
# If you want the full list of x86_64 architectures supported in DYNAMIC_ARCH | |||
# mode (including individual optimized codes for PENRYN, DUNNINGTON, OPTERON, | |||
# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures) | |||
# DYNAMIC_OLDER = 1 | |||
# C compiler including binary type(32bit / 64bit). Default is gcc. | |||
# Don't use Intel Compiler or PGI, it won't generate right codes as I expect. | |||
# CC = gcc | |||
# Fortran compiler. Default is g77. | |||
# FC = gfortran | |||
# You can even specify a cross compiler. In that case, please also set HOSTCC. | |||
# cross compiler for Windows | |||
# CC = x86_64-w64-mingw32-gcc | |||
# FC = x86_64-w64-mingw32-gfortran | |||
# cross compiler for 32bit ARM | |||
# CC = arm-linux-gnueabihf-gcc | |||
# FC = arm-linux-gnueabihf-gfortran | |||
# cross compiler for 64bit ARM | |||
# CC = aarch64-linux-gnu-gcc | |||
# FC = aarch64-linux-gnu-gfortran | |||
# If you use the cross compiler, please set this host compiler. | |||
# HOSTCC = gcc | |||
# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64 | |||
# Please note that AVX is not available on 32-bit. | |||
# Setting BINARY=32 disables AVX/AVX2/AVX-512. | |||
# BINARY=64 | |||
# About threaded BLAS. It will be automatically detected if you don't | |||
# specify it. | |||
# For force setting for single threaded, specify USE_THREAD = 0 | |||
# For force setting for multi threaded, specify USE_THREAD = 1 | |||
# USE_THREAD = 0 | |||
# If you want to build a single-threaded OpenBLAS, but expect to call this | |||
# from several concurrent threads in some other program, comment this in for | |||
# thread safety. (This is done automatically for USE_THREAD=1 , and should not | |||
# be necessary when USE_OPENMP=1) | |||
# USE_LOCKING = 1 | |||
# If you're going to use this library with OpenMP, please comment it in. | |||
# This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8. | |||
# USE_OPENMP = 1 | |||
# The OpenMP scheduler to use - by default this is "static" and you | |||
# will normally not want to change this unless you know that your main | |||
# workload will involve tasks that have highly unbalanced running times | |||
# for individual threads. Changing away from "static" may also adversely | |||
# affect memory access locality in NUMA systems. Setting to "runtime" will | |||
# allow you to select the scheduler from the environment variable OMP_SCHEDULE | |||
# CCOMMON_OPT += -DOMP_SCHED=dynamic | |||
# You can define the maximum number of threads. Basically it should be less | |||
# than or equal to the number of CPU threads. If you don't specify one, it's | |||
# automatically detected by the build system. | |||
# If SMT (aka. HT) is enabled on the system, it may or may not be beneficial to | |||
# restrict NUM_THREADS to the number of physical cores. By default, the automatic | |||
# detection includes logical CPUs, thus allowing the use of SMT. | |||
# Users may opt at runtime to use less than NUM_THREADS threads. | |||
# | |||
# Note for package maintainers: you can build OpenBLAS with a large NUM_THREADS | |||
# value (eg. 32-256) if you expect your users to use that many threads. Due to the way | |||
# some internal structures are allocated, using a large NUM_THREADS value has a RAM | |||
# footprint penalty, even if users reduce the actual number of threads at runtime. | |||
# NUM_THREADS = 24 | |||
# If you have enabled USE_OPENMP and your application would call | |||
# OpenBLAS's calculation API from multiple threads, please comment this in. | |||
# This flag defines how many instances of OpenBLAS's calculation API can actually | |||
# run in parallel. If more than NUM_PARALLEL threads call OpenBLAS's calculation API, | |||
# they need to wait for the preceding API calls to finish or risk data corruption. | |||
# NUM_PARALLEL = 2 | |||
# When multithreading, OpenBLAS needs to use a memory buffer for communicating | |||
# and collating results for individual subranges of the original matrix. Since | |||
# the original GotoBLAS of the early 2000s, the default size of this buffer has | |||
# been set at a value of 32<<20 (which is 32MB) on x86_64 , twice that on PPC. | |||
# If you expect to handle large problem sizes (beyond about 30000x30000) uncomment | |||
# this line and adjust the (32<<n) factor if necessary. Usually an insufficient value | |||
# manifests itself as a crash in the relevant scal kernel (sscal_k, dscal_k etc) | |||
# BUFFERSIZE = 25 | |||
# If you don't need to install the static library, please comment this in. | |||
# NO_STATIC = 1 | |||
# If you don't need to generate the shared library, please comment this in. | |||
# NO_SHARED = 1 | |||
# If you don't need the CBLAS interface, please comment this in. | |||
# NO_CBLAS = 1 | |||
# If you only want the CBLAS interface without installing a Fortran compiler, | |||
# please comment this in. | |||
# ONLY_CBLAS = 1 | |||
# If you don't need LAPACK, please comment this in. | |||
# If you set NO_LAPACK=1, the build system automatically sets NO_LAPACKE=1. | |||
# NO_LAPACK = 1 | |||
# If you don't need LAPACKE (C Interface to LAPACK), please comment this in. | |||
# NO_LAPACKE = 1 | |||
# Also build the LAPACK functions that have been deprecated since LAPACK 3.6.0. | |||
# This is enabled here; comment the line out to leave them out of the build. | |||
BUILD_LAPACK_DEPRECATED = 1 | |||
# The variable type assumed for the length of character arguments when passing | |||
# data between Fortran LAPACK and C BLAS (defaults to "size_t", but older GCC | |||
# versions used "int"). Mismatches will not cause runtime failures but may result | |||
# in build warnings or errors when building with link-time optimization (LTO) | |||
# LAPACK_STRLEN=int | |||
# Build RecursiveLAPACK on top of LAPACK | |||
# BUILD_RELAPACK = 1 | |||
# Have RecursiveLAPACK actually replace standard LAPACK routines instead of | |||
# just adding its equivalents with a RELAPACK_ prefix | |||
# RELAPACK_REPLACE = 1 | |||
# If you want to use the legacy threaded Level 3 implementation. | |||
# USE_SIMPLE_THREADED_LEVEL3 = 1 | |||
# If you want to use the new, still somewhat experimental code that uses | |||
# thread-local storage instead of a central memory buffer in memory.c | |||
# Note that if your system uses GLIBC, it needs to have at least glibc 2.21 | |||
# for this to work. | |||
# USE_TLS = 1 | |||
# If you want to drive whole 64bit region by BLAS. Not all Fortran | |||
# compilers support this. It's safe to keep this commented out if you | |||
# are not sure. (This is equivalent to the "-i8" ifort option). | |||
# INTERFACE64 = 1 | |||
# Unfortunately, most kernels won't give us a high-quality buffer. | |||
# BLAS can try to find the best memory region before entering the main function, | |||
# but that search takes time. NO_WARMUP = 1 (set here) disables it. | |||
NO_WARMUP = 1 | |||
# NO_AFFINITY = 1 (set here) disables OpenBLAS's CPU/Memory affinity handling. | |||
# Comment the line out if you want affinity handling enabled. | |||
# This feature is only implemented on Linux, and is always disabled on other platforms. | |||
# Enabling affinity handling may improve performance, especially on NUMA systems, but | |||
# it may conflict with certain applications that also try to manage affinity. | |||
# This conflict can result in threads of the application calling OpenBLAS ending up locked | |||
# to the same core(s) as OpenBLAS, possibly binding all threads to a single core. | |||
# For this reason, affinity handling is disabled by default. Can be safely enabled if nothing | |||
# else modifies affinity settings. | |||
# Note: enabling affinity has been known to cause problems with NumPy and R | |||
NO_AFFINITY = 1 | |||
# If you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus | |||
# BIGNUMA = 1 | |||
# If you are compiling for an embedded system ("bare metal") like Cortex M series | |||
# Note that you will have to provide implementations of malloc() and free() in this case | |||
# EMBEDDED = 1 | |||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers | |||
# and OS. However, the performance is low. | |||
# NO_AVX = 1 | |||
# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6) | |||
# NO_AVX2 = 1 | |||
# Don't use SkylakeX optimizations if binutils or compiler are too old (the build | |||
# system will try to determine this automatically) | |||
# NO_AVX512 = 1 | |||
# Don't use parallel make. | |||
# NO_PARALLEL_MAKE = 1 | |||
# Force number of make jobs. The default is the number of logical CPU of the host. | |||
# This is particularly useful when using distcc. | |||
# A negative value will disable adding a -j flag to make, allowing the use of a parent | |||
# make -j value. This is useful when calling the OpenBLAS make from another project's | |||
# makefile. | |||
# MAKE_NB_JOBS = 2 | |||
# If you would like to know minute performance report of GotoBLAS. | |||
# FUNCTION_PROFILE = 1 | |||
# Support for IEEE quad precision (it's *real* REAL*16) (under testing) | |||
# This option should not be used - it is a holdover from unfinished code present | |||
# in the original GotoBLAS2 library that may be usable as a starting point but | |||
# is not even expected to compile in its present form. | |||
# QUAD_PRECISION = 1 | |||
# Threads keep running for a while after finishing a BLAS operation | |||
# to reduce thread activate/deactivate overhead. You can tune this | |||
# timeout to improve performance. This number should be from 4 to 30 | |||
# which corresponds to (1 << n) cycles. For example, if you set to 26, | |||
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz | |||
# system). Also you can control this number by THREAD_TIMEOUT | |||
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26 | |||
# Using special device driver for mapping physically contiguous memory | |||
# to the user space. If bigphysarea is enabled, it will use it. | |||
# DEVICEDRIVER_ALLOCATION = 1 | |||
# Use large page allocation (called hugepage support in Linux context) | |||
# for the thread buffers (with access by shared memory operations) | |||
# HUGETLB_ALLOCATION = 1 | |||
# Use large page allocation (called hugepages in Linux) based on mmap accessing | |||
# a memory-backed pseudofile (requires hugetlbfs to be mounted in the system, | |||
# the example below has it mounted on /hugepages. OpenBLAS will create the backing | |||
# file as gotoblas.processid in that path) | |||
# HUGETLBFILE_ALLOCATION = /hugepages | |||
# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only). | |||
# CONSISTENT_FPCSR = 1 | |||
# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed | |||
# with a single thread. (Actually in recent versions this is a factor proportional to the | |||
# number of floating point operations necessary for the given problem size, no longer | |||
# an individual dimension). You can use this setting to avoid the overhead of multi- | |||
# threading in small matrix sizes. The default value is 4, but values as high as 50 have | |||
# been reported to be optimal for certain workloads (50 is the recommended value for Julia). | |||
# GEMM_MULTITHREAD_THRESHOLD = 4 | |||
# If you need sanity check by comparing results to reference BLAS. It'll be very | |||
# slow (Not implemented yet). | |||
# SANITY_CHECK = 1 | |||
# The installation directory. | |||
# PREFIX = /opt/OpenBLAS | |||
# Common Optimization Flag; | |||
# The default -O2 is enough. | |||
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT | |||
# COMMON_OPT = -O2 | |||
# gfortran option for LAPACK to improve thread-safety | |||
# It is enabled by default in Makefile.system for gfortran | |||
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT | |||
# FCOMMON_OPT = -frecursive | |||
# Profiling flags, stored in COMMON_PROF. | |||
# NOTE(review): where COMMON_PROF is consumed is not visible in this file — confirm before changing. | |||
COMMON_PROF = -pg | |||
# Build Debug version | |||
# DEBUG = 1 | |||
# Set maximum stack allocation. | |||
# The default value is 2048. 0 disables stack allocation and may reduce GER and GEMV | |||
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482 | |||
# | |||
# MAX_STACK_ALLOC = 0 | |||
# Add a prefix or suffix to all exported symbol names in the shared library. | |||
# Avoid conflicts with other BLAS libraries, especially when using | |||
# 64 bit integer interfaces in OpenBLAS. | |||
# For details, https://github.com/xianyi/OpenBLAS/pull/459 | |||
# | |||
# The same prefix and suffix are also added to the library name, | |||
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas | |||
# | |||
# SYMBOLPREFIX= | |||
# SYMBOLSUFFIX= | |||
# Run a C++ based thread safety tester after the build is done. | |||
# This is mostly intended as a developer feature to spot regressions, but users and | |||
# package maintainers can enable this if they have doubts about the thread safety of | |||
# the library, given the configuration in this file. | |||
# By default, the thread safety tester launches 52 concurrent calculations at the same | |||
# time. | |||
# | |||
# Please note that the test uses ~1300 MiB of RAM for the DGEMM test. | |||
# | |||
# The test requires CBLAS to be built, a C++11 capable compiler and the presence of | |||
# an OpenMP implementation. If you are cross-compiling this test will probably not | |||
# work at all. | |||
# | |||
# CPP_THREAD_SAFETY_TEST = 1 | |||
# | |||
# use this to run only the less memory-hungry GEMV test | |||
# CPP_THREAD_SAFETY_GEMV = 1 | |||
# If you want to enable the experimental BFLOAT16 support | |||
# BUILD_BFLOAT16 = 1 | |||
# If you want to enable the experimental HFLOAT16 support | |||
# BUILD_HFLOAT16 = 1 | |||
# Set the thread number threshold beyond which the job array for the threaded level3 BLAS | |||
# will be allocated on the heap rather than the stack. (This array alone requires | |||
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu | |||
# counts, but obviously it is not the only item that ends up on the stack.) | |||
# The default value of 32 ensures that the overall requirement is compatible | |||
# with the default 1MB stacksize imposed by having the Java VM loaded without use | |||
# of its -Xss parameter. | |||
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible | |||
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java | |||
# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code | |||
# BLAS3_MEM_ALLOC_THRESHOLD = 160 | |||
# By default the library contains BLAS functions (and LAPACK if selected) for all input types. | |||
# To build a smaller library supporting e.g. only single precision real (SGEMM etc.) or only | |||
# the functions for complex numbers, uncomment the desired type(s) below | |||
# BUILD_SINGLE = 1 | |||
# BUILD_DOUBLE = 1 | |||
# BUILD_COMPLEX = 1 | |||
# BUILD_COMPLEX16 = 1 | |||
# | |||
# End of user configuration | |||
# |
@@ -1,48 +0,0 @@ | |||
# The preprocessor is the C compiler run with -E; ranlib is used as RANLIB. | |||
CPP = $(CC) -E | |||
RANLIB = ranlib | |||
# Architecture flags for the C and Fortran compilers, chosen by bitness | |||
# (BINARY64 defined or not) and by compiler. | |||
ifndef BINARY64 | |||
# BINARY64 not defined. | |||
ifneq ($(C_COMPILER), GCC) | |||
CCOMMON_OPT += -xarch=v9 | |||
else | |||
CCOMMON_OPT += -mcpu=v9 | |||
endif | |||
ifeq ($(COMPILER_F77), g77) | |||
FCOMMON_OPT += -mcpu=v9 | |||
else ifeq ($(COMPILER_F77), f95) | |||
FCOMMON_OPT += -xarch=v8plusb | |||
endif | |||
else | |||
# BINARY64 defined: every compiler additionally gets -m64. | |||
ifneq ($(C_COMPILER), GCC) | |||
CCOMMON_OPT += -m64 | |||
else | |||
CCOMMON_OPT += -mcpu=v9 -m64 | |||
endif | |||
ifeq ($(COMPILER_F77), g77) | |||
FCOMMON_OPT += -mcpu=v9 -m64 | |||
else ifeq ($(COMPILER_F77), f95) | |||
FCOMMON_OPT += -m64 | |||
endif | |||
endif | |||
# Name of the static library archive. | |||
LIBNAME = $(LIBPREFIX).a | |||
# Link lines for the CXML and ATLAS libraries. | |||
# NOTE(review): LIBATLAS is only set when SMP is not defined — confirm that is intended. | |||
ifndef SMP | |||
LIBCXML = -L/opt/SUNWspro/lib/v9 | |||
LIBATLAS = -L$(HOME)/misc/lib -lf77blas -latlas -lm | |||
else | |||
LIBCXML = -lcxmlp -lots -lm | |||
endif | |||
# Link line for the Sun Performance Library: v9 library directories when | |||
# BINARY64 is defined, the plain ones otherwise. | |||
ifdef BINARY64 | |||
LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \ | |||
-Wl,-R,/opt/SUNWspro/lib/v9 -lsunperf -lompstubs -lfui -lfsu -lsunmath | |||
else | |||
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \ | |||
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath | |||
endif |
@@ -1,658 +0,0 @@ | |||
############################################################################### | |||
# Copyright (c) 2025, The OpenBLAS Project | |||
# All rights reserved. | |||
# Redistribution and use in source and binary forms, with or without | |||
# modification, are permitted provided that the following conditions are | |||
# met: | |||
# 1. Redistributions of source code must retain the above copyright | |||
# notice, this list of conditions and the following disclaimer. | |||
# 2. Redistributions in binary form must reproduce the above copyright | |||
# notice, this list of conditions and the following disclaimer in | |||
# the documentation and/or other materials provided with the | |||
# distribution. | |||
# 3. Neither the name of the OpenBLAS project nor the names of | |||
# its contributors may be used to endorse or promote products | |||
# derived from this software without specific prior written permission. | |||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |||
# POSSIBILITY OF SUCH DAMAGE. | |||
############################################################################### | |||
# Profiling counterparts of every object list: the same names with the | |||
# $(SUFFIX) extension replaced by $(PSUFFIX). | |||
BBLASOBJS_P = $(BBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
# This list must be spelled SHBLASOBJS_P: the target-specific CFLAGS rules | |||
# further down refer to it by that name. | |||
SHBLASOBJS_P = $(SHBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
# Backward-compatible alias for the historical misspelling of SHBLASOBJS_P. | |||
SHBLASPBJS_P = $(SHBLASOBJS_P) | |||
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
SBEXTOBJS_P = $(SBEXTOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
# Aggregate lists archived by the libs/prof/kernel targets. | |||
# NOTE(review): CBAUXOBJS_P is not derived in this block; presumably it is | |||
# provided by the including Makefile - verify. | |||
BLASOBJS = $(SHBLASOBJS) $(BBLASOBJS) $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS) | |||
BLASOBJS_P = $(SHBLASOBJS_P) $(BBLASOBJS_P) $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P) | |||
# The Q (extended real) and X (extended complex) object lists are appended | |||
# only when EXPRECISION or QUAD_PRECISION is defined. If both are defined the | |||
# lists are appended twice. | |||
ifdef EXPRECISION | |||
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | |||
endif | |||
ifdef QUAD_PRECISION | |||
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | |||
endif | |||
# Target-specific CFLAGS: each object list (and its profiling twin) is built | |||
# with the -D/-U macro combination shown for that list. | |||
# NOTE(review): these lines only take effect for variables that are actually | |||
# defined; check that SHBLASOBJS_P is spelled identically where the *_P lists | |||
# are derived. | |||
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHFLOAT16 -UDOUBLE -UCOMPLEX | |||
$(BBLASOBJS) $(BBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX | |||
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | |||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | |||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | |||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | |||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | |||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | |||
$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||
# Profiling objects additionally get -DPROFILE and $(COMMON_PROF). | |||
$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(BBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(SBEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
# Archive rules. Each double-colon target passes all of its prerequisites ($^) | |||
# to $(AR) with -ru, writing to $(LIBNAME) in $(TOPDIR) for the regular | |||
# targets and to $(LIBNAME_P) for the profiling ones. | |||
libs :: $(BLASOBJS) $(COMMONOBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
prof :: $(BLASOBJS_P) $(COMMONOBJS_P) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME_P) $^ | |||
hpl :: $(HPLOBJS) $(COMMONOBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
hpl_p :: $(HPLOBJS_P) $(COMMONOBJS_P) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME_P) $^ | |||
kernel :: $(BLASOBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
commonlibs :: $(COMMONOBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
commonprof :: $(COMMONOBJS_P) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME_P) $^ | |||
# quick: run the libs target via a sub-make in $(TOPDIR). | |||
quick : | |||
$(MAKE) -C $(TOPDIR) libs | |||
bms.$(SUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
bmd.$(SUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
bmd-k.$(SUFFIX):bm-k.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
ifdef QUAD_PRECISION | |||
bmq.$(SUFFIX):bmq.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
bmx.$(SUFFIX):bmx.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
else | |||
bmq.$(SUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
bmx.$(SUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
endif | |||
bmc.$(SUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
bmz.$(SUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
bmd_nn.$(SUFFIX):bm_special.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -DNN -c $< -o $(@F) | |||
bmd_nt.$(SUFFIX):bm_special.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -DNT -c $< -o $(@F) | |||
bmd_tn.$(SUFFIX):bm_special.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -DTN -c $< -o $(@F) | |||
bmd_tt.$(SUFFIX):bm_special.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -DTT -c $< -o $(@F) | |||
bm-phy.$(SUFFIX):bm-phy.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
bms.$(PSUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(PFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
bmd.$(PSUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(PFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
ifdef QUAD_PRECISION | |||
bmq.$(PSUFFIX):bmq.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(PFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
bmx.$(PSUFFIX):bmx.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(PFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
else | |||
bmq.$(PSUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(PFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
bmx.$(PSUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(PFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
endif | |||
bmc.$(PSUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(PFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
bmz.$(PSUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(PFLAGS) -DDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
bms : bms.$(SUFFIX) $(SBLASOBJS) $(COMMONOBJS) $(SOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmd : bmd.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) -lm | |||
bmd-k : bmd-k.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) -lm | |||
bmq : bmq.$(SUFFIX) $(QBLASOBJS) $(COMMONOBJS) $(QOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmc : bmc.$(SUFFIX) $(CBLASOBJS) $(COMMONOBJS) $(COBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) $(FEXTRALIB) | |||
bmz : bmz.$(SUFFIX) $(ZBLASOBJS) $(COMMONOBJS) $(ZOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmx : bmx.$(SUFFIX) $(XBLASOBJS) $(COMMONOBJS) $(XOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmd_nn : bmd_nn.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmd_nt : bmd_nt.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmd_tn : bmd_tn.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmd_tt : bmd_tt.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bm-phy:bm-phy.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmcc : bmcc.$(SUFFIX) $(CBLASOBJS) $(COMMONOBJS) $(COBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmzc : bmzc.$(SUFFIX) $(ZBLASOBJS) $(COMMONOBJS) $(ZOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
# Profiling benchmark executables. Each one links its own profiled driver | |||
# object (bm?.$(PSUFFIX)) with the profiled BLAS objects for that data type and | |||
# the profiled common objects, using $(PFLAGS). | |||
bms.prof : bms.$(PSUFFIX) $(SBLASOBJS_P) $(COMMONOBJS_P) $(SOBJS) $(OBJS) $(LIBS_P) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmd.prof : bmd.$(PSUFFIX) $(DBLASOBJS_P) $(COMMONOBJS_P) $(DOBJS) $(OBJS) $(LIBS_P) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmq.prof : bmq.$(PSUFFIX) $(QBLASOBJS_P) $(COMMONOBJS_P) $(QOBJS) $(OBJS) $(LIBS_P) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
# The complex variants use $(COMMONOBJS_P) like the real ones, so the whole | |||
# executable is built from profiled objects. | |||
bmc.prof : bmc.$(PSUFFIX) $(CBLASOBJS_P) $(COMMONOBJS_P) $(COBJS) $(OBJS) $(LIBS_P) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bmz.prof : bmz.$(PSUFFIX) $(ZBLASOBJS_P) $(COMMONOBJS_P) $(ZOBJS) $(OBJS) $(LIBS_P) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
# bmx.prof must use bmx.$(PSUFFIX) (built with -DXDOUBLE -DCOMPLEX), not the | |||
# bmz driver, which is built with -DDOUBLE. | |||
bmx.prof : bmx.$(PSUFFIX) $(XBLASOBJS_P) $(COMMONOBJS_P) $(XOBJS) $(OBJS) $(LIBS_P) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bms.cxml : bms.$(SUFFIX) $(SOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML) | |||
bmd.cxml : bmd.$(SUFFIX) $(DOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML) | |||
bmc.cxml : bmc.$(SUFFIX) $(COBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML) | |||
bmz.cxml : bmz.$(SUFFIX) $(ZOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML) | |||
bms.scsl : bms.$(SUFFIX) $(SOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL) | |||
bmd.scsl : bmd.$(SUFFIX) $(DOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL) | |||
bmc.scsl : bmc.$(SUFFIX) $(COBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL) | |||
bmz.scsl : bmz.$(SUFFIX) $(ZOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL) | |||
bms.acml : bms.$(SUFFIX) $(SOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML) | |||
bmd.acml : bmd.$(SUFFIX) $(DOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML) | |||
bmc.acml : bmc.$(SUFFIX) $(COBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML) | |||
bmz.acml : bmz.$(SUFFIX) $(ZOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML) | |||
bms.sun : bms.$(SUFFIX) $(SOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB) | |||
bmd.sun : bmd.$(SUFFIX) $(DOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB) | |||
bmc.sun : bmc.$(SUFFIX) $(COBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB) | |||
bmz.sun : bmz.$(SUFFIX) $(ZOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB) | |||
bms.atlas : bms.$(SUFFIX) $(SOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS) | |||
bmd.atlas : bmd.$(SUFFIX) $(DOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS) | |||
bmc.atlas : bmc.$(SUFFIX) $(COBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS) | |||
bmz.atlas : bmz.$(SUFFIX) $(ZOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS) | |||
# Benchmark executables linked against $(LIBESSL). The C driver is given the | |||
# C option set ($(CCOMMON_OPT)); Fortran options must not be passed to $(CC). | |||
bms.essl : bms.$(SUFFIX) $(SOBJS) $(OBJS) | |||
$(CC) $(CCOMMON_OPT) -o $(@F) $^ $(LIBESSL) | |||
bmd.essl : bmd.$(SUFFIX) $(DOBJS) $(OBJS) | |||
$(CC) $(CCOMMON_OPT) -o $(@F) $^ $(LIBESSL) | |||
# NOTE(review): this is the only ESSL rule that links with $(F77) while still | |||
# passing $(CCOMMON_OPT); confirm whether the Fortran driver is deliberate. | |||
bmc.essl : bmc.$(SUFFIX) $(COBJS) $(OBJS) | |||
$(F77) $(CCOMMON_OPT) -o $(@F) $^ $(LIBESSL) | |||
bmz.essl : bmz.$(SUFFIX) $(ZOBJS) $(OBJS) | |||
$(CC) $(CCOMMON_OPT) -o $(@F) $^ $(LIBESSL) | |||
bms.flame : bms.$(SUFFIX) $(SOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) | |||
bmd.flame : bmd.$(SUFFIX) $(DOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) | |||
bmc.flame : bmc.$(SUFFIX) $(COBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) | |||
bmz.flame : bmz.$(SUFFIX) $(ZOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) | |||
bms.flame.prof : bms.$(SUFFIX) $(SOBJS) $(OBJS_P) | |||
$(F77) -o $(@F) $(PFLAGS) $^ $(LIBFLAME) | |||
bmd.flame.prof : bmd.$(SUFFIX) $(DOBJS) $(OBJS_P) | |||
$(F77) -o $(@F) $(PFLAGS) $^ $(LIBFLAME) | |||
bmc.flame.prof : bmc.$(SUFFIX) $(COBJS) $(OBJS_P) | |||
$(F77) -o $(@F) $(PFLAGS) $^ $(LIBFLAME) | |||
bmz.flame.prof : bmz.$(SUFFIX) $(ZOBJS) $(OBJS_P) | |||
$(F77) -o $(@F) $(PFLAGS) $^ $(LIBFLAME) | |||
bms.mkl : bms.$(SUFFIX) $(SOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB) | |||
bmd.mkl : bmd.$(SUFFIX) $(DOBJS) $(OBJS) | |||
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB) | |||
bmc.mkl : bmc.$(SUFFIX) $(COBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB) | |||
bmz.mkl : bmz.$(SUFFIX) $(ZOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB) | |||
bmq.mkl : bmq.$(SUFFIX) $(QOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB) | |||
bms.mkl.prof : bms.$(PSUFFIX) $(SOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(LIBMKL) | |||
bmd.mkl.prof : bmd.$(PSUFFIX) $(DOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(LIBMKL) | |||
bmc.mkl.prof : bmc.$(PSUFFIX) $(COBJS) $(OBJS) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(LIBMKL) | |||
bmz.mkl.prof : bmz.$(PSUFFIX) $(ZOBJS) $(OBJS) | |||
$(CC) -o $(@F) $(PFLAGS) $^ $(LIBMKL) | |||
bms.mlib : bms.$(SUFFIX) $(SOBJS) $(OBJS) | |||
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB) | |||
bmd.mlib : bmd.$(SUFFIX) $(DOBJS) $(OBJS) | |||
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB) | |||
bmc.mlib : bmc.$(SUFFIX) $(COBJS) $(OBJS) | |||
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB) | |||
bmz.mlib : bmz.$(SUFFIX) $(ZOBJS) $(OBJS) | |||
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB) | |||
bms.veclib : bms.$(SUFFIX) $(SOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB) | |||
bmd.veclib : bmd.$(SUFFIX) $(DOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB) | |||
bmc.veclib : bmc.$(SUFFIX) $(COBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB) | |||
bmz.veclib : bmz.$(SUFFIX) $(ZOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB) | |||
bms.fuji : bms.$(SUFFIX) $(SOBJS) | |||
ifndef SMP | |||
fcc -KV9FMADD -SSL2 -o $(@F) $^ | |||
else | |||
fcc -KV9FMADD -SSL2BLAMP -o $(@F) $^ | |||
endif | |||
bmd.fuji : bmd.$(SUFFIX) $(DOBJS) | |||
ifndef SMP | |||
fcc -KV9FMADD -SSL2 -o $(@F) $^ | |||
else | |||
fcc -KV9FMADD -SSL2BLAMP -o $(@F) $^ | |||
endif | |||
bmc.fuji : bmc.$(SUFFIX) $(COBJS) | |||
ifndef SMP | |||
fcc -KV9FMADD -SSL2 -o $(@F) $^ | |||
else | |||
fcc -KV9FMADD -SSL2BLAMP -o $(@F) $^ | |||
endif | |||
bmz.fuji : bmz.$(SUFFIX) $(ZOBJS) | |||
ifndef SMP | |||
fcc -KV9FMADD -SSL2 -o $(@F) $^ | |||
else | |||
fcc -KV9FMADD -SSL2BLAMP -o $(@F) $^ | |||
endif | |||
bench: bench.$(SUFFIX) $(BLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
bench.$(SUFFIX): bench.c | |||
$(CC) -c -o $(@F) $(CFLAGS) $^ | |||
bench_old: bench_old.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
kbench: kbench.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
prebench: prebench.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
kbench_rank_k: kbench_rank_k.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
smallbench: smallbench.$(SUFFIX) $(BLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
smallbench.mkl: smallbench.$(SUFFIX) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB) | |||
bench.sun: bench.$(SUFFIX) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB) | |||
bench.cxml: bench.$(SUFFIX) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML) | |||
bench.atlas: bench.$(SUFFIX) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS) | |||
bench.essl: bench.$(SUFFIX) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL) ../../level1/others/libmisc.$(LIBSUFFIX) | |||
bench.scsl: bench.$(SUFFIX) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL) $(EXTRALIB) $(CEXTRALIB) | |||
bench.acml: bench.$(SUFFIX) $(OBJS) | |||
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBACML) $(EXTRALIB) $(CEXTRALIB) | |||
bench.flame: bench.$(SUFFIX) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB) | |||
kbench.mkl: kbench.$(SUFFIX) $(OBJS) | |||
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB) | |||
bench.mkl: bench.$(SUFFIX) $(OBJS) | |||
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB) | |||
bench_old.mkl: bench_old.$(SUFFIX) $(OBJS) | |||
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB) | |||
bench.mlib: bench.$(SUFFIX) $(OBJS) | |||
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB) | |||
bench.veclib: bench.$(SUFFIX) $(OBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB) | |||
params : params.$(SUFFIX) $(SBLASOBJS) $(COMMONOBJS) $(SOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramd : paramd.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramq : paramq.$(SUFFIX) $(QBLASOBJS) $(COMMONOBJS) $(QOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramc : paramc.$(SUFFIX) $(CBLASOBJS) $(COMMONOBJS) $(COBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramz : paramz.$(SUFFIX) $(ZBLASOBJS) $(COMMONOBJS) $(ZOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramx : paramx.$(SUFFIX) $(XBLASOBJS) $(COMMONOBJS) $(XOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
params-ex : params-ex.$(SUFFIX) $(SBLASOBJS) $(COMMONOBJS) $(SOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramd-ex : paramd-ex.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramq-ex : paramq-ex.$(SUFFIX) $(QBLASOBJS) $(COMMONOBJS) $(QOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramc-ex : paramc-ex.$(SUFFIX) $(CBLASOBJS) $(COMMONOBJS) $(COBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramz-ex : paramz-ex.$(SUFFIX) $(ZBLASOBJS) $(COMMONOBJS) $(ZOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
paramx-ex : paramx-ex.$(SUFFIX) $(XBLASOBJS) $(COMMONOBJS) $(XOBJS) $(OBJS) $(LIBS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) | |||
params.atlas : params.$(SUFFIX) $(OBJS) $(SOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS) | |||
paramd.atlas : paramd.$(SUFFIX) $(OBJS) $(DOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS) | |||
paramc.atlas : paramc.$(SUFFIX) $(OBJS) $(COBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS) | |||
paramz.atlas : paramz.$(SUFFIX) $(OBJS) $(ZOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS) | |||
params.sun : params.$(SUFFIX) $(OBJS) $(SOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) | |||
paramd.sun : paramd.$(SUFFIX) $(OBJS) $(DOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) | |||
paramc.sun : paramc.$(SUFFIX) $(OBJS) $(COBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) | |||
paramz.sun : paramz.$(SUFFIX) $(OBJS) $(ZOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) | |||
params.essl : params.$(SUFFIX) $(OBJS) $(SOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL) | |||
paramd.essl : paramd.$(SUFFIX) $(OBJS) $(DOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL) | |||
paramc.essl : paramc.$(SUFFIX) $(OBJS) $(COBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL) | |||
paramz.essl : paramz.$(SUFFIX) $(OBJS) $(ZOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL) | |||
params.mkl : params.$(SUFFIX) $(OBJS) $(SOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) | |||
paramd.mkl : paramd.$(SUFFIX) $(OBJS) $(DOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) | |||
paramc.mkl : paramc.$(SUFFIX) $(OBJS) $(COBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) | |||
paramz.mkl : paramz.$(SUFFIX) $(OBJS) $(ZOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) | |||
params.acml : params.$(SUFFIX) $(OBJS) $(SOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML) | |||
paramd.acml : paramd.$(SUFFIX) $(OBJS) $(DOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML) | |||
paramc.acml : paramc.$(SUFFIX) $(OBJS) $(COBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML) | |||
paramz.acml : paramz.$(SUFFIX) $(OBJS) $(ZOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML) | |||
params.flame : params.$(SUFFIX) $(OBJS) $(SOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB) | |||
paramd.flame : paramd.$(SUFFIX) $(OBJS) $(DOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB) | |||
paramc.flame : paramc.$(SUFFIX) $(OBJS) $(COBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB) | |||
paramz.flame : paramz.$(SUFFIX) $(OBJS) $(ZOBJS) | |||
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB) | |||
params.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
paramd.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
paramq.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
paramc.$(SUFFIX):paramz.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
paramz.$(SUFFIX):paramz.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
paramx.$(SUFFIX):paramz.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
params-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
paramd-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
paramq-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F) | |||
paramc-ex.$(SUFFIX):paramz-ex.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
paramz-ex.$(SUFFIX):paramz-ex.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
paramx-ex.$(SUFFIX):paramz-ex.c $(TOPDIR)/../bench/bmcommon.h | |||
$(CC) $(CFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F) | |||
# Generate gen_insn_flash.c, a small C program that prints assembler source | |||
# for an insn_flash routine made of ICACHE_SIZE-derived runs of short branches. | |||
# printf '%s\n' is used instead of echo so that the backslash sequences inside | |||
# the C string literals reach the file unchanged: how echo treats "\n" is | |||
# implementation-defined and some /bin/sh implementations expand it. | |||
gen_insn_flash.c : | |||
printf '%s\n' '#include <stdio.h>' > gen_insn_flash.c | |||
printf '%s\n' '#include <stdlib.h>' >> gen_insn_flash.c | |||
printf '%s\n' '#define ICACHE_SIZE ( 256 << 10)' >> gen_insn_flash.c | |||
printf '%s\n' 'int main(void){' >> gen_insn_flash.c | |||
printf '%s\n' 'int i;' >> gen_insn_flash.c | |||
printf '%s\n' '#ifdef __alpha' >> gen_insn_flash.c | |||
printf '%s\n' 'printf(".set noat;.set noreorder;\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'printf(".arch ev6;.text;.align 5\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'printf(".globl insn_flash\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'printf(".ent insn_flash\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'printf("insn_flash:\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'for (i = 0; i < ICACHE_SIZE / 4; i++)' >> gen_insn_flash.c | |||
printf '%s\n' 'printf("br 1f\n 1:\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'printf(".align 5;ret;.end insn_flash\n");' >> gen_insn_flash.c | |||
printf '%s\n' '#else' >> gen_insn_flash.c | |||
printf '%s\n' 'printf(".text;.align 32\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'printf(".globl insn_flash\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'printf("insn_flash:\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'for (i = 0; i < ICACHE_SIZE / 2; i++)' >> gen_insn_flash.c | |||
printf '%s\n' 'printf("jmp 1f\n 1:\n");' >> gen_insn_flash.c | |||
printf '%s\n' 'printf(".align 32;ret\n");' >> gen_insn_flash.c | |||
printf '%s\n' '#endif' >> gen_insn_flash.c | |||
printf '%s\n' 'return 0;' >> gen_insn_flash.c | |||
printf '%s\n' '}' >> gen_insn_flash.c | |||
# Run the gen_insn_flash program, capture its output in temp.s, assemble that | |||
# with $(AS) into the target object, then delete temp.s. | |||
insn_flash.$(SUFFIX) : gen_insn_flash | |||
./gen_insn_flash > temp.s | |||
$(AS) -o $(@F) temp.s | |||
rm -f temp.s | |||
dummy : | |||
# Remove build products: run "make clean" inside $(ARCH) when that directory | |||
# exists, delete every *.o found below the current directory, then delete the | |||
# listed intermediate files, benchmark/parameter executables and the | |||
# per-library benchmark binaries in the current directory. | |||
# (*.i appears twice in the rm list; the duplicate is harmless.) | |||
clean :: | |||
@if test -d $(ARCH); then \ | |||
(cd $(ARCH) && $(MAKE) clean) \ | |||
fi | |||
@find . -name '*.o' | xargs rm -rf | |||
@rm -rf *.a *.s *.po *.obj *.i *.so core core.* gmon.out *.cso \ | |||
*.csx *.is *~ *.exe *.flame *.pdb *.dwf \ | |||
gen_insn_flash.c gen_insn_flash *.stackdump *.dll *.exp *.lib \ | |||
*.pc *.pcl *.def *.i *.prof linktest.c \ | |||
bms bmd bmc bmz bmq bmx \ | |||
params paramd paramc paramz paramq paramx \ | |||
params-ex paramd-ex paramc-ex paramz-ex paramq-ex paramx-ex \ | |||
bench tpp kbench kbench2 \ | |||
*.mkl *.sun *.acml *.cxml *.essl *.atlas *.scsl *.mlib *.veclib *.fuji |
@@ -1,89 +0,0 @@ | |||
# COMPILER_PREFIX = mingw32- | |||
# ADD_CPUFLAGS is set unless DYNAMIC_ARCH is 1; with DYNAMIC_ARCH=1 it is set | |||
# only when TARGET_CORE is defined. | |||
ifneq ($(DYNAMIC_ARCH),1) | |||
ADD_CPUFLAGS = 1 | |||
else | |||
ifdef TARGET_CORE | |||
ADD_CPUFLAGS = 1 | |||
endif | |||
endif | |||
# When ADD_CPUFLAGS and HAVE_SSE are both defined, add -msse to the C options, | |||
# and to the Fortran options unless the Fortran compiler is NAG. | |||
ifdef ADD_CPUFLAGS | |||
ifdef HAVE_SSE | |||
CCOMMON_OPT += -msse | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -msse | |||
endif | |||
endif | |||
endif | |||
ifeq ($(OSNAME), Interix) | |||
ARFLAGS = -m x86 | |||
endif | |||
ifndef SMP | |||
LIBMKL = -L$(MKLPATH)/32 -Wl,-rpath,$(MKLPATH)/32 -lmkl_intel -lmkl_sequential -lmkl_core -lguide -lpthread -lm | |||
else | |||
LIBMKL = -L$(MKLPATH)/32 -Wl,-rpath,$(MKLPATH)/32 -lmkl_intel -lmkl_intel_thread -lmkl_core -lguide -lpthread -lm | |||
endif | |||
# LIBMKL = -L$(MKLPATH)/32 -lmkl_lapack -lmkl_ia32 -lguide -lpthread -lm | |||
ifndef SMP | |||
LIBATLAS = -L$(ATLAS) -lf77blas -latlas -lg2c -lm | |||
else | |||
LIBATLAS = -L$(ATLAS) -lptf77blas -latlas -lpthread -lg2c -lm | |||
endif | |||
ifeq ($(COMPILER_F77), g77) | |||
LIBACML = -L$(ACMLPATH)/gnu32/lib -Wl,-rpath,$(ACMLPATH)/gnu32/lib -lacml -lg2c | |||
endif | |||
LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame-lapack -lflame-base $(LIBS) | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
ifndef SMP | |||
LIBACML = -L$(ACMLPATH)/gfortran32/lib -Wl,-rpath,$(ACMLPATH)/gfortran32/lib -lacml -lgfortran -lm | |||
else | |||
LIBACML = -L$(ACMLPATH)/gfortran32_mp/lib -Wl,-rpath,$(ACMLPATH)/gfortran32_mp/lib -lacml_mp -lgfortran -lgomp -lm | |||
endif | |||
endif | |||
ifeq ($(COMPILER_F77), pgf77) | |||
LIBACML = -L$(ACMLPATH)/pgi32/lib -lacml -L/opt/pgi/linux86-64/5.2/lib -lpgftnrtl -lnspgc -lpgc | |||
endif | |||
ifeq ($(F_COMPILER), PATHSCALE) | |||
ifndef SMP | |||
LIBACML = -L$(ACMLPATH)/pathscale32/lib -Wl,-rpath,$(ACMLPATH)/pathscale32/lib -lacml -Wl,-rpath,$(PATHSCALEPATH) -L$(PATHSCALEPATH) -lpathfortran -lm | |||
else | |||
LIBACML = -L$(ACMLPATH)/pathscale32_mp/lib -Wl,-rpath,$(ACMLPATH)/pathscale32_mp/lib -lacml_mp -Wl,-rpath,$(PATHSCALEPATH) -L$(PATHSCALEPATH) -lopenmp -lpathfortran -lm | |||
endif | |||
endif | |||
LIBSUNPERF = -L/opt/SUNWspro/lib/sse2 -Wl,-R,/opt/SUNWspro/lib/sse2 -lsunperf | |||
LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib | |||
# LIBATLAS was already assigned earlier in this file (using $(ATLAS)); this | |||
# later assignment, based on $(ATLASPATH)/32, replaces that value. | |||
ifndef SMP | |||
LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm | |||
else | |||
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm | |||
endif | |||
# Add -msse2 / -msse3 / -mssse3 / -msse4.1 to both the C and Fortran options | |||
# according to the HAVE_* feature variables. The SSSE3 and SSE4.1 checks are | |||
# nested inside the HAVE_SSE3 conditional, so they apply only when HAVE_SSE3 | |||
# is defined. | |||
# NOTE(review): unlike the -msse handling near the top of this file, these | |||
# flags are not guarded by ADD_CPUFLAGS and are also given to the NAG Fortran | |||
# compiler; confirm that is intended. | |||
ifdef HAVE_SSE2 | |||
CCOMMON_OPT += -msse2 | |||
FCOMMON_OPT += -msse2 | |||
endif | |||
ifdef HAVE_SSE3 | |||
CCOMMON_OPT += -msse3 | |||
FCOMMON_OPT += -msse3 | |||
ifdef HAVE_SSSE3 | |||
CCOMMON_OPT += -mssse3 | |||
FCOMMON_OPT += -mssse3 | |||
endif | |||
ifdef HAVE_SSE4_1 | |||
CCOMMON_OPT += -msse4.1 | |||
FCOMMON_OPT += -msse4.1 | |||
endif | |||
endif | |||
@@ -1,299 +0,0 @@ | |||
# CCOMMON_OPT += -DFASTCPU | |||
# 64-bit builds (BINARY64 defined) with the Sun Fortran compiler on SunOS:
# append -m64 to the Fortran flags. OS and compiler are tested together.
ifdef BINARY64 | |||
ifeq ($(OSNAME)/$(F_COMPILER), SunOS/SUN) | |||
FCOMMON_OPT += -m64 | |||
endif | |||
endif | |||
# When the compiler command contains "icx" and the C compiler was classified
# as CLANG, add -fp-model=consistent to the C flags.
ifneq ($(findstring icx,$(CC)),) | |||
ifeq ($(C_COMPILER), CLANG) | |||
CCOMMON_OPT += -fp-model=consistent | |||
endif | |||
endif | |||
# Decide whether the per-target instruction-set flags below are applied:
# always when DYNAMIC_ARCH is not 1, and for DYNAMIC_ARCH=1 only when
# TARGET_CORE is defined.
ifneq ($(DYNAMIC_ARCH),1) | |||
ADD_CPUFLAGS = 1 | |||
else ifdef TARGET_CORE | |||
ADD_CPUFLAGS = 1 | |||
endif | |||
# Target-specific instruction-set flags. Everything up to the matching endif
# at the end of this section is applied only when ADD_CPUFLAGS is defined.
# Throughout, the Fortran copy of an -m/-march flag is skipped when
# F_COMPILER is NAG.
ifdef ADD_CPUFLAGS | |||
# SSE3
ifdef HAVE_SSE3 | |||
CCOMMON_OPT += -msse3 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -msse3 | |||
endif | |||
endif | |||
# SSSE3
ifdef HAVE_SSSE3 | |||
CCOMMON_OPT += -mssse3 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -mssse3 | |||
endif | |||
endif | |||
# SSE4.1
ifdef HAVE_SSE4_1 | |||
CCOMMON_OPT += -msse4.1 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -msse4.1 | |||
endif | |||
endif | |||
# AVX, skipped when OLDGCC is defined.
ifndef OLDGCC | |||
ifdef HAVE_AVX | |||
CCOMMON_OPT += -mavx | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -mavx | |||
endif | |||
endif | |||
endif | |||
# AVX2, skipped when NO_AVX2 is defined. Note that a second, compiler-gated
# AVX2 section near the end of this block may append -mavx2 again.
ifndef NO_AVX2 | |||
ifdef HAVE_AVX2 | |||
CCOMMON_OPT += -mavx2 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -mavx2 | |||
endif | |||
endif | |||
endif | |||
# SKYLAKEX: build with -march=skylake-avx512 unless NO_AVX512 is defined.
# On CYGWIN_NT, and on WINNT with GCC, -fno-asynchronous-unwind-tables is
# added as well (the same pattern repeats for the AVX512 targets below).
ifeq ($(CORE), SKYLAKEX) | |||
ifndef NO_AVX512 | |||
CCOMMON_OPT += -march=skylake-avx512 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=skylake-avx512 | |||
endif | |||
ifeq ($(OSNAME), CYGWIN_NT) | |||
CCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
FCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
endif | |||
ifeq ($(OSNAME), WINNT) | |||
ifeq ($(C_COMPILER), GCC) | |||
CCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
FCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
endif | |||
endif | |||
endif | |||
endif | |||
# COOPERLAKE: use -march=cooperlake when the compiler version flags say it is
# supported, otherwise fall back to -march=skylake-avx512. No -march is added
# here when C_COMPILER is neither GCC nor CLANG.
ifeq ($(CORE), COOPERLAKE) | |||
ifndef NO_AVX512 | |||
ifeq ($(C_COMPILER), GCC) | |||
# cooperlake support was added in 10.1 | |||
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11) | |||
CCOMMON_OPT += -march=cooperlake | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=cooperlake | |||
endif | |||
else # gcc not support, fallback to avx512 | |||
CCOMMON_OPT += -march=skylake-avx512 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=skylake-avx512 | |||
endif | |||
endif | |||
else ifeq ($(C_COMPILER), CLANG) | |||
# cooperlake support was added in clang 9 | |||
ifeq ($(CLANGVERSIONGTEQ9), 1) | |||
CCOMMON_OPT += -march=cooperlake | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=cooperlake | |||
endif | |||
else # not supported in clang, fallback to avx512 | |||
CCOMMON_OPT += -march=skylake-avx512 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=skylake-avx512 | |||
endif | |||
endif | |||
endif | |||
ifeq ($(OSNAME), CYGWIN_NT) | |||
CCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
FCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
endif | |||
ifeq ($(OSNAME), WINNT) | |||
ifeq ($(C_COMPILER), GCC) | |||
CCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
FCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
endif | |||
endif | |||
endif | |||
endif | |||
# SAPPHIRERAPIDS: same scheme as COOPERLAKE, with -march=sapphirerapids gated
# on GCCVERSIONGTEQ11 / CLANGVERSIONGTEQ12.
ifeq ($(CORE), SAPPHIRERAPIDS) | |||
ifndef NO_AVX512 | |||
ifeq ($(C_COMPILER), GCC) | |||
# sapphire rapids support was added in 11 | |||
ifeq ($(GCCVERSIONGTEQ11), 1) | |||
CCOMMON_OPT += -march=sapphirerapids | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=sapphirerapids | |||
endif | |||
else # gcc not support, fallback to avx512 | |||
CCOMMON_OPT += -march=skylake-avx512 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=skylake-avx512 | |||
endif | |||
endif | |||
else ifeq ($(C_COMPILER), CLANG) | |||
# sapphire rapids support was added in clang 12 | |||
ifeq ($(CLANGVERSIONGTEQ12), 1) | |||
CCOMMON_OPT += -march=sapphirerapids | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=sapphirerapids | |||
endif | |||
else # not supported in clang, fallback to avx512 | |||
CCOMMON_OPT += -march=skylake-avx512 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=skylake-avx512 | |||
endif | |||
endif | |||
endif | |||
ifeq ($(OSNAME), CYGWIN_NT) | |||
CCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
FCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
endif | |||
ifeq ($(OSNAME), WINNT) | |||
ifeq ($(C_COMPILER), GCC) | |||
CCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
FCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
endif | |||
endif | |||
endif | |||
endif | |||
# ZEN: when HAVE_AVX512VL is defined (and NO_AVX512 is not), build with
# -march=skylake-avx512.
ifeq ($(CORE), ZEN) | |||
ifdef HAVE_AVX512VL | |||
ifndef NO_AVX512 | |||
CCOMMON_OPT += -march=skylake-avx512 | |||
ifneq ($(F_COMPILER), NAG) | |||
FCOMMON_OPT += -march=skylake-avx512 | |||
endif | |||
ifeq ($(OSNAME), CYGWIN_NT) | |||
CCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
FCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
endif | |||
ifeq ($(OSNAME), WINNT) | |||
ifeq ($(C_COMPILER), GCC) | |||
CCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
FCOMMON_OPT += -fno-asynchronous-unwind-tables | |||
endif | |||
endif | |||
endif | |||
endif | |||
endif | |||
# Second AVX2 section, gated on the compiler: GCC gets -mavx2 only when
# GCCVERSIONCHECK is one of 011/110/111, clang gets it unconditionally.
ifdef HAVE_AVX2 | |||
ifndef NO_AVX2 | |||
ifeq ($(C_COMPILER), GCC) | |||
# AVX2 support was added in 4.7.0 | |||
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7) | |||
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) | |||
CCOMMON_OPT += -mavx2 | |||
endif | |||
else | |||
ifeq ($(C_COMPILER), CLANG) | |||
CCOMMON_OPT += -mavx2 | |||
endif | |||
endif | |||
# Fortran side; this test is true when NOFORTRAN is empty or 0.
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
# AVX2 support was added in 4.7.0 | |||
# NOTE(review): the := assignments below overwrite GCCVERSIONGTEQ4,
# GCCVERSIONGTEQ5 and GCCMINORVERSIONGTEQ7 with values probed from $(FC);
# confirm nothing evaluated later relies on the previous values.
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4) | |||
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5) | |||
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7) | |||
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7) | |||
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) | |||
FCOMMON_OPT += -mavx2 | |||
endif | |||
else | |||
ifeq ($(F_COMPILER), FLANG) | |||
FCOMMON_OPT += -mavx2 | |||
endif | |||
endif | |||
endif | |||
endif | |||
endif | |||
endif | |||
# Interix: set the archiver flags to "-m x64".
ifeq ($(OSNAME), Interix) | |||
ARFLAGS = -m x64 | |||
endif | |||
# MKL link line. Everywhere except Darwin the three MKL libraries are wrapped
# in a linker group (--start-group/--end-group); on Darwin they are listed
# plainly. SMP selects mkl_intel_thread, otherwise mkl_sequential is used.
ifneq ($(OSNAME), Darwin) | |||
ifdef SMP | |||
LIBMKL = -L$(MKLPATH)/em64t -Wl,-rpath,$(MKLPATH)/em64t -Wl,--start-group -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -Wl,--end-group -lguide -lpthread -lm | |||
else | |||
LIBMKL = -L$(MKLPATH)/em64t -Wl,-rpath,$(MKLPATH)/em64t -Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lguide -lpthread -lm | |||
endif | |||
else | |||
ifdef SMP | |||
LIBMKL = -L$(MKLPATH)/em64t -Wl,-rpath,$(MKLPATH)/em64t -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lguide -lpthread -lm | |||
else | |||
LIBMKL = -L$(MKLPATH)/em64t -Wl,-rpath,$(MKLPATH)/em64t -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lguide -lpthread -lm | |||
endif | |||
endif | |||
# libFLAME link line (independent of SMP).
LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame $(TOPDIR)/$(LIBNAME) -lgfortran -lpthread -lm | |||
# ATLAS link line: pt-prefixed cblas/f77blas plus -lpthread when SMP is
# defined, the serial libraries otherwise.
ifdef SMP | |||
LIBATLAS = -L$(ATLASPATH)64 -llapack -lptcblas -lptf77blas -latlas -lpthread -lm | |||
else | |||
LIBATLAS = -L$(ATLASPATH)64 -llapack -lcblas -lf77blas -latlas -lm | |||
endif | |||
# ACML link line, selected by Fortran compiler. The F_COMPILER values tested
# here are mutually exclusive, so they are written as a single chain. For the
# compilers with a threaded ACML build, SMP switches to the *_mp directory
# and libacml_mp.
ifeq ($(F_COMPILER), g77) | |||
LIBACML = -L$(ACMLPATH)/gnu64/lib -Wl,-rpath,$(ACMLPATH)/gnu64/lib -lacml -lacml_mv -lg2c -lm | |||
else ifeq ($(F_COMPILER), GFORTRAN) | |||
ifdef SMP | |||
LIBACML = -L$(ACMLPATH)/gfortran64_mp/lib -Wl,-rpath,$(ACMLPATH)/gfortran64_mp/lib -lacml_mp -lacml_mv -lgfortran -lgomp -lm | |||
else | |||
LIBACML = -L$(ACMLPATH)/gfortran64/lib -Wl,-rpath,$(ACMLPATH)/gfortran64/lib -lacml -lacml_mv -lgfortran -lm | |||
endif | |||
else ifeq ($(F_COMPILER), INTEL) | |||
ifdef SMP | |||
LIBACML = -L$(ACMLPATH)/ifort64_mp/lib -Wl,-rpath,$(ACMLPATH)/ifort64_mp/lib -lacml_mp -lacml_mv -lifcoremt_pic -liomp5 -lirc -lm -lpthread -ldl | |||
else | |||
LIBACML = -L$(ACMLPATH)/ifort64/lib -Wl,-rpath,$(ACMLPATH)/ifort64/lib -lacml -lacml_mv -lifcoremt_pic -lirc -lm -lpthread -ldl | |||
endif | |||
else ifeq ($(F_COMPILER), OPEN64) | |||
ifdef SMP | |||
LIBACML = -L$(ACMLPATH)/open64_mp/lib -Wl,-rpath,$(ACMLPATH)/open64_mp/lib -lacml_mp -lacml_mv -lm -lpthread | |||
else | |||
LIBACML = -L$(ACMLPATH)/open64/lib -Wl,-rpath,$(ACMLPATH)/open64/lib -lacml -lacml_mv -lm | |||
endif | |||
endif | |||
# ACML link line for the PGI Fortran compiler. The PGI runtime libraries are
# taken from $(PGIPATH).
# With SMP the threaded ACML build is used: the pgi64_mp directory together
# with libacml_mp. (The SMP branch previously linked -lacml while pointing at
# the pgi64_mp directory, unlike every other *_mp branch in this file.)
ifeq ($(F_COMPILER), pgf77) | |||
ifndef SMP | |||
LIBACML = -L$(ACMLPATH)/pgi64/lib -Wl,-rpath,$(ACMLPATH)/pgi64/lib -lacml -lacml_mv -L$(PGIPATH) -Wl,-rpath,$(PGIPATH) -lpgftnrtl -lnspgc -lpgmp -lpgc | |||
else | |||
LIBACML = -L$(ACMLPATH)/pgi64_mp/lib -Wl,-rpath,$(ACMLPATH)/pgi64_mp/lib -lacml_mp -lacml_mv -L$(PGIPATH) -Wl,-rpath,$(PGIPATH) -lpgftnrtl -lnspgc -lpgmp -lpgc | |||
endif | |||
endif | |||
# ACML link line for the PathScale and Sun (f90) Fortran compilers; the two
# F_COMPILER values are mutually exclusive, so they form one chain.
# PathScale with SMP uses the pathscale64_mp directory, libacml_mp and -lopenmp.
ifeq ($(F_COMPILER), PATHSCALE) | |||
ifdef SMP | |||
LIBACML = -L$(ACMLPATH)/pathscale64_mp/lib -Wl,-rpath,$(ACMLPATH)/pathscale64_mp/lib -lacml_mp -lacml_mv -Wl,-rpath,$(PATHSCALEPATH) -L$(PATHSCALEPATH) -lopenmp -lpathfortran -lm | |||
else | |||
LIBACML = -L$(ACMLPATH)/pathscale64/lib -Wl,-rpath,$(ACMLPATH)/pathscale64/lib -lacml -lacml_mv -Wl,-rpath,$(PATHSCALEPATH) -L$(PATHSCALEPATH) -lpathfortran -lm | |||
endif | |||
else ifeq ($(F_COMPILER), f90) | |||
LIBACML = -L$(ACMLPATH)/sun64/lib -Wl,-R,$(ACMLPATH)/sun64/lib -L$(SUNPATH)/lib/amd64 -Wl,-R,$(SUNPATH)/lib/amd64 -lacml -lacml_mv -lfsu | |||
endif | |||
# Sun Performance Library and Apple vecLib link lines.
LIBSUNPERF = -L$(SUNPATH)/lib/amd64 -L$(SUNPATH)/rtlibs/amd64 -Wl,-R,$(SUNPATH)/lib/amd64 -Wl,-R,$(SUNPATH)/rtlibs/amd64 -lsunperf -lfui -lfsu -lmtsk | |||
LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib |
@@ -1,16 +0,0 @@ | |||
# Per-core flags for IBM Z: select the matching -march level and enable
# -mzvector for C and Fortran. Z14 additionally adds -O3 to the C flags only.
ifeq ($(CORE), Z13) | |||
CCOMMON_OPT += -march=z13 -mzvector | |||
FCOMMON_OPT += -march=z13 -mzvector | |||
else ifeq ($(CORE), Z14) | |||
CCOMMON_OPT += -march=z14 -mzvector -O3 | |||
FCOMMON_OPT += -march=z14 -mzvector | |||
endif | |||
# gcc contracts floating-point expressions by default; request the same
# from clang explicitly.
ifeq ($(C_COMPILER), CLANG) | |||
CCOMMON_OPT += -ffp-contract=on | |||
endif |
@@ -1,370 +0,0 @@ | |||
# OpenBLAS | |||
[](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | |||
Cirrus CI: [](https://cirrus-ci.com/github/xianyi/OpenBLAS) | |||
[](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop) | |||
OSUOSL POWERCI [](http://powerci.osuosl.org/job/OpenBLAS_gh/job/develop/) | |||
OSUOSL IBMZ-CI [](http://ibmz-ci.osuosl.org/job/OpenBLAS-Z/job/develop/) | |||
## Introduction | |||
OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version. | |||
For more information about OpenBLAS, please see: | |||
- The documentation at [openmathlib.org/OpenBLAS/docs/](http://www.openmathlib.org/OpenBLAS/docs), | |||
- The home page at [openmathlib.org/OpenBLAS/](http://www.openmathlib.org/OpenBLAS). | |||
For a general introduction to the BLAS routines, please refer to the extensive documentation of their reference implementation hosted at netlib: | |||
<https://www.netlib.org/blas>. On that site you will likewise find documentation for the reference implementation of the higher-level library LAPACK - the **L**inear **A**lgebra **Pack**age that comes included with OpenBLAS. If you are looking for a general primer or refresher on Linear Algebra, the set of six | |||
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare [here](https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/) or YouTube [here](https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek) may be helpful. | |||
## Binary Packages | |||
We provide official binary packages for the following platforms: | |||
* Windows x86/x86_64 | |||
* Windows arm64 (woa) | |||
You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the [Releases section of the GitHub project page](https://github.com/OpenMathLib/OpenBLAS/releases). | |||
OpenBLAS is also packaged for many package managers - see [the installation section of the docs](http://www.openmathlib.org/OpenBLAS/docs/install/) for details. | |||
## Installation from Source | |||
Obtain the source code from https://github.com/OpenMathLib/OpenBLAS/. Note that the default branch | |||
is `develop` (a `master` branch is still present, but far out of date). | |||
Build-time parameters can be chosen in `Makefile.rule`, see there for a short description of each option. | |||
Most options can also be given directly on the command line as parameters to your `make` or `cmake` invocation. | |||
### Dependencies | |||
Building OpenBLAS requires the following to be installed: | |||
* GNU Make or CMake | |||
* A C compiler, e.g. GCC or Clang | |||
* A Fortran compiler (optional, for LAPACK) | |||
In general, using a recent version of the compiler is strongly recommended. | |||
If a Fortran compiler is not available, it is possible to compile an older version of the included LAPACK | |||
that has been machine-translated to C. | |||
### Normal compile | |||
Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically. | |||
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`. | |||
The full target list is in the file `TargetList.txt`; other build options are documented in `Makefile.rule` and | |||
can either be set there (typically by removing the comment character from the respective line), or used on the | |||
`make` command line. | |||
Note that when you run `make install` after building, you need to repeat all command line options you provided to `make` | |||
in the build step, as some settings like the supported maximum number of threads are automatically derived from the | |||
build host by default, which might not be what you want. | |||
For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel | |||
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any | |||
build options you plan to set. | |||
For more details, see the [Building from source](http://www.openmathlib.org/OpenBLAS/docs/install/#building-from-source) | |||
section in the docs. | |||
### Cross compile | |||
Set `CC` and `FC` to point to the cross toolchains, and if you use `make`, also set `HOSTCC` to your host C compiler. | |||
The target must be specified explicitly when cross compiling. | |||
Examples: | |||
* On a Linux system, cross-compiling to an older MIPS64 router board: | |||
```sh | |||
make BINARY=64 CC=mipsisa64r6el-linux-gnuabi64-gcc FC=mipsisa64r6el-linux-gnuabi64-gfortran HOSTCC=gcc TARGET=P6600 | |||
``` | |||
* or to a 32-bit Windows (x86) target, using the MinGW-w64 toolchain: | |||
```sh | |||
make CC="i686-w64-mingw32-gcc -Bstatic" FC="i686-w64-mingw32-gfortran -static-libgfortran" TARGET=HASWELL BINARY=32 CROSS=1 NUM_THREADS=20 CONSISTENT_FPCSR=1 HOSTCC=gcc | |||
``` | |||
You can find instructions for other cases both in the "Supported Systems" section below and in | |||
the [Building from source docs](http://www.openmathlib.org/OpenBLAS/docs/install). | |||
The `.yml` scripts included with the sources (which contain the | |||
build scripts for the "continuous integration" (CI) build tests automatically run on every proposed change to the sources) may also provide additional hints. | |||
When compiling for a more modern CPU target of the same architecture, e.g. `TARGET=SKYLAKEX` on a `HASWELL` host, option `CROSS=1` can be used to suppress the automatic invocation of the tests at the end of the build. | |||
### Debug version | |||
A debug version can be built using `make DEBUG=1`. | |||
### Compile with MASS support on Power CPU (optional) | |||
The [IBM MASS](https://www.ibm.com/support/home/product/W511326D80541V01/other_software/mathematical_acceleration_subsystem) library consists of a set of mathematical functions for C, C++, and Fortran applications that are tuned for optimum performance on POWER architectures. | |||
OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER. | |||
The library can be installed as shown: | |||
* On Ubuntu: | |||
```sh | |||
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add - | |||
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list | |||
sudo apt-get update | |||
sudo apt-get install libxlmass-devel.8.1.5 | |||
``` | |||
* On RHEL/CentOS: | |||
```sh | |||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key | |||
sudo rpm --import repomd.xml.key | |||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo | |||
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/ | |||
sudo yum install libxlmass-devel.8.1.5 | |||
``` | |||
After installing the MASS library, compile OpenBLAS with `USE_MASS=1`. | |||
For example, to compile on Power8 with MASS support: `make USE_MASS=1 TARGET=POWER8`. | |||
### Install to a specific directory (optional) | |||
Use `PREFIX=` when invoking `make`, for example | |||
```sh | |||
make install PREFIX=your_installation_directory | |||
``` | |||
(along with all options you added on the `make` command line in the preceding build step) | |||
The default installation directory is `/opt/OpenBLAS`. | |||
## Supported CPUs and Operating Systems | |||
Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by the 2010 GotoBLAS. | |||
### Additional supported CPUs | |||
#### x86/x86-64 | |||
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes. | |||
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64. | |||
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64. | |||
- **Intel Skylake-X**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64. | |||
- **Intel Cooper Lake**: as Skylake-X with improved BFLOAT16 support. | |||
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes. | |||
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar) | |||
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations. | |||
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations. | |||
- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4) | |||
#### MIPS32 | |||
- **MIPS 1004K**: uses P5600 codes | |||
- **MIPS 24K**: uses P5600 codes | |||
#### MIPS64 | |||
- **ICT Loongson 3A**: Optimized Level-3 BLAS and parts of Level-1,2. | |||
- **ICT Loongson 3B**: Experimental | |||
#### ARM | |||
- **ARMv6**: Optimized BLAS for vfpv2 and vfpv3-d16 (e.g. BCM2835, Cortex M0+) | |||
- **ARMv7**: Optimized BLAS for vfpv3-d32 (e.g. Cortex A8, A9 and A15) | |||
#### ARM64 | |||
- **ARMv8**: Basic ARMV8 with small caches, optimized Level-3 and Level-2 BLAS | |||
- **Cortex-A53**: same as ARMV8 (different cpu specifications) | |||
- **Cortex-A55**: same as ARMV8 (different cpu specifications) | |||
- **Cortex A57**: Optimized Level-3 and Level-2 functions | |||
- **Cortex A72**: same as A57 (different cpu specifications) | |||
- **Cortex A73**: same as A57 (different cpu specifications) | |||
- **Cortex A76**: same as A57 (different cpu specifications) | |||
- **Falkor**: same as A57 (different cpu specifications) | |||
- **ThunderX**: Optimized some Level-1 functions | |||
- **ThunderX2T99**: Optimized Level-3 BLAS and parts of Levels 1 and 2 | |||
- **ThunderX3T110** | |||
- **TSV110**: Optimized some Level-3 helper functions | |||
- **EMAG 8180**: preliminary support based on A57 | |||
- **Neoverse N1**: (AWS Graviton2) preliminary support | |||
- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS | |||
- **Apple Vortex**: preliminary support based on ThunderX2/3 | |||
- **A64FX**: preliminary support, optimized Level-3 BLAS | |||
- **ARMV8SVE**: any ARMV8 cpu with SVE extensions | |||
#### PPC/PPC64 | |||
- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1` | |||
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only. | |||
- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2. | |||
- **AIX**: Dynamic architecture with OpenXL and OpenMP. | |||
```sh | |||
make CC=ibm-clang_r FC=xlf_r TARGET=POWER7 BINARY=64 USE_OPENMP=1 INTERFACE64=1 DYNAMIC_ARCH=1 USE_THREAD=1 | |||
``` | |||
#### IBM zEnterprise System | |||
- **Z13**: Optimized Level-3 BLAS and Level-1,2 | |||
- **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2 | |||
#### RISC-V | |||
- **C910V**: Optimized Level-3 BLAS (real) and Level-1,2 by RISC-V Vector extension 0.7.1. | |||
```sh | |||
make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran | |||
``` | |||
(also known to work on C906 as long as you use only single-precision functions - its instruction set support appears to be incomplete in double precision) | |||
- **x280**: Level-3 BLAS and Level-1,2 are optimized by RISC-V Vector extension 1.0. | |||
```sh | |||
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran | |||
``` | |||
- **ZVL???B**: Level-3 BLAS and Level-1,2 including vectorised kernels targeting generic RISCV cores with vector support with registers of at least the corresponding width; ZVL128B and ZVL256B are available. | |||
e.g.: | |||
```sh | |||
make TARGET=RISCV64_ZVL256B CFLAGS="-DTARGET=RISCV64_ZVL256B" \ | |||
BINARY=64 ARCH=riscv64 CC='clang -target riscv64-unknown-linux-gnu' \ | |||
AR=riscv64-unknown-linux-gnu-ar AS=riscv64-unknown-linux-gnu-gcc \ | |||
LD=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran \ | |||
HOSTCC=gcc HOSTFC=gfortran -j | |||
``` | |||
#### LOONGARCH64 | |||
- **LA64_GENERIC**: Optimized Level-3, Level-2 and Level-1 BLAS with scalar instruction | |||
```sh | |||
make HOSTCC=gcc TARGET=LA64_GENERIC CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1 | |||
``` | |||
The old-style TARGET=LOONGSONGENERIC is still supported | |||
- **LA264**: Optimized Level-3, Level-2 and Level-1 BLAS with LSX instruction | |||
```sh | |||
make HOSTCC=gcc TARGET=LA264 CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1 | |||
``` | |||
The old-style TARGET=LOONGSON2K1000 is still supported | |||
- **LA464**: Optimized Level-3, Level-2 and Level-1 BLAS with LASX instruction | |||
```sh | |||
make HOSTCC=gcc TARGET=LA464 CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1 | |||
``` | |||
The old-style TARGET=LOONGSON3R5 is still supported | |||
### Support for multiple targets in a single library | |||
OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake. | |||
For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX, Cooper Lake, Sapphire Rapids. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally, there is an option `DYNAMIC_LIST` that allows you to specify an individual list of targets to include instead of the default. | |||
`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias, | |||
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano. | |||
On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus. If compiler support for SVE is available at build time, support for NeoverseN2, NeoverseV1 as well as generic ArmV8SVE targets is also enabled. | |||
For **POWER**, the list encompasses POWER6, POWER8 and POWER9. POWER10 is additionally available if a sufficiently recent compiler is used for the build. | |||
On **ZARCH**, it comprises Z13 and Z14 as well as generic zarch support. | |||
On **riscv64**, DYNAMIC_ARCH enables support for riscv64_zvl128b and riscv64_zvl256b in addition to generic riscv64 support. A compiler that supports RVV 1.0 is required to build OpenBLAS for riscv64 when DYNAMIC_ARCH is enabled. | |||
On **LoongArch64**, it comprises LA264 and LA464 as well as generic LoongArch64 support. | |||
The `TARGET` option can - and usually **should** - be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the common code in the library; usually you will want to set this to the oldest model you expect to encounter. | |||
Failure to specify this may lead to advanced instructions being used by the compiler, just because the build host happens to support them. This is most likely to happen when aggressive optimization options are in effect, and the resulting library may then crash with an | |||
illegal instruction error on weaker hardware, before it even reaches the BLAS routines specifically included for that cpu. | |||
Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library. | |||
### Supported OS | |||
- **GNU/Linux** | |||
- **MinGW or Visual Studio (CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>. | |||
- **Darwin/macOS/OSX/iOS**: Experimental. Although GotoBLAS2 already supports Darwin, we are not OSX/iOS experts. | |||
- **FreeBSD**: Supported by the community. We don't actively test the library on this OS. | |||
- **OpenBSD**: Supported by the community. We don't actively test the library on this OS. | |||
- **NetBSD**: Supported by the community. We don't actively test the library on this OS. | |||
- **DragonFly BSD**: Supported by the community. We don't actively test the library on this OS. | |||
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>. | |||
- **AIX**: Supported on PPC up to POWER10 | |||
- **Haiku**: Supported by the community. We don't actively test the library on this OS. | |||
- **SunOS**: Supported by the community. We don't actively test the library on this OS. | |||
- **Cortex-M**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-on-Cortex-M>. | |||
## Usage | |||
Statically link with `libopenblas.a` or dynamically link with `-lopenblas` if OpenBLAS was | |||
compiled as a shared library. | |||
### Setting the number of threads using environment variables | |||
Environment variables are used to specify a maximum number of threads. | |||
For example, | |||
```sh | |||
export OPENBLAS_NUM_THREADS=4 | |||
export GOTO_NUM_THREADS=4 | |||
export OMP_NUM_THREADS=4 | |||
``` | |||
The priorities are `OPENBLAS_NUM_THREADS` > `GOTO_NUM_THREADS` > `OMP_NUM_THREADS`. | |||
If you compile this library with `USE_OPENMP=1`, you should set the `OMP_NUM_THREADS` | |||
environment variable; OpenBLAS ignores `OPENBLAS_NUM_THREADS` and `GOTO_NUM_THREADS` when | |||
compiled with `USE_OPENMP=1`. | |||
### Setting the number of threads at runtime | |||
We provide the following functions to control the number of threads at runtime: | |||
```c | |||
void goto_set_num_threads(int num_threads); | |||
void openblas_set_num_threads(int num_threads); | |||
``` | |||
Note that these are only used once at library initialization, and are not available for | |||
fine-tuning thread numbers in individual BLAS calls. | |||
If you compile this library with `USE_OPENMP=1`, you should use the above functions too. | |||
## Reporting bugs | |||
Please submit an issue in https://github.com/OpenMathLib/OpenBLAS/issues. | |||
## Contact | |||
* Use GitHub Discussions: https://github.com/OpenMathLib/OpenBLAS/discussions | |||
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users | |||
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev | |||
## Change log | |||
Please see Changelog.txt. | |||
## Troubleshooting | |||
* Please read the [FAQ](http://www.openmathlib.org/OpenBLAS/docs/faq) section of the docs first. | |||
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD. | |||
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. | |||
Clang 3.0 will generate the wrong AVX binary code. | |||
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake/CooperLake AVX512 kernels | |||
* Please use LLVM version 18 and above (version 19 and above on Windows) if you plan to use | |||
its new flang compiler for Fortran | |||
* Please use GCC version 11 and above to compile OpenBLAS on the POWER architecture | |||
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`), | |||
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build | |||
the library with `BIGNUMA=1`. | |||
* OpenBLAS does not set processor affinity by default. | |||
On Linux, you can enable processor affinity by commenting out the line `NO_AFFINITY=1` in | |||
Makefile.rule. However, note that this may cause | |||
[a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html). | |||
* On Loongson 3A, `make test` may fail with a `pthread_create` error (`EAGAIN`). | |||
However, it will be okay when you run the same test case on the shell. | |||
## Contributing | |||
1. [Check for open issues](https://github.com/OpenMathLib/OpenBLAS/issues) or open a fresh issue | |||
to start a discussion around a feature idea or a bug. | |||
2. Fork the [OpenBLAS](https://github.com/OpenMathLib/OpenBLAS) repository to start making your changes. | |||
3. Write a test which shows that the bug was fixed or that the feature works as expected. | |||
4. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`. | |||
## Donation | |||
Please see [the donations section](http://www.openmathlib.org/OpenBLAS/docs/about/#donations) in the docs. |
@@ -1,20 +0,0 @@ | |||
# Security Policy | |||
## Supported Versions | |||
It is generally recommended to use the latest release as this project | |||
does not maintain multiple stable branches and providing packages e.g. | |||
for Linux distributions is outside our scope. In particular, versions | |||
before 0.3.18 can be assumed to carry the out-of-bounds-read error in | |||
the LAPACK ?LARRV family of functions that was the subject of | |||
CVE-2021-4048. | |||
## Reporting a Vulnerability | |||
If you suspect that you have found a vulnerability - a defect that could | |||
be abused to compromise the security of a user's code or systems - please | |||
do not use the normal github issue tracker (except perhaps to post a general | |||
warning if you deem that necessary). Instead, please contact the project | |||
maintainers through the email addresses given in their github user profiles. | |||
Defects found in the "lapack-netlib" subtree should ideally be reported to | |||
the maintainers of the reference implementation of LAPACK, lapack@icl.utk.edu |
@@ -1,154 +0,0 @@ | |||
Force Target Examples: | |||
make TARGET=NEHALEM | |||
make TARGET=LOONGSON3A BINARY=64 | |||
make TARGET=ISTANBUL | |||
Supported List: | |||
1.X86/X86_64 | |||
a)Intel CPU: | |||
P2 | |||
KATMAI | |||
COPPERMINE | |||
NORTHWOOD | |||
PRESCOTT | |||
BANIAS | |||
YONAH | |||
CORE2 | |||
PENRYN | |||
DUNNINGTON | |||
NEHALEM | |||
SANDYBRIDGE | |||
HASWELL | |||
SKYLAKEX | |||
ATOM | |||
COOPERLAKE | |||
SAPPHIRERAPIDS | |||
b)AMD CPU: | |||
ATHLON | |||
OPTERON | |||
OPTERON_SSE3 | |||
BARCELONA | |||
SHANGHAI | |||
ISTANBUL | |||
BOBCAT | |||
BULLDOZER | |||
PILEDRIVER | |||
STEAMROLLER | |||
EXCAVATOR | |||
ZEN | |||
c)VIA CPU: | |||
SSE_GENERIC | |||
VIAC3 | |||
NANO | |||
2.Power CPU: | |||
POWER4 | |||
POWER5 | |||
POWER6 | |||
POWER7 | |||
POWER8 | |||
POWER9 | |||
POWER10 | |||
POWER11 | |||
PPCG4 | |||
PPC970 | |||
PPC970MP | |||
PPC440 | |||
PPC440FP2 | |||
CELL | |||
3.MIPS CPU: | |||
P5600 | |||
MIPS1004K | |||
MIPS24K | |||
4.MIPS64 CPU: | |||
MIPS64_GENERIC | |||
SICORTEX | |||
LOONGSON3A | |||
LOONGSON3B | |||
I6400 | |||
P6600 | |||
I6500 | |||
5.IA64 CPU: | |||
ITANIUM2 | |||
6.SPARC CPU: | |||
SPARC | |||
SPARCV7 | |||
7.ARM CPU: | |||
CORTEXA15 | |||
CORTEXA9 | |||
ARMV7 | |||
ARMV6 | |||
ARMV5 | |||
8.ARM 64-bit CPU: | |||
ARMV8 | |||
CORTEXA53 | |||
CORTEXA57 | |||
CORTEXA72 | |||
CORTEXA73 | |||
CORTEXA76 | |||
CORTEXA510 | |||
CORTEXA710 | |||
CORTEXX1 | |||
CORTEXX2 | |||
NEOVERSEN1 | |||
NEOVERSEV1 | |||
NEOVERSEN2 | |||
NEOVERSEV2 | |||
CORTEXA55 | |||
EMAG8180 | |||
FALKOR | |||
THUNDERX | |||
THUNDERX2T99 | |||
TSV110 | |||
THUNDERX3T110 | |||
VORTEX | |||
A64FX | |||
ARMV8SVE | |||
ARMV9SME | |||
FT2000 | |||
9.System Z: | |||
ZARCH_GENERIC | |||
Z13 | |||
Z14 | |||
10.RISC-V 64: | |||
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54) | |||
RISCV64_ZVL128B | |||
C910V | |||
x280 | |||
RISCV64_ZVL256B | |||
11.LOONGARCH64: | |||
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 are legacy names; | |||
// the more standardized names LA64_GENERIC/LA264/LA464 are recommended | |||
// instead. You can still specify TARGET as | |||
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 at compile time or at runtime, | |||
// and they will be mapped internally to LA64_GENERIC/LA264/LA464. | |||
LOONGSONGENERIC | |||
LOONGSON2K1000 | |||
LOONGSON3R5 | |||
LA64_GENERIC | |||
LA264 | |||
LA464 | |||
12. Elbrus E2000: | |||
E2K | |||
13. Alpha: | |||
EV4 | |||
EV5 | |||
EV6 | |||
14.CSKY: | |||
CSKY | |||
CK860FV |
@@ -1,213 +0,0 @@ | |||
# Notes on OpenBLAS usage | |||
## Usage | |||
#### Program is Terminated. Because you tried to allocate too many memory regions | |||
In OpenBLAS, we manage a pool of memory buffers, and the number of buffers | |||
is defined as follows: | |||
``` | |||
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2) | |||
``` | |||
This error indicates that the program exceeded the number of buffers. | |||
Please build OpenBLAS with a larger `NUM_THREADS`. For example, `make | |||
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set | |||
`MAX_CPU_NUMBER=NUM_THREADS`. | |||
Despite its name, and due to the use of memory buffers in functions like SGEMM, | |||
the setting of NUM_THREADS can be relevant even for a single-threaded build | |||
of OpenBLAS, if such functions get called by multiple threads of a program | |||
that uses OpenBLAS. In some cases, the affected code may simply crash or throw | |||
a segmentation fault without displaying the above warning first. | |||
Note that the number of threads used at runtime can be altered to differ from the | |||
value NUM_THREADS was set to at build time. At runtime, the actual number of | |||
threads can be set anywhere from 1 to the build's NUM_THREADS (note however, | |||
that this does not change the number of memory buffers that will be allocated, | |||
which is set at build time). The number of threads for a process can be set by | |||
using the mechanisms described below. | |||
#### How can I use OpenBLAS in multi-threaded applications? | |||
If your application is already multi-threaded, it will conflict with OpenBLAS | |||
multi-threading. Thus, you must set OpenBLAS to use a single thread in one of the | |||
following ways: | |||
* Set `export OPENBLAS_NUM_THREADS=1` in the environment. | |||
* Call `openblas_set_num_threads(1)` in the application at runtime. | |||
* Build a single-threaded version of OpenBLAS, e.g. `make USE_THREAD=0`. | |||
If the application is parallelized by OpenMP, please use OpenBLAS built with | |||
`USE_OPENMP=1` | |||
#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH | |||
The environment variable that controls the kernel selection is | |||
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`), e.g. `export | |||
OPENBLAS_CORETYPE=Haswell`. The function `char* openblas_get_corename()` | |||
returns the target that is in use. | |||
#### How can I disable OpenBLAS threading affinity at runtime? | |||
You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment | |||
variable to disable threading affinity at runtime. For example, before | |||
running your program: | |||
``` | |||
export OPENBLAS_MAIN_FREE=1 | |||
``` | |||
Alternatively, you can disable the affinity feature by enabling `NO_AFFINITY=1` | |||
in `Makefile.rule`. | |||
## Linking with the library | |||
* Link with shared library | |||
`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas` | |||
If the library is multithreaded, please add `-lpthread`. If the library | |||
contains LAPACK functions, please add `-lgfortran` or other Fortran libs. | |||
* Link with static library | |||
`gcc -o test test.c /your/path/libopenblas.a` | |||
You can download `test.c` from https://gist.github.com/xianyi/5780018 | |||
On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by | |||
default), custom programs statically linked against `libopenblas.a` should also | |||
link with the pthread library e.g.: | |||
``` | |||
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread | |||
``` | |||
Failing to add the `-lpthread` flag will cause errors such as: | |||
``` | |||
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory': | |||
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock' | |||
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock' | |||
... | |||
``` | |||
## Code examples | |||
#### Call CBLAS interface | |||
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656 | |||
``` | |||
#include <cblas.h>
#include <stdio.h>

/*
 * Minimal cblas_dgemm example.
 *
 * Computes C = 1 * A * B^T + 2 * C with column-major storage, where A and B
 * are 3x2 (leading dimension 3) and C is 3x3 (leading dimension 3), then
 * prints the nine entries of C on one line.
 *
 * Returns 0.
 */
int main(void)
{
  int i=0;
  double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
  double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
  double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};

  /* Arguments: order, transA, transB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc */
  cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);

  for(i=0; i<9; i++)
    printf("%lf ", C[i]);
  printf("\n");

  return 0;
}
``` | |||
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran` | |||
#### Call BLAS Fortran interface | |||
This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018 | |||
``` | |||
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
#include <time.h>

extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);

/*
 * Times one call to the Fortran-interface dgemm_ and appends the result to
 * timeDGEMM.txt in the current directory.
 *
 * Usage: ./time_dgemm <m> <n> <k>
 *   A is m x k, B is k x n and C is m x n; all three are filled with the
 *   repeating pattern 1, 2, 3 and multiplied with alpha = 1.2, beta = 0.001.
 *
 * Returns 0 on success, 1 if the arguments are missing or not positive, if
 * a matrix cannot be allocated, or if the result file cannot be opened.
 */
int main(int argc, char* argv[])
{
  int i;
  printf("test!\n");
  if(argc<4){
    printf("Input Error\n");
    return 1;
  }

  int m = atoi(argv[1]);
  int n = atoi(argv[2]);
  int k = atoi(argv[3]);

  /* atoi() yields 0 for non-numeric text; zero or negative sizes would make
     the allocations and the dgemm_ call below meaningless. */
  if (m <= 0 || n <= 0 || k <= 0) {
    printf("Input Error\n");
    return 1;
  }

  int sizeofa = m * k;
  int sizeofb = k * n;
  int sizeofc = m * n;
  char ta = 'N';
  char tb = 'N';
  double alpha = 1.2;
  double beta = 0.001;

  struct timeval start,finish;
  double duration;

  double* A = (double*)malloc(sizeof(double) * sizeofa);
  double* B = (double*)malloc(sizeof(double) * sizeofb);
  double* C = (double*)malloc(sizeof(double) * sizeofc);

  /* Writing through a failed allocation would crash in the fill loops. */
  if (A == NULL || B == NULL || C == NULL) {
    fprintf(stderr, "Out of Memory\n");
    free(A);
    free(B);
    free(C);
    return 1;
  }

  srand((unsigned)time(NULL));

  for (i=0; i<sizeofa; i++)
    A[i] = i%3+1;//(rand()%100)/10.0;

  for (i=0; i<sizeofb; i++)
    B[i] = i%3+1;//(rand()%100)/10.0;

  for (i=0; i<sizeofc; i++)
    C[i] = i%3+1;//(rand()%100)/10.0;

  //#if 0
  printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);
  gettimeofday(&start, NULL);
  dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
  gettimeofday(&finish, NULL);

  duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;

  /* 2*m*n*k floating-point operations; the 1.0e-6 factor makes this MFLOPS,
     matching the unit written to the file. */
  double mflops = 2.0 * m *n*k;
  mflops = mflops/duration*1.0e-6;

  FILE *fp;
  fp = fopen("timeDGEMM.txt", "a");
  if (fp == NULL) {
    perror("timeDGEMM.txt");
    free(A);
    free(B);
    free(C);
    return 1;
  }
  fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, mflops);
  fclose(fp);

  free(A);
  free(B);
  free(C);
  return 0;
}
``` | |||
` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a` | |||
` ./time_dgemm <m> <n> <k> ` | |||
## Troubleshooting | |||
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first. | |||
* Please use gcc version 4.6 or above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD. | |||
* Please use Clang version 3.1 or above to compile the library on the Sandy Bridge microarchitecture. Clang 3.0 generates incorrect AVX binary code. | |||
* The number of CPUs/cores should be less than or equal to 256. On Linux x86_64 (amd64), there is experimental support for up to 1024 CPUs/cores and 128 NUMA nodes if you build the library with `BIGNUMA=1`. | |||
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting out the line `NO_AFFINITY=1` in Makefile.rule. However, note that this may cause [a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html). | |||
* On Loongson 3A, `make test` may fail with a `pthread_create` error (`EAGAIN`). However, it will be okay when you run the same test case on the shell. | |||
## BLAS reference manual | |||
If you want to understand every BLAS function and definition, please read | |||
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm) | |||
or [netlib.org](http://netlib.org/blas/) | |||
Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions) | |||
## How to reference OpenBLAS. | |||
You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications). | |||
Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly. | |||
@@ -1,78 +0,0 @@ | |||
# General AppVeyor settings: build numbering, platform, checkout options
# and commit filtering.
version: 0.2.19.{build}

#environment:

platform:
  - x64

os: Visual Studio 2017

configuration: Release

clone_folder: c:\projects\OpenBLAS

init:
  - git config --global core.autocrlf input

clone_depth: 5

skip_tags: true

matrix:
  fast_finish: false

skip_commits:
  # Add [av skip] to commit messages
  message: /\[av skip\]/
# Build matrix. Each active entry sets COMPILER (and optionally other
# variables) that the install / before_build steps branch on.
environment:
  global:
    CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
  matrix:
    # Currently disabled configurations:
    # - COMPILER: clang-cl
    #   WITH_FORTRAN: ON
    # - COMPILER: clang-cl
    #   DYNAMIC_ARCH: ON
    #   WITH_FORTRAN: OFF
    # - COMPILER: cl
    # - COMPILER: MinGW64-gcc-7.2.0-mingw
    #   DYNAMIC_ARCH: OFF
    #   WITH_FORTRAN: ignore
    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
      COMPILER: MinGW-gcc-6.3.0-32
    - APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
      COMPILER: MinGW-gcc-5.3.0
      WITH_FORTRAN: ignore
# Toolchain setup. Every command is guarded by COMPILER == clang-cl, so the
# MinGW matrix entries skip this section entirely.
install:
  # Activate conda and configure it for conda-forge.
  - if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
  - if [%COMPILER%]==[clang-cl] conda update --yes -n base conda
  - if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
  - if [%COMPILER%]==[clang-cl] conda config --set auto_update_conda false
  # Install the compilers and build tools.
  - if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake ninja flang=11.0.1
  # Load the Visual Studio environment, then put the conda libraries and
  # headers in front of LIB / CPATH.
  - if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
  - if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
  - if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"
# Configure step: create ./build, adjust PATH for the selected compiler and
# run the matching cmake invocation(s).
before_build:
  - ps: if (-Not (Test-Path .\build)) { mkdir build }
  - cd build
  # Drop Git's usr\bin directory from PATH.
  - set PATH=%PATH:C:\Program Files\Git\usr\bin;=%
  # Compiler-specific PATH additions.
  - if [%COMPILER%]==[MinGW-gcc-5.3.0] set PATH=C:\MinGW\bin;C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
  - if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] set PATH=C:\MinGW\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
  - if [%COMPILER%]==[MinGW-gcc-6.3.0-32] set PATH=C:\msys64\usr\bin;C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw64\bin;%PATH%
  # Generator selection per compiler.
  - if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
  - if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
  - if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
  - if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
  # Ninja + clang-cl configurations, keyed on WITH_FORTRAN.
  - if [%WITH_FORTRAN%]==[OFF] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DMSVC_STATIC_CRT=ON ..
  - if [%WITH_FORTRAN%]==[ON] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
  # Optional features, re-running cmake on the configured tree.
  - if [%USE_OPENMP%]==[ON] cmake -DUSE_OPENMP=ON ..
  - if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..

# Build whatever was configured above.
build_script:
  - cmake --build .

# Run the registered tests, two at a time.
test_script:
  - ctest -j2
@@ -1,330 +0,0 @@ | |||
# CI trigger: only the develop branch is included.
trigger:
  # start a new build for every push
  batch: False
  branches:
    include:
      - develop

# Named container images. Both definitions bind-mount the host's sudo binary
# and two of its libraries into the container.
resources:
  containers:
    - container: oneapi-hpckit
      image: intel/oneapi-hpckit:latest
      options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
    - container: oneapi-basekit
      image: intel/oneapi-basekit:latest
      options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
jobs:
# manylinux1 is useful to test because the
# standard Docker container uses an old version
# of gcc / glibc
- job: manylinux1_gcc
  pool:
    vmImage: ubuntu-latest
  steps:
  # Writes a Dockerfile that builds the library plus the test, ctest and
  # utest directories (64-bit, DYNAMIC_ARCH), then runs `docker build` on it.
  - script: |
      echo "FROM quay.io/pypa/manylinux1_x86_64
        COPY . /tmp/openblas
        RUN cd /tmp/openblas && \
            CC=gcc && \
            make QUIET_MAKE=1 BINARY=64 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 && \
            make -C test BINARY=64 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 && \
            make -C ctest BINARY=64 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 && \
            make -C utest BINARY=64 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" > Dockerfile
      docker build .
    displayName: Run manylinux1 docker build
# 32-bit build and tests inside the manylinux2014 i686 container.
- job: manylinux_32bit
  pool:
    vmImage: ubuntu-latest
  steps:
  # Writes a Dockerfile that builds the library plus the test, ctest and
  # utest directories with BINARY=32, then runs `docker build` on it.
  - script: |
      echo "FROM quay.io/pypa/manylinux2014_i686
        COPY . /tmp/openblas
        RUN cd /tmp/openblas && \
            CC=gcc && \
            make QUIET_MAKE=1 BINARY=32 TARGET=NEHALEM NUM_THREADS=32 && \
            make -C test BINARY=32 TARGET=NEHALEM NUM_THREADS=32 && \
            make -C ctest BINARY=32 TARGET=NEHALEM NUM_THREADS=32 && \
            make -C utest BINARY=32 TARGET=NEHALEM NUM_THREADS=32" > Dockerfile
      docker build .
    displayName: Run manylinux 32bit docker build
# Builds in an ubuntu:bionic image and runs the utest suite under Intel SDE
# with the SkylakeX (skx) cpuid definition.
- job: Intel_SDE_skx
  pool:
    vmImage: ubuntu-latest
  steps:
  # The image build installs the toolchain, unpacks SDE and compiles the
  # library; the container's CMD then runs `make -C utest` under sde64.
  - script: |
      # at the time of writing the available Azure Ubuntu vm image
      # does not support AVX512VL, so use more recent LTS version
      echo "FROM ubuntu:bionic
      COPY . /tmp/openblas
      RUN apt-get -y update && apt-get -y install \\
          cmake \\
          gfortran \\
          make \\
          wget
      RUN mkdir /tmp/SDE && cd /tmp/SDE && \\
          mkdir sde-external-8.35.0-2019-03-11-lin && \\
          wget --quiet -O sde-external-8.35.0-2019-03-11-lin.tar.bz2 https://www.dropbox.com/s/fopsnzj67572sj5/sde-external-8.35.0-2019-03-11-lin.tar.bz2?dl=0 && \\
          tar -xjvf sde-external-8.35.0-2019-03-11-lin.tar.bz2 -C /tmp/SDE/sde-external-8.35.0-2019-03-11-lin --strip-components=1
      RUN cd /tmp/openblas && CC=gcc make QUIET_MAKE=1 DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64
      CMD cd /tmp/openblas && echo 0 > /proc/sys/kernel/yama/ptrace_scope && CC=gcc OPENBLAS_VERBOSE=2 /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/sde64 -cpuid_in /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/misc/cpuid/skx/cpuid.def -- make -C utest DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64" > Dockerfile
      docker build -t intel_sde .
      # we need a privileged docker run for sde process attachment
      docker run --privileged intel_sde
    displayName: 'Run AVX512 SkylakeX docker build / test'
# Visual Studio 2022 generator build, followed by a direct run of the utest
# executable.
- job: Windows_cl
  pool:
    vmImage: windows-latest
  steps:
  # Configure in ./build.
  - task: CMake@1
    inputs:
      workingDirectory: 'build' # Optional
      cmakeArgs: '-G "Visual Studio 17 2022" ..'
  # Build the Release configuration.
  - task: CMake@1
    inputs:
      cmakeArgs: '--build . --config Release'
      workingDirectory: 'build'
  # List build/utest and run openblas_utest.exe from there.
  - script: |
      cd build
      cd utest
      dir
      openblas_utest.exe
# mingw32-make build with gcc/gfortran; DYNAMIC_ARCH restricted to SANDYBRIDGE.
- job: Windows_mingw_gmake
  pool:
    vmImage: windows-latest
  steps:
  - script: |
      mingw32-make CC=gcc FC=gfortran DYNAMIC_ARCH=1 DYNAMIC_LIST="SANDYBRIDGE"
# clang-cl + Ninja build without Fortran, followed by ctest.
# NOTE(review): vcvars64.bat is taken from a Visual Studio 2019 path while the
# pool image is 'windows-latest'; the sibling flang jobs use the 2022 path on
# 'windows-2022'. Confirm the 2019 path still exists on the image.
- job: Windows_clang_cmake
  pool:
    vmImage: 'windows-latest'
  steps:
  - script: |
      set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
      set "LIB=C:\Miniconda\Library\lib;%LIB%"
      set "CPATH=C:\Miniconda\Library\include;%CPATH%"
      conda config --add channels conda-forge --force
      conda config --set auto_update_conda false
      conda install --yes ninja
      call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
      mkdir build
      cd build
      cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DNOFORTRAN=1 -DMSVC_STATIC_CRT=ON ..
      cmake --build . --config Release
      ctest
# clang-cl for C/C++ and conda-installed flang for Fortran, built with Ninja.
# The configure line passes -DBUILD_TESTING=OFF; ctest is still invoked last.
- job: Windows_flang_clang
  pool:
    vmImage: windows-2022
  steps:
  - script: |
      set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
      set "LIB=C:\Miniconda\Library\lib;%LIB%"
      set "CPATH=C:\Miniconda\Library\include;%CPATH%"
      conda config --add channels conda-forge --force
      conda config --set auto_update_conda false
      conda install --yes --quiet ninja flang
      mkdir build
      cd build
      call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
      cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER="flang -I C:\Miniconda\Library\include\flang" -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
      cmake --build . --config Release
      ctest
# MSVC cl for C and flang-new for Fortran with -DC_LAPACK=1, built with Ninja.
# ctest runs twice: the second pass reruns failures with their output shown.
- job: Windows_cl_flang
  pool:
    vmImage: windows-2022
  steps:
  - script: |
      set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
      set "LIB=C:\Miniconda\Library\lib;%LIB%"
      set "CPATH=C:\Miniconda\Library\include;%CPATH%"
      conda config --add channels conda-forge --force
      conda config --set auto_update_conda false
      conda install --yes --quiet ninja flang
      mkdir build
      cd build
      call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
      cmake -G "Ninja" -DCMAKE_C_COMPILER=cl -DCMAKE_Fortran_COMPILER=flang-new -DC_LAPACK=1 -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
      cmake --build . --config Release
      ctest
      ctest --rerun-failed --output-on-failure
# gcc-13 / gfortran-13 OpenMP build with INTERFACE64 and DYNAMIC_ARCH,
# installed into ../blasinst and listed afterwards.
- job: OSX_OpenMP
  pool:
    vmImage: macOS-13
  steps:
  - script: |
      brew update
      make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-13 FC=gfortran-13
      make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-13 FC=gfortran-13 PREFIX=../blasinst install
      ls -lR ../blasinst
# Single-threaded build with gcc-13 / gfortran-13.
# The build switch is USE_THREAD (singular), as in `make USE_THREAD=0`;
# with the previous spelling USE_THREADS=0 this job did not select the
# no-threads build.
- job: OSX_GCC_Nothreads
  pool:
    vmImage: 'macOS-13'
  steps:
  - script: |
      brew update
      make USE_THREAD=0 CC=gcc-13 FC=gfortran-13
# Default-option build with gcc-12 / gfortran-12.
- job: OSX_GCC12
  pool:
    vmImage: macOS-latest
  steps:
  - script: |
      brew update
      make CC=gcc-12 FC=gfortran-12
# Homebrew LLVM clang + flang, static-only (NO_SHARED=1) NEHALEM build.
- job: OSX_LLVM_flangnew
  pool:
    vmImage: macOS-latest
  variables:
    # Both library search paths point at the Homebrew LLVM libraries.
    LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
    LIBRARY_PATH: /usr/local/opt/llvm/lib
  steps:
  - script: |
      brew update
      brew install llvm flang
      make TARGET=NEHALEM CC=/usr/local/opt/llvm/bin/clang FC=/usr/local/opt/flang/bin/flang NO_SHARED=1
# Homebrew LLVM clang with libomp: OpenMP + DYNAMIC_ARCH build, no Fortran.
- job: OSX_OpenMP_Clang
  pool:
    vmImage: macOS-latest
  variables:
    # Both library search paths point at the Homebrew LLVM libraries.
    LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
    LIBRARY_PATH: /usr/local/opt/llvm/lib
  steps:
  - script: |
      brew update
      brew install llvm libomp
      make TARGET=CORE2 USE_OPENMP=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang NOFORTRAN=1
# CMake variant of the clang/OpenMP build: INTERFACE64, DYNAMIC_ARCH,
# no Fortran, AVX512 disabled; finishes with ctest.
- job: OSX_OpenMP_Clang_cmake
  pool:
    vmImage: macOS-latest
  variables:
    # Both library search paths point at the Homebrew LLVM libraries.
    LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
    LIBRARY_PATH: /usr/local/opt/llvm/lib
  steps:
  - script: |
      brew update
      brew install llvm libomp
      mkdir build
      cd build
      cmake -DTARGET=CORE2 -DUSE_OPENMP=1 -DINTERFACE64=1 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DNOFORTRAN=1 -DNO_AVX512=1 ..
      make
      ctest
# CMake shared-library build with gcc-13 / gfortran-13 and an explicit
# DYNAMIC_LIST (NEHALEM HASWELL SKYLAKEX); finishes with ctest.
- job: OSX_dynarch_cmake
  pool:
    vmImage: macOS-13
  variables:
    # Both library search paths point at the Homebrew LLVM libraries.
    LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
    LIBRARY_PATH: /usr/local/opt/llvm/lib
  steps:
  - script: |
      mkdir build
      cd build
      cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DDYNAMIC_LIST='NEHALEM HASWELL SKYLAKEX' -DCMAKE_C_COMPILER=gcc-13 -DCMAKE_Fortran_COMPILER=gfortran-13 -DBUILD_SHARED_LIBS=ON ..
      cmake --build .
      ctest
# Homebrew clang for C plus Intel ifort for Fortran. The oneAPI install under
# /opt/intel/oneapi is cached, keyed on the installer URL and component list.
- job: OSX_Ifort_Clang
  pool:
    vmImage: 'macOS-latest'
  variables:
    LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
    MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/edb4dc2f-266f-47f2-8d56-21bc7764e119/m_HPCKit_p_2023.2.0.49443.dmg
    LIBRARY_PATH: /usr/local/opt/llvm/lib
    MACOS_FORTRAN_COMPONENTS: intel.oneapi.mac.ifort-compiler
  steps:
  # Create /opt/intel owned by the build user so the cache step can write to it.
  - script: |
      brew update
      brew install llvm libomp
      sudo mkdir -p /opt/intel
      sudo chown $USER /opt/intel
    displayName: prepare for cache restore
  - task: Cache@2
    inputs:
      path: /opt/intel/oneapi
      key: '"install" | "$(MACOS_HPCKIT_URL)" | "$(MACOS_FORTRAN_COMPONENTS)"'
      cacheHitVar: CACHE_RESTORED
  # Runs only on a cache miss: download the disk image, run the bootstrapper,
  # detach the image and report the installer's exit status.
  - script: |
      curl --output webimage.dmg --url $(MACOS_HPCKIT_URL) --retry 5 --retry-delay 5
      hdiutil attach webimage.dmg
      sudo /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)"/bootstrapper.app/Contents/MacOS/bootstrapper -s --action install --components="$(MACOS_FORTRAN_COMPONENTS)" --eula=accept --continue-with-optional-error=yes --log-dir=.
      installer_exit_code=$?
      # Detach the same volume that was attached above. The previous "$URL"
      # is not defined anywhere in this job, so the volume name came out empty.
      hdiutil detach /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)" -quiet
      exit $installer_exit_code
    displayName: install
    condition: ne(variables.CACHE_RESTORED, 'true')
  - script: |
      source /opt/intel/oneapi/setvars.sh
      make CC=/usr/local/opt/llvm/bin/clang FC=ifort
# Cross-build for ARMV7 with the Android NDK clang (CBLAS only, soft-float ABI).
- job: OSX_NDK_ARMV7
  pool:
    vmImage: macOS-13
  steps:
  - script: |
      brew update
      brew install --cask android-ndk
      export ANDROID_NDK_HOME=/usr/local/share/android-ndk
      make TARGET=ARMV7 ONLY_CBLAS=1 CC=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi21-clang AR=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/llvm-ar HOSTCC=gcc ARM_SOFTFP_ABI=1 -j4
# Cross-build for arm64 against the iPhoneOS 16.2 SDK using Xcode 14.2's clang.
- job: OSX_IOS_ARMV8
  pool:
    vmImage: macOS-13
  variables:
    # Compiler and flags are supplied through job variables.
    CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
    CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.2.sdk -arch arm64 -miphoneos-version-min=10.0
  steps:
  - script: |
      make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
# Cross-build for armv7 against the iPhoneOS 16.2 SDK using Xcode 14.2's clang.
- job: OSX_IOS_ARMV7
  pool:
    vmImage: macOS-13
  variables:
    # Compiler and flags are supplied through job variables.
    CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
    CFLAGS: -O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.2.sdk -arch armv7 -miphoneos-version-min=5.1
  steps:
  - script: |
      make TARGET=ARMV7 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
# Cross-build for arm64 macOS (MacOSX13.1 SDK) with DYNAMIC_ARCH, using
# Xcode 14.2's clang.
- job: OSX_xbuild_DYNAMIC_ARM64
  pool:
    vmImage: 'macOS-13'
  variables:
    CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
    CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX13.1.sdk -arch arm64
  steps:
  # The first three commands only print diagnostics (available SDKs, supported
  # CPUs, compiler version). All of them use the Xcode 14.2 toolchain that CC
  # points at; the CPU query previously referenced Xcode_12.2.app.
  - script: |
      ls /Applications/Xcode_14.2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
      /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -arch arm64 --print-supported-cpus
      /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang --version
      make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1
# Builds and installs inside an Alpine chroot, then compiles a small program
# against the installed headers and library.
- job: ALPINE_MUSL
  pool:
    vmImage: 'ubuntu-latest'
  steps:
  # The checksum line fed to `sha1sum -c` needs two characters between hash
  # and file name (two spaces for text mode); with a single space it is not
  # recognised as a checksum line and the step exits with 1.
  - script: |
      wget https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.14.0/alpine-chroot-install \
      && echo 'ccbf65f85cdc351851f8ad025bb3e65bae4d5b06  alpine-chroot-install' | sha1sum -c \
      || exit 1
      alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
      sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers sudo'
      alpine make DYNAMIC_ARCH=1 BINARY=64
      alpine make DYNAMIC_ARCH=1 BINARY=64 PREFIX=mytestdir install
      alpine ls -l mytestdir/include
      alpine echo "// tests that inclusion of openblas_config.h works with musl" >test_install.c
      alpine echo "#include <openblas_config.h>" >>test_install.c
      alpine echo "int main(){" >> test_install.c
      alpine echo "cpu_set_t* cpu_set = NULL;}" >>test_install.c
      alpine gcc -Imytestdir/include test_install.c -Lmytestdir/lib -lopenblas -lpthread -lgfortran -o test_install
@@ -1,9 +0,0 @@ | |||
#!/bin/bash
# Rename every regular file in the current directory that ends in .goto,
# .acml, .mkl or .atlas: each '.' in the name becomes '_' and '.exe' is
# appended (e.g. "dgemm.goto" -> "dgemm_goto.exe").

for f in *.goto *.acml *.mkl *.atlas
do
  # A pattern with no match is left as literal text by the shell, so only
  # act on names that are real files.
  if [ -f "$f" ]; then
    # Quoted expansions keep names containing spaces or glob characters
    # intact; "--" stops option parsing for names starting with '-'.
    mv -- "$f" "${f//./_}.exe"
  fi
done
@@ -1,133 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/*
 * Select the AMAX routine for the element type this file is compiled for:
 *   COMPLEX and DOUBLE -> dzamax
 *   COMPLEX only       -> scamax
 *   DOUBLE only        -> damax
 *   neither            -> samax
 */
#undef AMAX

#if defined(COMPLEX) && defined(DOUBLE)
#define AMAX BLASFUNC(dzamax)
#elif defined(COMPLEX)
#define AMAX BLASFUNC(scamax)
#elif defined(DOUBLE)
#define AMAX BLASFUNC(damax)
#else
#define AMAX BLASFUNC(samax)
#endif
int main(int argc, char *argv[]) | |||
{ | |||
FLOAT *x; | |||
blasint m, i; | |||
blasint inc_x = 1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1, timeg; | |||
argc--; | |||
argv++; | |||
if (argc > 0) | |||
{ | |||
from = atol(*argv); | |||
argc--; | |||
argv++; | |||
} | |||
if (argc > 0) | |||
{ | |||
to = MAX(atol(*argv), from); | |||
argc--; | |||
argv++; | |||
} | |||
if (argc > 0) | |||
{ | |||
step = atol(*argv); | |||
argc--; | |||
argv++; | |||
} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) | |||
loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) | |||
inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops); | |||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) | |||
{ | |||
fprintf(stderr, "Out of Memory!!\n"); | |||
exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for (m = from; m <= to; m += step) | |||
{ | |||
timeg = 0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l = 0; l < loops; l++) | |||
{ | |||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) | |||
{ | |||
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
AMAX(&m, x, &inc_x); | |||
end(); | |||
timeg += getsec(); | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,137 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind AMIN to the routine matching the build's COMPLEX / DOUBLE settings. */
#undef AMIN
#if defined(COMPLEX) && defined(DOUBLE)
#define AMIN BLASFUNC(dzamin)
#elif defined(COMPLEX)
#define AMIN BLASFUNC(scamin)
#elif defined(DOUBLE)
#define AMIN BLASFUNC(damin)
#else
#define AMIN BLASFUNC(samin)
#endif
int main(int argc, char *argv[]) | |||
{ | |||
FLOAT *x; | |||
blasint m, i; | |||
blasint inc_x = 1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1, timeg; | |||
argc--; | |||
argv++; | |||
if (argc > 0) | |||
{ | |||
from = atol(*argv); | |||
argc--; | |||
argv++; | |||
} | |||
if (argc > 0) | |||
{ | |||
to = MAX(atol(*argv), from); | |||
argc--; | |||
argv++; | |||
} | |||
if (argc > 0) | |||
{ | |||
step = atol(*argv); | |||
argc--; | |||
argv++; | |||
} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) | |||
loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) | |||
inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops); | |||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) | |||
{ | |||
fprintf(stderr, "Out of Memory!!\n"); | |||
exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for (m = from; m <= to; m += step) | |||
{ | |||
timeg = 0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l = 0; l < loops; l++) | |||
{ | |||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) | |||
{ | |||
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
AMIN(&m, x, &inc_x); | |||
end(); | |||
timeg += getsec(); | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,135 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind ASUM to the routine matching the build's COMPLEX / DOUBLE settings. */
#undef ASUM
#if defined(COMPLEX) && defined(DOUBLE)
#define ASUM BLASFUNC(dzasum)
#elif defined(COMPLEX)
#define ASUM BLASFUNC(scasum)
#elif defined(DOUBLE)
#define ASUM BLASFUNC(dasum)
#else
#define ASUM BLASFUNC(sasum)
#endif
int main(int argc, char *argv[]) | |||
{ | |||
FLOAT *x; | |||
FLOAT result; | |||
blasint m, i; | |||
blasint inc_x = 1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1, timeg; | |||
argc--; | |||
argv++; | |||
if (argc > 0) | |||
{ | |||
from = atol(*argv); | |||
argc--; | |||
argv++; | |||
} | |||
if (argc > 0) | |||
{ | |||
to = MAX(atol(*argv), from); | |||
argc--; | |||
argv++; | |||
} | |||
if (argc > 0) | |||
{ | |||
step = atol(*argv); | |||
argc--; | |||
argv++; | |||
} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) | |||
loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) | |||
inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops); | |||
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) | |||
{ | |||
fprintf(stderr, "Out of Memory!!\n"); | |||
exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for (m = from; m <= to; m += step) | |||
{ | |||
timeg = 0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l = 0; l < loops; l++) | |||
{ | |||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) | |||
{ | |||
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
result = ASUM(&m, x, &inc_x); | |||
end(); | |||
timeg += getsec(); | |||
} | |||
if (loops > 1) | |||
timeg /= loops; | |||
#ifdef COMPLEX | |||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg); | |||
#else | |||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg); | |||
#endif | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,124 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind AXPBY to the routine matching the build's COMPLEX / DOUBLE settings. */
#undef AXPBY
#if defined(COMPLEX) && defined(DOUBLE)
#define AXPBY BLASFUNC(zaxpby)
#elif defined(COMPLEX)
#define AXPBY BLASFUNC(caxpby)
#elif defined(DOUBLE)
#define AXPBY BLASFUNC(daxpby)
#else
#define AXPBY BLASFUNC(saxpby)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *x, *y; | |||
FLOAT alpha[2] = { 2.0, 2.0 }; | |||
FLOAT beta[2] = {2.0, 2.0}; | |||
blasint m, i; | |||
blasint inc_x=1,inc_y=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for (l=0; l<loops; l++) | |||
{ | |||
begin(); | |||
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y ); | |||
end(); | |||
timeg += getsec(); | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
(COMPSIZE * COMPSIZE * 4. - COMPSIZE) * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,128 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind AXPY to the routine matching the build's COMPLEX / DOUBLE settings. */
#undef AXPY
#if defined(COMPLEX) && defined(DOUBLE)
#define AXPY BLASFUNC(zaxpy)
#elif defined(COMPLEX)
#define AXPY BLASFUNC(caxpy)
#elif defined(DOUBLE)
#define AXPY BLASFUNC(daxpy)
#else
#define AXPY BLASFUNC(saxpy)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *x, *y; | |||
FLOAT alpha[2] = { 2.0, 2.0 }; | |||
blasint m, i; | |||
blasint inc_x=1,inc_y=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
AXPY (&m, alpha, x, &inc_x, y, &inc_y ); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.9f sec\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,134 +0,0 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <time.h> | |||
#ifdef __CYGWIN32__ | |||
#include <sys/time.h> | |||
#elif defined(__APPLE__) | |||
#include <mach/mach_time.h> | |||
#endif | |||
#include "common.h" | |||
#if defined(__WIN32__) || defined(__WIN64__) | |||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||
#endif | |||
int gettimeofday(struct timeval *tv, void *tz){ | |||
FILETIME ft; | |||
unsigned __int64 tmpres = 0; | |||
static int tzflag; | |||
if (NULL != tv) | |||
{ | |||
GetSystemTimeAsFileTime(&ft); | |||
tmpres |= ft.dwHighDateTime; | |||
tmpres <<= 32; | |||
tmpres |= ft.dwLowDateTime; | |||
/*converting file time to unix epoch*/ | |||
tmpres /= 10; /*convert into microseconds*/ | |||
tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||
tv->tv_sec = (long)(tmpres / 1000000UL); | |||
tv->tv_usec = (long)(tmpres % 1000000UL); | |||
} | |||
return 0; | |||
} | |||
#endif | |||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * Obtain `size` bytes from a System V shared-memory segment created with
 * the SHM_HUGETLB flag.  The request is rounded with HUGE_PAGESIZE before
 * being passed to shmget().  Prints a message and exits with status 1 when
 * either shmget() or shmat() fails; otherwise returns the attached address.
 */
static void *huge_malloc(BLASLONG size){
  int segment;
  void *mem;

  segment = shmget(IPC_PRIVATE,
                   (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                   SHM_HUGETLB | IPC_CREAT |0600);
  if (segment < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(segment, NULL, SHM_RND);
  if ((BLASLONG)mem == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Flag the segment for removal right away; the mapping obtained above is
     what the caller keeps using. */
  shmctl(segment, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc | |||
#endif | |||
/* Benchmarks should allocate with cacheline (often 64 bytes) alignment | |||
to avoid unreliable results. This technique, storing the allocated | |||
pointer value just before the aligned memory, doesn't require | |||
C11's aligned_alloc for compatibility with older compilers. */ | |||
static void *aligned_alloc_cacheline(size_t n) | |||
{ | |||
void *p = malloc((size_t)(void *) + n + L1_DATA_LINESIZE - 1); | |||
if (p) { | |||
void **newp = (void **) | |||
(((uintptr_t)p + L1_DATA_LINESIZE) & (uintptr_t)-L1_DATA_LINESIZE); | |||
newp[-1] = p; | |||
p = newp; | |||
} | |||
return p; | |||
} | |||
#define malloc aligned_alloc_cacheline | |||
#define free(p) free((p) ? ((void **)(p))[-1] : (p)) | |||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||
struct timeval start, stop; | |||
#elif defined(__APPLE__) | |||
mach_timebase_info_data_t info; | |||
uint64_t start = 0, stop = 0; | |||
#else | |||
struct timespec start = { 0, 0 }, stop = { 0, 0 }; | |||
#endif | |||
double getsec() | |||
{ | |||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
#elif defined(__APPLE__) | |||
mach_timebase_info(&info); | |||
return (double)(((stop - start) * info.numer)/info.denom) * 1.e-9; | |||
#else | |||
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9; | |||
#endif | |||
} | |||
void begin() { | |||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||
gettimeofday( &start, (struct timezone *)0); | |||
#elif defined(__APPLE__) | |||
start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW); | |||
#else | |||
clock_gettime(CLOCK_REALTIME, &start); | |||
#endif | |||
} | |||
void end() { | |||
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||
gettimeofday( &stop, (struct timezone *)0); | |||
#elif defined(__APPLE__) | |||
stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW); | |||
#else | |||
clock_gettime(CLOCK_REALTIME, &stop); | |||
#endif | |||
} |
@@ -1,247 +0,0 @@ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#include "bench.h" | |||
double fabs(double); | |||
/* Bind POTRF and SYRK (HERK for complex builds) to the routines matching
   the build's COMPLEX / XDOUBLE / DOUBLE settings. */
#undef POTRF
#if !defined(COMPLEX) && defined(XDOUBLE)
#define POTRF BLASFUNC(qpotrf)
#define SYRK BLASFUNC(qsyrk)
#elif !defined(COMPLEX) && defined(DOUBLE)
#define POTRF BLASFUNC(dpotrf)
#define SYRK BLASFUNC(dsyrk)
#elif !defined(COMPLEX)
#define POTRF BLASFUNC(spotrf)
#define SYRK BLASFUNC(ssyrk)
#elif defined(XDOUBLE)
#define POTRF BLASFUNC(xpotrf)
#define SYRK BLASFUNC(xherk)
#elif defined(DOUBLE)
#define POTRF BLASFUNC(zpotrf)
#define SYRK BLASFUNC(zherk)
#else
#define POTRF BLASFUNC(cpotrf)
#define SYRK BLASFUNC(cherk)
#endif
/*
 * Convert the run time of an order-m Cholesky factorization into MFlops.
 *
 * ratio - 1 for real data; any other value selects the complex weighting
 * m     - matrix order
 * secs  - elapsed time in seconds
 *
 * Returns 0 when secs is exactly zero, otherwise the operation count
 * divided by secs, scaled to millions.
 */
static __inline double getmflops(int ratio, int m, double secs){

  double mm = (double)m;
  double mulflops, addflops;

  if (secs==0.) return 0.;

  /* Multiplication and addition counts as polynomials in m. */
  mulflops = mm * (1./3. + mm * (1./2. + mm * 1./6.));
  addflops = 1./6. * mm * (mm * mm - 1);

  if (ratio == 1) {
    return (mulflops + addflops) / secs * 1.e-6;
  } else {
    /* A complex multiply costs 6 real operations (4 multiplies and 2 adds),
       a complex add costs 2. */
    return (6. * mulflops + 2. * addflops) / secs * 1.e-6;
  }
}
int main(int argc, char *argv[]){ | |||
#ifndef COMPLEX | |||
char *trans[] = {"T", "N"}; | |||
#else | |||
char *trans[] = {"C", "N"}; | |||
#endif | |||
char *uplo[] = {"U", "L"}; | |||
FLOAT alpha[] = {1.0, 0.0}; | |||
FLOAT beta [] = {0.0, 0.0}; | |||
FLOAT *a, *b; | |||
blasint m, i, j, info, uplos; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
FLOAT maxerr; | |||
double time1; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
for(m = from; m <= to; m += step){ | |||
fprintf(stderr, "M = %6d : ", (int)m); | |||
for (uplos = 0; uplos < 2; uplos ++) { | |||
#ifndef COMPLEX | |||
if (uplos & 1) { | |||
for (j = 0; j < m; j++) { | |||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.; | |||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.; | |||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5; | |||
} | |||
} else { | |||
for (j = 0; j < m; j++) { | |||
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5; | |||
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.; | |||
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.; | |||
} | |||
} | |||
#else | |||
if (uplos & 1) { | |||
for (j = 0; j < m; j++) { | |||
for(i = 0; i < j; i++) { | |||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.; | |||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.; | |||
} | |||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.; | |||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.; | |||
for(i = j + 1; i < m; i++) { | |||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5; | |||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5; | |||
} | |||
} | |||
} else { | |||
for (j = 0; j < m; j++) { | |||
for(i = 0; i < j; i++) { | |||
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5; | |||
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5; | |||
} | |||
a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.; | |||
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.; | |||
for(i = j + 1; i < m; i++) { | |||
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.; | |||
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.; | |||
} | |||
} | |||
} | |||
#endif | |||
SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); | |||
begin(); | |||
POTRF(uplo[uplos], &m, b, &m, &info); | |||
end(); | |||
if (info != 0) { | |||
fprintf(stderr, "Info = %d\n", info); | |||
exit(1); | |||
} | |||
time1 = getsec(); | |||
if (!(uplos & 1)) { | |||
for (j = 0; j < m; j++) { | |||
for(i = 0; i <= j; i++) { | |||
#ifndef COMPLEX | |||
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m])) | |||
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]); | |||
#else | |||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0])) | |||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]); | |||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1])) | |||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]); | |||
#endif | |||
} | |||
} | |||
} else { | |||
for (j = 0; j < m; j++) { | |||
for(i = j; i < m; i++) { | |||
#ifndef COMPLEX | |||
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m])) | |||
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]); | |||
#else | |||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0])) | |||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]); | |||
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1])) | |||
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]); | |||
#endif | |||
} | |||
} | |||
} | |||
fprintf(stderr, | |||
#ifdef XDOUBLE | |||
" %Le %10.3f MFlops", maxerr, | |||
#else | |||
" %e %10.3f MFlops", maxerr, | |||
#endif | |||
getmflops(COMPSIZE * COMPSIZE, m, time1)); | |||
if (maxerr > 1.e-3) { | |||
fprintf(stderr, "Hmm, probably it has bug.\n"); | |||
exit(1); | |||
} | |||
} | |||
fprintf(stderr, "\n"); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,123 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind COPY to the routine matching the build's COMPLEX / DOUBLE settings. */
#undef COPY
#if defined(COMPLEX) && defined(DOUBLE)
#define COPY BLASFUNC(zcopy)
#elif defined(COMPLEX)
#define COPY BLASFUNC(ccopy)
#elif defined(DOUBLE)
#define COPY BLASFUNC(dcopy)
#else
#define COPY BLASFUNC(scopy)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *x, *y; | |||
FLOAT alpha[2] = { 2.0, 2.0 }; | |||
blasint m, i; | |||
blasint inc_x=1,inc_y=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1 = 0.0, timeg = 0.0; | |||
long nanos = 0; | |||
time_t seconds = 0; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for (l=0; l<loops; l++) | |||
{ | |||
begin(); | |||
COPY (&m, x, &inc_x, y, &inc_y ); | |||
end(); | |||
timeg += getsec(); | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MBytes %12.9f sec\n", | |||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg / 1.e6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,28 +0,0 @@ | |||
#include <stdio.h> | |||
#include "culapack.h" | |||
static int initialized = 0; | |||
int sgetrf_(int *m, int *n, float *a, int *lda, int *ipiv, int *info) { | |||
if (!initialized) { | |||
culaInitialize(); | |||
initialized = 1; | |||
} | |||
*info = culaSgetrf(*m, *m, a, *lda, ipiv); | |||
return 0; | |||
} | |||
int cgetrf_(int *m, int *n, float *a, int *lda, int *ipiv, int *info) { | |||
if (!initialized) { | |||
culaInitialize(); | |||
initialized = 1; | |||
} | |||
*info = culaCgetrf(*m, *m, (culaFloatComplex *)a, *lda, ipiv); | |||
return 0; | |||
} |
@@ -1,118 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind DOT to ddot for DOUBLE builds and to sdot otherwise. */
#undef DOT
#ifndef DOUBLE
#define DOT BLASFUNC(sdot)
#else
#define DOT BLASFUNC(ddot)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *x, *y; | |||
FLOAT result; | |||
blasint m, i; | |||
blasint inc_x=1,inc_y=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
result = DOT (&m, x, &inc_x, y, &inc_y ); | |||
end(); | |||
timeg += getsec(); | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,189 +0,0 @@ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#include "bench.h" | |||
#undef GEEV | |||
#ifndef COMPLEX | |||
#ifdef XDOUBLE | |||
#define GEEV BLASFUNC(qgeev) | |||
#elif defined(DOUBLE) | |||
#define GEEV BLASFUNC(dgeev) | |||
#else | |||
#define GEEV BLASFUNC(sgeev) | |||
#endif | |||
#else | |||
#ifdef XDOUBLE | |||
#define GEEV BLASFUNC(xgeev) | |||
#elif defined(DOUBLE) | |||
#define GEEV BLASFUNC(zgeev) | |||
#else | |||
#define GEEV BLASFUNC(cgeev) | |||
#endif | |||
#endif | |||
#ifndef COMPLEX | |||
extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a, | |||
blasint* lda, FLOAT* wr, FLOAT* wi, FLOAT* vl, blasint* ldvl, | |||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, blasint* info ); | |||
#else | |||
extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a, | |||
blasint* lda, FLOAT* wr, FLOAT* vl, blasint* ldvl, | |||
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info ); | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork; | |||
FLOAT wkopt[4]; | |||
char job='V'; | |||
char jobr='N'; | |||
char *p; | |||
blasint m, i, j, info,lwork; | |||
double factor = 26.33; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_JOB"))) job=*p; | |||
if ( job == 'N' ) factor = 10.0; | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Job=%c\n", from, to, step,job); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( vl = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( vr = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( wr = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( wi = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( rwork = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
for(j = 0; j < to; j++){ | |||
for(i = 0; i < to * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
lwork = -1; | |||
m=to; | |||
#ifndef COMPLEX | |||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info); | |||
#else | |||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info); | |||
#endif | |||
lwork = (blasint)wkopt[0]; | |||
if (( work = (FLOAT *)malloc(sizeof(FLOAT) * lwork * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE FLops Time Lwork\n"); | |||
for(m = from; m <= to; m += step){ | |||
fprintf(stderr, " %6d : ", (int)m); | |||
begin(); | |||
lwork = -1; | |||
#ifndef COMPLEX | |||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info); | |||
#else | |||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info); | |||
#endif | |||
lwork = (blasint)wkopt[0]; | |||
#ifndef COMPLEX | |||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info); | |||
#else | |||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info); | |||
#endif | |||
end(); | |||
if (info) { | |||
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info); | |||
exit(1); | |||
} | |||
time1 = getsec(); | |||
fprintf(stderr, | |||
" %10.2f MFlops : %10.2f Sec : %d\n", | |||
COMPSIZE * COMPSIZE * factor * (double)m * (double)m * (double)m / time1 * 1.e-6,time1,lwork); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,197 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind GEMM to the matrix-multiply symbol for the build configuration.
 * The 16-bit input variants also redefine IFLOAT, the element type of the
 * A and B operands; the plain single-precision case defines it as float. */
#undef GEMM
#ifdef COMPLEX

#ifdef DOUBLE
#define GEMM BLASFUNC(zgemm)
#else
#define GEMM BLASFUNC(cgemm)
#endif

#else /* real */

#if defined(DOUBLE)
#define GEMM BLASFUNC(dgemm)
#elif defined(BFLOAT16) && defined(BGEMM)
#define GEMM BLASFUNC(bgemm)
#elif defined(BFLOAT16)
#define GEMM BLASFUNC(sbgemm)
#undef IFLOAT
#define IFLOAT bfloat16
#elif defined(HFLOAT16)
#define GEMM BLASFUNC(shgemm)
#undef IFLOAT
#define IFLOAT hfloat16
#else
#define GEMM BLASFUNC(sgemm)
#define IFLOAT float
#endif

#endif /* COMPLEX */
int main(int argc, char *argv[]){ | |||
IFLOAT *a, *b; | |||
FLOAT *c; | |||
#ifdef BGEMM | |||
blasint one=1; | |||
blasint two=2; | |||
float alpha_in[] = {1.0, 0.0}; | |||
float beta_in[] = {0.0, 0.0}; | |||
FLOAT alpha[2], beta[2]; | |||
sbstobf16_(&two, alpha_in, &one, alpha, &one); | |||
sbstobf16_(&two, beta_in, &one, beta, &one); | |||
#else | |||
FLOAT alpha[] = {1.0, 0.0}; | |||
FLOAT beta [] = {0.0, 0.0}; | |||
#endif | |||
char transa = 'N'; | |||
char transb = 'N'; | |||
blasint m, n, k, i, j, lda, ldb, ldc; | |||
int loops = 1; | |||
int has_param_m = 0; | |||
int has_param_n = 0; | |||
int has_param_k = 0; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1, timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++; } | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; } | |||
if (argc > 0) { step = atol(*argv); argc--; argv++; } | |||
if ((p = getenv("OPENBLAS_TRANS"))) { | |||
transa=*p; | |||
transb=*p; | |||
} | |||
if ((p = getenv("OPENBLAS_TRANSA"))) { | |||
transa=*p; | |||
} | |||
if ((p = getenv("OPENBLAS_TRANSB"))) { | |||
transb=*p; | |||
} | |||
TOUPPER(transa); | |||
TOUPPER(transb); | |||
fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb); | |||
p = getenv("OPENBLAS_LOOPS"); | |||
if ( p != NULL ) { | |||
loops = atoi(p); | |||
} | |||
if ((p = getenv("OPENBLAS_PARAM_M"))) { | |||
m = atoi(p); | |||
has_param_m=1; | |||
} else { | |||
m = to; | |||
} | |||
if ((p = getenv("OPENBLAS_PARAM_N"))) { | |||
n = atoi(p); | |||
has_param_n=1; | |||
} else { | |||
n = to; | |||
} | |||
if ((p = getenv("OPENBLAS_PARAM_K"))) { | |||
k = atoi(p); | |||
has_param_k=1; | |||
} else { | |||
k = to; | |||
} | |||
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
for (i = 0; i < m * k * COMPSIZE; i++) { | |||
a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5; | |||
} | |||
for (i = 0; i < k * n * COMPSIZE; i++) { | |||
b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5; | |||
} | |||
for (i = 0; i < m * n * COMPSIZE; i++) { | |||
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
fprintf(stderr, " SIZE Flops Time\n"); | |||
for (i = from; i <= to; i += step) { | |||
timeg=0; | |||
if (!has_param_m) { m = i; } | |||
if (!has_param_n) { n = i; } | |||
if (!has_param_k) { k = i; } | |||
if (transa == 'N') { lda = m; } | |||
else { lda = k; } | |||
if (transb == 'N') { ldb = k; } | |||
else { ldb = n; } | |||
ldc = m; | |||
fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k); | |||
begin(); | |||
for (j=0; j<loops; j++) { | |||
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); | |||
} | |||
end(); | |||
time1 = getsec(); | |||
timeg = time1/loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)k * (double)m * (double)n / timeg * 1.e-6, time1); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,136 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind GEMM for this benchmark: the complex builds use the 3m variants
 * (zgemm3m / cgemm3m); the real builds use plain dgemm / sgemm. */
#undef GEMM
#if defined(COMPLEX) && defined(DOUBLE)
#define GEMM BLASFUNC(zgemm3m)
#elif defined(COMPLEX)
#define GEMM BLASFUNC(cgemm3m)
#elif defined(DOUBLE)
#define GEMM BLASFUNC(dgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *a, *b, *c; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
FLOAT beta [] = {1.0, 1.0}; | |||
char trans='N'; | |||
blasint m, i, j; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
p = getenv("OPENBLAS_LOOPS"); | |||
if ( p != NULL ) | |||
loops = atoi(p); | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
begin(); | |||
GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||
end(); | |||
timeg += getsec(); | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,214 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, 2025 The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind GEMV to the matrix-vector symbol for the build configuration.
 * The sbgemv case also redefines IFLOAT (element type of A and x) as bfloat16. */
#undef GEMV
#ifdef COMPLEX

#ifdef DOUBLE
#define GEMV BLASFUNC(zgemv)
#else
#define GEMV BLASFUNC(cgemv)
#endif

#else /* real */

#if defined(DOUBLE)
#define GEMV BLASFUNC(dgemv)
#elif defined(BFLOAT16) && defined(BGEMM)
#define GEMV BLASFUNC(bgemv)
#elif defined(BFLOAT16)
#define GEMV BLASFUNC(sbgemv)
#undef IFLOAT
#define IFLOAT bfloat16
#else
#define GEMV BLASFUNC(sgemv)
#endif

#endif /* COMPLEX */
int main(int argc, char *argv[]){ | |||
IFLOAT *a, *x; | |||
FLOAT *y; | |||
#ifdef BGEMM | |||
blasint one=1; | |||
blasint two=2; | |||
float alpha_in[] = {1.0, 0.0}; | |||
float beta_in[] = {0.0, 0.0}; | |||
FLOAT alpha[2], beta[2]; | |||
sbstobf16_(&two, alpha_in, &one, alpha, &one); | |||
sbstobf16_(&two, beta_in, &one, beta, &one); | |||
#else | |||
FLOAT alpha[] = {1.0, 0.0}; | |||
FLOAT beta [] = {0.0, 0.0}; | |||
#endif | |||
char trans='N'; | |||
blasint m, i, j; | |||
blasint inc_x=1,inc_y=1; | |||
blasint n=0; | |||
int has_param_n = 0; | |||
int has_param_m = 0; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
int tomax = to; | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
if ((p = getenv("OPENBLAS_PARAM_N"))) { | |||
n = atoi(p); | |||
if ((n>0)) has_param_n = 1; | |||
if ( n > tomax ) tomax = n; | |||
} | |||
if ( has_param_n == 0 ) | |||
if ((p = getenv("OPENBLAS_PARAM_M"))) { | |||
m = atoi(p); | |||
if ((m>0)) has_param_m = 1; | |||
if ( m > tomax ) tomax = m; | |||
} | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Trans = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,trans,inc_x,inc_y,loops); | |||
if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * tomax * tomax * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( x = (IFLOAT *)malloc(sizeof(IFLOAT) * tomax * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * tomax * abs(inc_y) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
if (has_param_m == 0) | |||
{ | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
if ( has_param_n == 0 ) n = m; | |||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < n * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((IFLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((IFLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg); | |||
} | |||
} | |||
else | |||
{ | |||
for(n = from; n <= to; n += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < n * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg); | |||
} | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,149 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind GER to the rank-1 update symbol for the build flags:
 * COMPLEX+DOUBLE -> zgeru, COMPLEX -> cgeru, DOUBLE -> dger, none -> sger. */
#undef GER
#if defined(COMPLEX) && defined(DOUBLE)
#define GER BLASFUNC(zgeru)
#elif defined(COMPLEX)
#define GER BLASFUNC(cgeru)
#elif defined(DOUBLE)
#define GER BLASFUNC(dger)
#else
#define GER BLASFUNC(sger)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *a, *x, *y; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
blasint m, i, j; | |||
blasint inc_x=1,inc_y=1; | |||
blasint n=0; | |||
int has_param_n = 0; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||
if ((p = getenv("OPENBLAS_PARAM_N"))) { | |||
n = atoi(p); | |||
if ((n>0) && (n<=to)) has_param_n = 1; | |||
} | |||
if ( has_param_n == 1 ) | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d N = %d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,n,inc_x,inc_y,loops); | |||
else | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
if ( has_param_n == 0 ) n = m; | |||
fprintf(stderr, " %6dx%d : ", (int)m,(int)n); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < n * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){ | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for (l=0; l<loops; l++) | |||
{ | |||
begin(); | |||
GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m); | |||
end(); | |||
timeg += getsec(); | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); | |||
@@ -1,143 +0,0 @@ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#include "bench.h" | |||
double fabs(double); | |||
#undef GESV | |||
#undef GETRS | |||
#ifndef COMPLEX | |||
#ifdef XDOUBLE | |||
#define GESV BLASFUNC(qgesv) | |||
#elif defined(DOUBLE) | |||
#define GESV BLASFUNC(dgesv) | |||
#else | |||
#define GESV BLASFUNC(sgesv) | |||
#endif | |||
#else | |||
#ifdef XDOUBLE | |||
#define GESV BLASFUNC(xgesv) | |||
#elif defined(DOUBLE) | |||
#define GESV BLASFUNC(zgesv) | |||
#else | |||
#define GESV BLASFUNC(cgesv) | |||
#endif | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *b; | |||
blasint *ipiv; | |||
blasint m, i, j, info; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops Time\n"); | |||
for(m = from; m <= to; m += step){ | |||
fprintf(stderr, " %dx%d : ", (int)m, (int)m); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
b[(long)i + (long)j * (long)m * COMPSIZE] = 0.0; | |||
} | |||
} | |||
for (j = 0; j < m; ++j) { | |||
for (i = 0; i < m * COMPSIZE; ++i) { | |||
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE]; | |||
} | |||
} | |||
begin(); | |||
GESV (&m, &m, a, &m, ipiv, b, &m, &info); | |||
end(); | |||
time1 = getsec(); | |||
fprintf(stderr, | |||
"%10.2f MFlops %10.6f s\n", | |||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,179 +0,0 @@ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#include "bench.h" | |||
#undef GETRF | |||
#undef GETRI | |||
#ifndef COMPLEX | |||
#ifdef XDOUBLE | |||
#define GETRF BLASFUNC(qgetrf) | |||
#define GETRI BLASFUNC(qgetri) | |||
#elif defined(DOUBLE) | |||
#define GETRF BLASFUNC(dgetrf) | |||
#define GETRI BLASFUNC(dgetri) | |||
#else | |||
#define GETRF BLASFUNC(sgetrf) | |||
#define GETRI BLASFUNC(sgetri) | |||
#endif | |||
#else | |||
#ifdef XDOUBLE | |||
#define GETRF BLASFUNC(xgetrf) | |||
#define GETRI BLASFUNC(xgetri) | |||
#elif defined(DOUBLE) | |||
#define GETRF BLASFUNC(zgetrf) | |||
#define GETRI BLASFUNC(zgetri) | |||
#else | |||
#define GETRF BLASFUNC(cgetrf) | |||
#define GETRI BLASFUNC(cgetri) | |||
#endif | |||
#endif | |||
extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info); | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a,*work; | |||
FLOAT wkopt[4]; | |||
blasint *ipiv; | |||
blasint m, i, j, l, info,lwork; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
int loops = 1; | |||
double time1,timeg; | |||
char *p; | |||
char btest = 'I'; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_TEST"))) btest=*p; | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
for(j = 0; j < to; j++){ | |||
for(i = 0; i < to * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
lwork = -1; | |||
m=to; | |||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info); | |||
lwork = (blasint)wkopt[0]; | |||
if (( work = (FLOAT *)malloc(sizeof(FLOAT) * lwork * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE FLops Time Lwork\n"); | |||
for(m = from; m <= to; m += step){ | |||
timeg = 0.; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l = 0; l < loops; l++) { | |||
if (btest == 'F') begin(); | |||
GETRF (&m, &m, a, &m, ipiv, &info); | |||
if (btest == 'F') { | |||
end(); | |||
timeg += getsec(); | |||
} | |||
if (info) { | |||
fprintf(stderr, "Matrix is not singular .. %d\n", info); | |||
exit(1); | |||
} | |||
if (btest == 'I') begin(); | |||
lwork = -1; | |||
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info); | |||
lwork = (blasint)wkopt[0]; | |||
GETRI(&m, a, &m, ipiv, work, &lwork, &info); | |||
if (btest == 'I') end(); | |||
if (info) { | |||
fprintf(stderr, "failed compute inverse matrix .. %d\n", info); | |||
exit(1); | |||
} | |||
if (btest == 'I') | |||
timeg += getsec(); | |||
} // loops | |||
time1 = timeg/(double)loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops : %10.2f Sec : %d\n", | |||
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,134 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef HBMV | |||
#ifdef DOUBLE | |||
#define HBMV BLASFUNC(zhbmv) | |||
#else | |||
#define HBMV BLASFUNC(chbmv) | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *x, *y; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
FLOAT beta [] = {0.0, 0.0}; | |||
blasint k = 1; | |||
char uplo='L'; | |||
blasint m, i, j; | |||
blasint inc_x=1, inc_y=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
if ((p = getenv("OPENBLAS_K"))) k = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n", | |||
from, to, step, uplo, k, inc_x, inc_y, loops); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n"); | |||
exit(1); | |||
} | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n"); | |||
exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n"); | |||
exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) { | |||
timeg=0; | |||
fprintf(stderr, " %6dx%d : ", (int)m, (int)m); | |||
for(j = 0; j < m; j++) { | |||
for(i = 0; i < m * COMPSIZE; i++) { | |||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
for (l = 0; l < loops; l++) { | |||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) { | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) { | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | |||
end(); | |||
timeg += getsec(); | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, " %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,117 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef HEMM | |||
#ifdef DOUBLE | |||
#define HEMM BLASFUNC(zhemm) | |||
#else | |||
#define HEMM BLASFUNC(chemm) | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *b, *c; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
FLOAT beta [] = {1.0, 1.0}; | |||
char *p; | |||
char side='L'; | |||
char uplo='U'; | |||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
blasint m, i, j; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
begin(); | |||
HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||
end(); | |||
time1 = getsec(); | |||
fprintf(stderr, | |||
" %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,134 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef HEMV | |||
#ifdef DOUBLE | |||
#define HEMV BLASFUNC(zhemv) | |||
#else | |||
#define HEMV BLASFUNC(chemv) | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *x, *y; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
FLOAT beta [] = {1.0, 1.0}; | |||
char uplo='L'; | |||
blasint m, i, j; | |||
blasint inc_x=1,inc_y=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6dx%d : ", (int)m,(int)m); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,109 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2020, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef HER | |||
#ifdef DOUBLE | |||
#define HER BLASFUNC(zher) | |||
#else | |||
#define HER BLASFUNC(cher) | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *x; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
blasint incx = 1; | |||
char *p; | |||
char uplo='U'; | |||
char trans='N'; | |||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
blasint m, i, j; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
HER (&uplo, &m, alpha, x, &incx, a, &m ); | |||
end(); | |||
time1 = getsec(); | |||
fprintf(stderr, | |||
" %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6); | |||
} | |||
return 0; | |||
} |
@@ -1,113 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2020, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef HER2 | |||
#ifdef DOUBLE | |||
#define HER2 BLASFUNC(zher2) | |||
#else | |||
#define HER2 BLASFUNC(cher2) | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *x, *y; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
blasint inc = 1; | |||
char *p; | |||
char uplo='U'; | |||
char trans='N'; | |||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
blasint m, i, j; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m ); | |||
end(); | |||
time1 = getsec(); | |||
fprintf(stderr, | |||
" %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6); | |||
} | |||
return 0; | |||
} |
@@ -1,116 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef HER2K | |||
#ifdef DOUBLE | |||
#define HER2K BLASFUNC(zher2k) | |||
#else | |||
#define HER2K BLASFUNC(cher2k) | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *b, *c; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
FLOAT beta [] = {1.0, 1.0}; | |||
char *p; | |||
char uplo='U'; | |||
char trans='N'; | |||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
blasint m, i, j; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
begin(); | |||
HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | |||
end(); | |||
time1 = getsec(); | |||
fprintf(stderr, | |||
" %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,112 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef HERK | |||
#ifdef DOUBLE | |||
#define HERK BLASFUNC(zherk) | |||
#else | |||
#define HERK BLASFUNC(cherk) | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *c; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
FLOAT beta [] = {1.0, 1.0}; | |||
char *p; | |||
char uplo='U'; | |||
char trans='N'; | |||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||
blasint m, i, j; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
begin(); | |||
HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); | |||
end(); | |||
time1 = getsec(); | |||
fprintf(stderr, | |||
" %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,133 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef HPMV | |||
#ifdef DOUBLE | |||
#define HPMV BLASFUNC(zhpmv) | |||
#else | |||
#define HPMV BLASFUNC(chpmv) | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *x, *y; | |||
FLOAT alpha[] = {1.0, 1.0}; | |||
FLOAT beta [] = {1.0, 1.0}; | |||
char uplo='L'; | |||
blasint m, i, j; | |||
blasint inc_x=1, inc_y=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n"); | |||
exit(1); | |||
} | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n"); | |||
exit(1); | |||
} | |||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n"); | |||
exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) { | |||
timeg=0; | |||
fprintf(stderr, " %6dx%d : ", (int)m, (int)m); | |||
for(j = 0; j < m; j++) { | |||
for(i = 0; i < m * COMPSIZE; i++) { | |||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
for (l = 0; l < loops; l++) { | |||
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) { | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) { | |||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, " %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,120 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef IAMAX | |||
#ifdef COMPLEX | |||
#ifdef DOUBLE | |||
#define IAMAX BLASFUNC(izamax) | |||
#else | |||
#define IAMAX BLASFUNC(icamax) | |||
#endif | |||
#else | |||
#ifdef DOUBLE | |||
#define IAMAX BLASFUNC(idamax) | |||
#else | |||
#define IAMAX BLASFUNC(isamax) | |||
#endif | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *x; | |||
blasint m, i; | |||
blasint inc_x=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
IAMAX (&m, x, &inc_x); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MBytes %10.6f sec\n", | |||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,120 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef IAMIN | |||
#ifdef COMPLEX | |||
#ifdef DOUBLE | |||
#define IAMIN BLASFUNC(izamin) | |||
#else | |||
#define IAMIN BLASFUNC(icamin) | |||
#endif | |||
#else | |||
#ifdef DOUBLE | |||
#define IAMIN BLASFUNC(idamin) | |||
#else | |||
#define IAMIN BLASFUNC(isamin) | |||
#endif | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *x; | |||
blasint m, i; | |||
blasint inc_x=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
IAMIN (&m, x, &inc_x); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,114 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef IMAX | |||
#ifndef COMPLEX | |||
#ifdef DOUBLE | |||
#define IMAX BLASFUNC(idmax) | |||
#else | |||
#define IMAX BLASFUNC(ismax) | |||
#endif | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *x; | |||
blasint m, i; | |||
blasint inc_x=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
IMAX (&m, x, &inc_x); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,114 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
#undef IMIN | |||
#ifndef COMPLEX | |||
#ifdef DOUBLE | |||
#define IMIN BLASFUNC(idmin) | |||
#else | |||
#define IMIN BLASFUNC(ismin) | |||
#endif | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *x; | |||
blasint m, i; | |||
blasint inc_x=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
IMIN (&m, x, &inc_x); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,208 +0,0 @@ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#include "bench.h" | |||
double fabs(double); | |||
#undef GETRF | |||
#undef GETRS | |||
#ifndef COMPLEX | |||
#ifdef XDOUBLE | |||
#define GETRF BLASFUNC(qgetrf) | |||
#define GETRS BLASFUNC(qgetrs) | |||
#elif defined(DOUBLE) | |||
#define GETRF BLASFUNC(dgetrf) | |||
#define GETRS BLASFUNC(dgetrs) | |||
#else | |||
#define GETRF BLASFUNC(sgetrf) | |||
#define GETRS BLASFUNC(sgetrs) | |||
#endif | |||
#else | |||
#ifdef XDOUBLE | |||
#define GETRF BLASFUNC(xgetrf) | |||
#define GETRS BLASFUNC(xgetrs) | |||
#elif defined(DOUBLE) | |||
#define GETRF BLASFUNC(zgetrf) | |||
#define GETRS BLASFUNC(zgetrs) | |||
#else | |||
#define GETRF BLASFUNC(cgetrf) | |||
#define GETRS BLASFUNC(cgetrs) | |||
#endif | |||
#endif | |||
int main(int argc, char *argv[]){ | |||
FLOAT *a, *b; | |||
blasint *ipiv; | |||
blasint m, i, j, l, info; | |||
blasint unit = 1; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
int loops = 1; | |||
FLOAT maxerr; | |||
double time1, time2, timeg1,timeg2; | |||
char *p; | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p); | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Residual Decompose Solve Total\n"); | |||
for(m = from; m <= to; m += step){ | |||
timeg1 = timeg2 = 0.; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l = 0; l < loops; l++) { | |||
for(j = 0; j < m; j++){ | |||
for(i = 0; i < m * COMPSIZE; i++){ | |||
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
} | |||
for (i = 0; i < m * COMPSIZE; ++i) b[i] = 0.; | |||
for (j = 0; j < m; ++j) { | |||
for (i = 0; i < m * COMPSIZE; ++i) { | |||
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE]; | |||
} | |||
} | |||
begin(); | |||
GETRF (&m, &m, a, &m, ipiv, &info); | |||
end(); | |||
if (info) { | |||
fprintf(stderr, "Matrix is not singular .. %d\n", info); | |||
exit(1); | |||
} | |||
timeg1 += getsec(); | |||
begin(); | |||
GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info); | |||
end(); | |||
if (info) { | |||
fprintf(stderr, "Matrix is not singular .. %d\n", info); | |||
exit(1); | |||
} | |||
timeg2 += getsec(); | |||
} //loops | |||
time1=timeg1/(double)loops; | |||
time2=timeg2/(double)loops; | |||
maxerr = 0.; | |||
for(i = 0; i < m; i++){ | |||
#ifndef XDOUBLE | |||
if (maxerr < fabs(b[i * COMPSIZE] - 1.0)) maxerr = fabs(b[i * COMPSIZE] - 1.0); | |||
#ifdef COMPLEX | |||
if (maxerr < fabs(b[i * COMPSIZE] + 1)) maxerr = fabs(b[i * COMPSIZE + 1]); | |||
#endif | |||
#else | |||
if (maxerr < fabsl(b[i * COMPSIZE] - 1.0L)) maxerr = fabsl(b[i * COMPSIZE] - 1.0L); | |||
#ifdef COMPLEX | |||
if (maxerr < fabsl(b[i * COMPSIZE] + 1)) maxerr = fabsl(b[i * COMPSIZE + 1]); | |||
#endif | |||
#endif | |||
} | |||
#ifdef XDOUBLE | |||
fprintf(stderr," %Le ", maxerr); | |||
#else | |||
fprintf(stderr," %e ", maxerr); | |||
#endif | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.2f MFlops %10.2f MFlops\n", | |||
COMPSIZE * COMPSIZE * 2. / 3. * (double)m * (double)m * (double)m / time1 * 1.e-6, | |||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time2 * 1.e-6, | |||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m) / (time1 + time2) * 1.e-6); | |||
#if 0 | |||
if ( | |||
#ifdef DOUBLE | |||
maxerr > 1.e-8 | |||
#else | |||
maxerr > 1.e-1 | |||
#endif | |||
) { | |||
fprintf(stderr, "Error is too large.\n"); | |||
exit(1); | |||
} | |||
#endif | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,113 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind NAMAX to the real-valued max kernel for the selected precision.
   When COMPLEX is defined no binding is made and NAMAX stays undefined. */
#undef NAMAX
#if !defined(COMPLEX) && defined(DOUBLE)
#define NAMAX BLASFUNC(dmax)
#elif !defined(COMPLEX)
#define NAMAX BLASFUNC(smax)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *x; | |||
blasint m, i; | |||
blasint inc_x=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
NAMAX (&m, x, &inc_x); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,113 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind NAMIN to the real-valued min kernel for the selected precision.
   When COMPLEX is defined no binding is made and NAMIN stays undefined. */
#undef NAMIN
#if !defined(COMPLEX) && defined(DOUBLE)
#define NAMIN BLASFUNC(dmin)
#elif !defined(COMPLEX)
#define NAMIN BLASFUNC(smin)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *x; | |||
blasint m, i; | |||
blasint inc_x=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
NAMIN (&m, x, &inc_x); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,121 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind NRM2 to the 2-norm kernel matching the COMPLEX / DOUBLE build
   switches: dznrm2, scnrm2, dnrm2 or snrm2. */
#undef NRM2
#if defined(COMPLEX) && defined(DOUBLE)
#define NRM2 BLASFUNC(dznrm2)
#elif defined(COMPLEX)
#define NRM2 BLASFUNC(scnrm2)
#elif defined(DOUBLE)
#define NRM2 BLASFUNC(dnrm2)
#else
#define NRM2 BLASFUNC(snrm2)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *x; | |||
blasint m, i; | |||
blasint inc_x=1; | |||
int loops = 1; | |||
int l; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
double time1,timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
fprintf(stderr, " SIZE Flops\n"); | |||
for(m = from; m <= to; m += step) | |||
{ | |||
timeg=0; | |||
fprintf(stderr, " %6d : ", (int)m); | |||
for (l=0; l<loops; l++) | |||
{ | |||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
begin(); | |||
NRM2 (&m, x, &inc_x); | |||
end(); | |||
time1 = getsec(); | |||
timeg += time1; | |||
} | |||
timeg /= loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg); | |||
} | |||
return 0; | |||
} | |||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -1,122 +0,0 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2024, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "bench.h" | |||
/* Bind OMATCOPY to the out-of-place matrix copy kernel matching the
   COMPLEX / DOUBLE build switches: zomatcopy, comatcopy, domatcopy or
   somatcopy. */
#undef OMATCOPY
#if defined(COMPLEX) && defined(DOUBLE)
#define OMATCOPY BLASFUNC(zomatcopy)
#elif defined(COMPLEX)
#define OMATCOPY BLASFUNC(comatcopy)
#elif defined(DOUBLE)
#define OMATCOPY BLASFUNC(domatcopy)
#else
#define OMATCOPY BLASFUNC(somatcopy)
#endif
int main(int argc, char *argv[]){ | |||
FLOAT *a, *b; | |||
FLOAT alpha[] = {1.0, 0.0}; | |||
char trans = 'N'; | |||
char order = 'C'; | |||
blasint crows, ccols, clda, cldb; | |||
int loops = 1; | |||
char *p; | |||
int from = 1; | |||
int to = 200; | |||
int step = 1; | |||
int i, j; | |||
double time1, timeg; | |||
argc--;argv++; | |||
if (argc > 0) { from = atol(*argv); argc--; argv++; } | |||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; } | |||
if (argc > 0) { step = atol(*argv); argc--; argv++; } | |||
if ((p = getenv("OPENBLAS_TRANS"))) { | |||
trans=*p; | |||
} | |||
if ((p = getenv("OPENBLAS_ORDER"))) { | |||
order=*p; | |||
} | |||
TOUPPER(trans); | |||
TOUPPER(order); | |||
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c : Order=%c\n", from, to, step, trans, order); | |||
p = getenv("OPENBLAS_LOOPS"); | |||
if ( p != NULL ) { | |||
loops = atoi(p); | |||
} | |||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) { | |||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||
} | |||
#ifdef __linux | |||
srandom(getpid()); | |||
#endif | |||
for (i = 0; i < to * to * COMPSIZE; i++) { | |||
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
for (i = 0; i < to * to * COMPSIZE; i++) { | |||
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
} | |||
fprintf(stderr, " SIZE Flops Time\n"); | |||
for (i = from; i <= to; i += step) { | |||
cldb = clda = crows = ccols = i; | |||
fprintf(stderr, " ROWS=%4d, COLS=%4d : ", (int)crows, (int)ccols); | |||
begin(); | |||
for (j=0; j<loops; j++) { | |||
OMATCOPY (&order, &trans, &crows, &ccols, alpha, a, &clda, b, &cldb); | |||
} | |||
end(); | |||
time1 = getsec(); | |||
timeg = time1/loops; | |||
fprintf(stderr, | |||
" %10.2f MFlops %10.6f sec\n", | |||
COMPSIZE * COMPSIZE * (double)ccols * (double)crows / timeg * 1.e-6, time1); | |||
} | |||
free(a); | |||
free(b); | |||
return 0; | |||
} |
@@ -1,65 +0,0 @@ | |||
#!/bin/sh | |||
# ********************************************************************************** | |||
# Copyright (c) 2014, The OpenBLAS Project | |||
# All rights reserved. | |||
# Redistribution and use in source and binary forms, with or without | |||
# modification, are permitted provided that the following conditions are | |||
# met: | |||
# 1. Redistributions of source code must retain the above copyright | |||
# notice, this list of conditions and the following disclaimer. | |||
# 2. Redistributions in binary form must reproduce the above copyright | |||
# notice, this list of conditions and the following disclaimer in | |||
# the documentation and/or other materials provided with the | |||
# distribution. | |||
# 3. Neither the name of the OpenBLAS project nor the names of | |||
# its contributors may be used to endorse or promote products | |||
# derived from this software without specific prior written permission. | |||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
# ********************************************************************************** | |||
# ************************************************************************ | |||
# sample filter for data output from benchmark programs | |||
# | |||
# usage example: | |||
# ./dgemm.goto 2>&1|./plotfilter.sh >OpenBLAS | |||
# ************************************************************************ | |||
# Pick the filter mode from the command line. A mode is honoured only when
# exactly one argument is given; any other argument count selects the
# default filter.
if [ $# -eq 1 ]
then
	arg1=$1
else
	arg1=0
fi

# Each mode prints two columns taken from the matching benchmark lines and
# drops the first matching line. `tail -n +2` is the POSIX spelling; the
# GNU-only long option `--lines=+2` is not accepted by every tail.
case "$arg1" in

	L)
		# Linpack Benchmark
		awk '/MFlops/ { print $1,int($8) }' | tail -n +2
		;;

	C)
		# Cholesky Benchmark
		awk '/MFlops/ { print $3,int($9) }' | tail -n +2
		;;

	B)
		# Copy Benchmark
		awk '/MBytes/ { print $1,int($3) }' | tail -n +2
		;;

	*)
		# Default: first and third field of every MFlops line
		awk '/MFlops/ { print $1,int($3) }' | tail -n +2
		;;
esac
@@ -1,42 +0,0 @@ | |||
# ********************************************************************************** | |||
# Copyright (c) 2014, The OpenBLAS Project | |||
# All rights reserved. | |||
# Redistribution and use in source and binary forms, with or without | |||
# modification, are permitted provided that the following conditions are | |||
# met: | |||
# 1. Redistributions of source code must retain the above copyright | |||
# notice, this list of conditions and the following disclaimer. | |||
# 2. Redistributions in binary form must reproduce the above copyright | |||
# notice, this list of conditions and the following disclaimer in | |||
# the documentation and/or other materials provided with the | |||
# distribution. | |||
# 3. Neither the name of the OpenBLAS project nor the names of | |||
# its contributors may be used to endorse or promote products | |||
# derived from this software without specific prior written permission. | |||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
# ********************************************************************************** | |||
# Sample plot of benchmark results: one smoothed curve per data file.

# Display terminal
set terminal x11 font sans

# Axis labels, grid on both axes, legend on the left
set xlabel "Size"
set ylabel "MFlops"
set grid xtics ytics
set key left

# Annotations
set timestamp "generated on %Y-%m-%d by `whoami`"
set title "Dtrsm\nUPLO=U TRANS=N SIDE=L\nBulldozer 1 Thread"

# Data files 'OpenBLAS', 'ACML' and 'MKL' are read from the current directory
plot 'OpenBLAS' smooth bezier, 'ACML' smooth bezier, 'MKL' smooth bezier

# NOTE(review): the output file is selected only after the plot command,
# so the plot above presumably went to the x11 terminal -- confirm intent.
set output "print.png"

# Echo the current settings
show title
show plot
show output