Compare commits

...

No commits in common. 'develop' and 'gh-pages' have entirely different histories.

100 changed files with 1 additions and 21246 deletions
Split View
  1. +0
    -174
      .cirrus.yml
  2. +0
    -16
      .cirun.yml
  3. +0
    -216
      .drone.yml
  4. +0
    -156
      .github/workflows/apple_m.yml
  5. +0
    -140
      .github/workflows/arm64_graviton.yml
  6. +0
    -158
      .github/workflows/c910v.yml
  7. +0
    -157
      .github/workflows/codspeed-bench.yml
  8. +0
    -40
      .github/workflows/docs.yml
  9. +0
    -386
      .github/workflows/dynamic_arch.yml
  10. +0
    -37
      .github/workflows/harmonyos.yml
  11. +0
    -119
      .github/workflows/loongarch64.yml
  12. +0
    -141
      .github/workflows/loongarch64_clang.yml
  13. +0
    -123
      .github/workflows/mips64.yml
  14. +0
    -90
      .github/workflows/nightly-Homebrew-build.yml
  15. +0
    -256
      .github/workflows/riscv64_vector.yml
  16. +0
    -84
      .github/workflows/windows_arm64.yml
  17. +0
    -118
      .gitignore
  18. +0
    -0
      .nojekyll
  19. +0
    -320
      .travis.yml
  20. +0
    -43
      BACKERS.md
  21. +0
    -729
      CMakeLists.txt
  22. +1
    -0
      CNAME
  23. +0
    -269
      CONTRIBUTORS.md
  24. +0
    -2402
      Changelog.txt
  25. +0
    -32
      GotoBLAS_00License.txt
  26. +0
    -93
      GotoBLAS_01Readme.txt
  27. +0
    -118
      GotoBLAS_02QuickInstall.txt
  28. +0
    -128
      GotoBLAS_03FAQ.txt
  29. +0
    -13
      GotoBLAS_04FAQ.txt
  30. +0
    -53
      GotoBLAS_05LargePage.txt
  31. +0
    -22
      GotoBLAS_06WeirdPerformance.txt
  32. +0
    -14
      Jenkinsfile
  33. +0
    -16
      Jenkinsfile.pwr
  34. +0
    -29
      LICENSE
  35. +0
    -459
      Makefile
  36. +0
    -39
      Makefile.alpha
  37. +0
    -19
      Makefile.arm
  38. +0
    -429
      Makefile.arm64
  39. +0
    -4
      Makefile.csky
  40. +0
    -1
      Makefile.e2k
  41. +0
    -1
      Makefile.generic
  42. +0
    -22
      Makefile.ia64
  43. +0
    -345
      Makefile.install
  44. +0
    -3
      Makefile.loongarch64
  45. +0
    -4
      Makefile.mips
  46. +0
    -4
      Makefile.mips64
  47. +0
    -224
      Makefile.power
  48. +0
    -113
      Makefile.prebuild
  49. +0
    -20
      Makefile.riscv64
  50. +0
    -337
      Makefile.rule
  51. +0
    -48
      Makefile.sparc
  52. +0
    -1971
      Makefile.system
  53. +0
    -658
      Makefile.tail
  54. +0
    -89
      Makefile.x86
  55. +0
    -299
      Makefile.x86_64
  56. +0
    -16
      Makefile.zarch
  57. +0
    -370
      README.md
  58. +0
    -20
      SECURITY.md
  59. +0
    -154
      TargetList.txt
  60. +0
    -213
      USAGE.md
  61. +0
    -78
      appveyor.yml
  62. +0
    -330
      azure-pipelines.yml
  63. +0
    -9
      benchmark/Make_exe.sh
  64. +0
    -3541
      benchmark/Makefile
  65. +0
    -133
      benchmark/amax.c
  66. +0
    -137
      benchmark/amin.c
  67. +0
    -135
      benchmark/asum.c
  68. +0
    -124
      benchmark/axpby.c
  69. +0
    -128
      benchmark/axpy.c
  70. +0
    -134
      benchmark/bench.h
  71. +0
    -247
      benchmark/cholesky.c
  72. +0
    -123
      benchmark/copy.c
  73. +0
    -28
      benchmark/cula_wrapper.c
  74. +0
    -118
      benchmark/dot.c
  75. +0
    -189
      benchmark/geev.c
  76. +0
    -197
      benchmark/gemm.c
  77. +0
    -136
      benchmark/gemm3m.c
  78. +0
    -214
      benchmark/gemv.c
  79. +0
    -149
      benchmark/ger.c
  80. +0
    -143
      benchmark/gesv.c
  81. +0
    -179
      benchmark/getri.c
  82. +0
    -134
      benchmark/hbmv.c
  83. +0
    -117
      benchmark/hemm.c
  84. +0
    -134
      benchmark/hemv.c
  85. +0
    -109
      benchmark/her.c
  86. +0
    -113
      benchmark/her2.c
  87. +0
    -116
      benchmark/her2k.c
  88. +0
    -112
      benchmark/herk.c
  89. +0
    -133
      benchmark/hpmv.c
  90. +0
    -120
      benchmark/iamax.c
  91. +0
    -120
      benchmark/iamin.c
  92. +0
    -114
      benchmark/imax.c
  93. +0
    -114
      benchmark/imin.c
  94. +0
    -208
      benchmark/linpack.c
  95. +0
    -113
      benchmark/max.c
  96. +0
    -113
      benchmark/min.c
  97. +0
    -121
      benchmark/nrm2.c
  98. +0
    -122
      benchmark/omatcopy.c
  99. +0
    -65
      benchmark/plot-filter.sh
  100. +0
    -42
      benchmark/plot-header

+ 0
- 174
.cirrus.yml View File

@@ -1,174 +0,0 @@
macos_instance:
image: ghcr.io/cirruslabs/macos-monterey-xcode:latest

#task:
# name: AppleM1/LLVM
# compile_script:
# - brew install llvm
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang

#task:
# name: AppleM1/LLVM/ILP64
# compile_script:
# - brew install llvm
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
# - make TARGET=VORTEX USE_OPENMP=1 CC=clang INTERFACE64=1

#task:
# name: AppleM1/LLVM/CMAKE
# compile_script:
# - brew install llvm
# - export PATH=/opt/homebrew/opt/llvm/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
# - export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
# - mkdir build
# - cd build
# - cmake -DTARGET=VORTEX -DCMAKE_C_COMPILER=clang -DBUILD_SHARED_LIBS=ON ..
# - make -j 4

#task:
# name: AppleM1/GCC/MAKE/OPENMP
# compile_script:
# - brew install gcc@11
# - export PATH=/opt/homebrew/bin:$PATH
# - export LDFLAGS="-L/opt/homebrew/lib"
# - export CPPFLAGS="-I/opt/homebrew/include"
# - make CC=gcc-11 FC=gfortran-11 USE_OPENMP=1
macos_instance:
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest
task:
name: AppleM1/LLVM x86_64 xbuild
compile_script:
- #brew install llvm
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
- export ARCHS="i386 x86_64"
- export ARCHS_STANDARD="i386 x86_64"
- export ARCHS_STANDARD_32_64_BIT="i386 x86_64"
- export ARCHS_STANDARD_64_BIT=x86_64
- export ARCHS_STANDARD_INCLUDING_64_BIT="i386 x86_64"
- export ARCHS_UNIVERSAL_IPHONE_OS="i386 x86_64"
- export VALID_ARCHS="i386 x86_64"
- xcrun --sdk macosx --show-sdk-path
- xcodebuild -version
- export CC=/Applications/Xcode_16.3.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_16.3.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX15.4.sdk -arch x86_64"
- make TARGET=CORE2 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
always:
config_artifacts:
path: "*conf*"
type: text/plain
# lib_artifacts:
# path: "libopenblas*"
# type: application/octet-streamm

macos_instance:
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest
task:
name: AppleM1/LLVM armv8-ios xbuild
compile_script:
- #brew install llvm
- export #PATH=/opt/homebrew/opt/llvm/bin:$PATH
- export #LDFLAGS="-L/opt/homebrew/opt/llvm/lib"
- export #CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
- export CC=/Applications/Xcode_16.3.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
- export CFLAGS="-O2 -unwindlib=none -Wno-macro-redefined -isysroot /Applications/Xcode_16.3.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS18.4.sdk -arch arm64 -miphoneos-version-min=10.0"
- xcrun --sdk iphoneos --show-sdk-path
- ls -l /Applications
- make TARGET=ARMV8 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 CROSS=1
always:
config_artifacts:
path: "*conf*"
type: text/plain

macos_instance:
image: ghcr.io/cirruslabs/macos-sonoma-xcode:latest
task:
name: AppleM1/LLVM armv7-androidndk xbuild
compile_script:
- brew install --cask android-ndk
- export ANDROID_NDK_HOME="/opt/homebrew/share/android-ndk"
- export CC=/opt/homebrew/share/android-ndk/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi23-clang
- make TARGET=ARMV7 ARM_SOFTFP_ABI=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1 RANLIB="ls -l"
always:
config_artifacts:
path: "*conf*"
type: text/plain

task:
name: NeoverseN1
arm_container:
image: node:latest
compile_script:
- make

task:
name: NeoverseN1-ILP64
arm_container:
image: node:latest
compile_script:
- make INTERFACE64=1

task:
name: NeoverseN1-OMP
arm_container:
image: node:latest
cpu: 8
compile_script:
- make USE_OPENMP=1

FreeBSD_task:
name: FreeBSD-gcc
freebsd_instance:
image_family: freebsd-14-2
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
compile_script:
- ls -l /usr/local/lib
- gmake CC=gcc


FreeBSD_task:
name: freebsd-gcc-ilp64
freebsd_instance:
image_family: freebsd-14-2
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
compile_script:
- ls -l /usr/local/lib
- gmake CC=gcc INTERFACE64=1

FreeBSD_task:
name: FreeBSD-clang-openmp
freebsd_instance:
image_family: freebsd-14-2
install_script:
- pkg update -f && pkg upgrade -y && pkg install -y gmake gcc
- ln -s /usr/local/lib/gcc13/libgfortran.so.5.0.0 /usr/lib/libgfortran.so
compile_script:
- gmake CC=clang FC=gfortran USE_OPENMP=1 CPP_THREAD_SAFETY_TEST=1

#task:
# name: Windows/LLVM16 --- too slow ---
# windows_container:
# image: cirrusci/windowsservercore:cmake-2021.12.07
# install_script:
# - choco list --localonly
# - choco install -y llvm
# - # choco install -y cmake --installargs '"ADD_CMAKE_TO_PATH=System"'
# - choco install -y ninja
# - refreshenv
# - cd "c:/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/VC/Auxiliary/Build"
# - vcvarsall x64
# - cd "C:\Users\ContainerAdministrator\AppData\Local\Temp\cirrus-ci-build"
# - cmake -S . -B build -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release
# - cd build
# - cmake --build .
# - ctest

+ 0
- 16
.cirun.yml View File

@@ -1,16 +0,0 @@
# Self-Hosted Github Action Runners on AWS via Cirun.io
# Reference: https://docs.cirun.io/reference/yaml
runners:
- name: "aws-runner-graviton"
# Cloud Provider: AWS
cloud: "aws"
region: "us-east-1"
# Cheapest VM on AWS
instance_type: "c7g.large"
# Ubuntu-22.04, ami image
machine_image: "ami-0a0c8eebcdd6dcbd0"
preemptible: false
# Add this label in the "runs-on" param in .github/workflows/<workflow-name>.yml
# So that this runner is created for running the workflow
labels:
- "cirun-aws-runner-graviton"

+ 0
- 216
.drone.yml View File

@@ -1,216 +0,0 @@
---
kind: pipeline
name: arm64_gcc_make

platform:
os: linux
arch: arm64

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS

---
kind: pipeline
name: arm32_gcc_make

platform:
os: linux
arch: arm

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV6 NUM_THREADS=32'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS

---
kind: pipeline
name: arm64_clang_make

platform:
os: linux
arch: arm64

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: clang
COMMON_FLAGS: 'DYNAMIC_ARCH=1 TARGET=ARMV8 NUM_THREADS=32'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS

---
kind: pipeline
name: arm32_clang_cmake

platform:
os: linux
arch: arm

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: clang
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV6 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
commands:
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
- apt-get update -y
- apt-get install -y make $CC g++ perl cmake
- $CC --version
- mkdir build && cd build
- cmake $CMAKE_FLAGS ..
- make -j
- ctest -V

---
kind: pipeline
name: arm64_gcc_cmake

platform:
os: linux
arch: arm64

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
commands:
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
- apt-get update -y
- apt-get install -y make $CC g++ perl cmake
- $CC --version
- mkdir build && cd build
- cmake $CMAKE_FLAGS ..
- make -j
- ctest -V

---
kind: pipeline
name: arm64_clang_cmake

platform:
os: linux
arch: arm64

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: clang
CMAKE_FLAGS: '-DDYNAMIC_ARCH=1 -DTARGET=ARMV8 -DNUM_THREADS=32 -DNOFORTRAN=ON -DBUILD_WITHOUT_LAPACK=ON'
commands:
- echo "CMAKE_FLAGS:= $CMAKE_FLAGS"
- apt-get update -y
- apt-get install -y make $CC g++ perl cmake
- $CC --version
- mkdir build && cd build
- cmake $CMAKE_FLAGS ..
- make -j
- ctest -V

---
kind: pipeline
name: arm64_native_test

platform:
os: linux
arch: arm64

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'USE_OPENMP=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C cpp_thread_test dgemm_tester
---
kind: pipeline
name: epyc_native_test

platform:
os: linux
arch: amd64

steps:
- name: Build and Test
image: ubuntu:18.04
environment:
CC: gcc
COMMON_FLAGS: 'USE_OPENMP=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C test $COMMON_FLAGS
- make -C ctest $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C cpp_thread_test dgemm_tester
---
kind: pipeline
name: arm64_gcc10

platform:
os: linux
arch: arm64

steps:
- name: Build and Test
image: ubuntu:20.04
environment:
CC: gcc-10
FC: gfortran-10
COMMON_FLAGS: 'TARGET=ARMV8 DYNAMIC_ARCH=1'
commands:
- echo "MAKE_FLAGS:= $COMMON_FLAGS"
- apt-get update -y
- apt-get install -y make $CC gfortran-10 perl python g++
- $CC --version
- make QUIET_MAKE=1 $COMMON_FLAGS
- make -C utest $COMMON_FLAGS
- make -C test $COMMON_FLAGS

+ 0
- 156
.github/workflows/apple_m.yml View File

@@ -1,156 +0,0 @@
name: apple m

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: macos-14

strategy:
fail-fast: false
matrix:
build: [cmake, make]
fortran: [gfortran]
openmp: [0, 1]
ilp64: [0, 1]
steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
elif [ "$RUNNER_OS" == "macOS" ]; then
sysctl -a | grep machdep.cpu
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi

- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get install -y gfortran cmake ccache libtinfo5
elif [ "$RUNNER_OS" == "macOS" ]; then
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
brew reinstall gcc
brew install coreutils ccache
brew install llvm
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}

- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
elif [ "$RUNNER_OS" = "macOS" ]; then
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
echo "/opt/homebrew/opt/llvm/bin" >>$GITHUB_PATH
echo "" >>$GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Add gfortran runtime to link path
if: matrix.build == 'make' && runner.os == 'macOS'
run: |
GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
echo "LDFLAGS=-L/opt/homebrew/opt/llvm/lib -L$GFORTRAN_LIBDIR" >> $GITHUB_ENV

- name: Build OpenBLAS
run: |
export CPPFLAGS="-I/opt/homebrew/opt/llvm/include"
export CC="/opt/homebrew/opt/llvm/bin/clang"
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=${{matrix.openmp}} INTERFACE64=${{matrix.ilp64}} FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
export LDFLAGS="$LDFLAGS -Wl,-ld_classic"
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DUSE_OPENMP=${{matrix.openmp}} \
-DOpenMP_Fortran_LIB_NAMES=omp \
-DINTERFACE64=${{matrix.ilp64}} \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

- name: Show ccache status
continue-on-error: true
run: ccache -s

- name: Run tests
timeout-minutes: 60
run: |
case "${{ matrix.build }}" in
"make")
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
echo "::group::Tests in 'test' directory"
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'ctest' directory"
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'utest' directory"
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
;;
"cmake")
cd build && ctest
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

+ 0
- 140
.github/workflows/arm64_graviton.yml View File

@@ -1,140 +0,0 @@
name: arm64 graviton cirun

on:
push:
branches:
- develop
- release-**
pull_request:
branches:
- develop
- release-**

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: "cirun-aws-runner-graviton--${{ github.run_id }}"

strategy:
fail-fast: false
matrix:
fortran: [gfortran]
build: [cmake, make]

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi

- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt update
sudo apt-get install -y gfortran cmake ccache libtinfo5
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}

- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s

- name: Build OpenBLAS
run: |
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 BUILD_BFLOAT16=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DBUILD_BFLOAT16=1 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

- name: Show ccache status
continue-on-error: true
run: ccache -s

- name: Run tests
timeout-minutes: 60
run: |
case "${{ matrix.build }}" in
"make")
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
echo "::group::Tests in 'test' directory"
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'ctest' directory"
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'utest' directory"
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
;;
"cmake")
cd build && ctest
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

+ 0
- 158
.github/workflows/c910v.yml View File

@@ -1,158 +0,0 @@
name: c910v qemu test

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
env:
xuetie_toolchain: https://occ-oss-prod.oss-cn-hangzhou.aliyuncs.com/resource//1698113812618
toolchain_file_name: Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0-20231018.tar.gz
strategy:
fail-fast: false
matrix:
include:
- target: RISCV64_GENERIC
triple: riscv64-linux-gnu
apt_triple: riscv64-linux-gnu
opts: NO_SHARED=1 TARGET=RISCV64_GENERIC
- target: C910V
triple: riscv64-unknown-linux-gnu
apt_triple: riscv64-linux-gnu
opts: NO_SHARED=1 TARGET=C910V

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: install build deps
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
gcc-${{ matrix.apt_triple }} gfortran-${{ matrix.apt_triple }} libgomp1-riscv64-cross libglib2.0-dev

- name: checkout qemu
uses: actions/checkout@v4
with:
repository: XUANTIE-RV/qemu
path: qemu
ref: e0ace167effcd36d1f82c7ccb4522b3126011479 # xuantie-qemu-9.0

- name: build qemu
run: |
# Force use c910v qemu-user
wget https://github.com/revyos/qemu/commit/222729c7455784dd855216d7a2bec4bd8f2a6800.patch
cd qemu
patch -p1 < ../222729c7455784dd855216d7a2bec4bd8f2a6800.patch
export CXXFLAGS="-Wno-error"; export CFLAGS="-Wno-error"
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=riscv64-linux-user --disable-system
make -j$(nproc)
make install

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}

- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s

- name: build OpenBLAS
run: |
wget ${xuetie_toolchain}/${toolchain_file_name}
tar -xvf ${toolchain_file_name} -C /opt
export PATH="/opt/Xuantie-900-gcc-linux-5.10.4-glibc-x86_64-V2.8.0/bin:$PATH"

make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

- name: test
run: |
run_with_retry() {
local cmd="$1"
local time_out=10
local retries=10
local attempt=0

for ((i=1; i<=retries; i++)); do
attempt=$((i))
if timeout -s 12 --preserve-status $time_out $cmd; then
echo "Command succeeded on attempt $i."
return 0
else
local exit_code=$?
if [ $exit_code -eq 140 ]; then
echo "Attempt $i timed out (retrying...)"
time_out=$((time_out + 5))
else
echo "Attempt $i failed with exit code $exit_code. Aborting workflow."
exit $exit_code
fi
fi
done
echo "All $retries attempts failed, giving up."
echo "Final failure was due to timeout."
echo "Aborting workflow."
exit $exit_code
}
export PATH=$GITHUB_WORKSPACE/qemu-install/bin:$PATH
which qemu-riscv64
export QEMU_BIN=$(which qemu-riscv64)
run_with_retry "$QEMU_BIN ./utest/openblas_utest"
run_with_retry "$QEMU_BIN ./utest/openblas_utest_ext"

OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-riscv64 ./test/zblat3 < ./test/zblat3.dat

+ 0
- 157
.github/workflows/codspeed-bench.yml View File

@@ -1,157 +0,0 @@
name: Run codspeed benchmarks

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
benchmarks:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
strategy:
fail-fast: false
matrix:
os: [ubuntu-22.04]
fortran: [gfortran]
build: [make]
pyver: ["3.12"]
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v3
- uses: actions/setup-python@v3
with:
python-version: ${{ matrix.pyver }}

- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
fi

- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get update
sudo apt-get install -y gfortran cmake ccache libtinfo5
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}

- name: Write out the .pc
run: |
cd benchmark/pybench
cat > openblas.pc << EOF
libdir=${{ github.workspace }}
includedir= ${{ github.workspace }}
openblas_config= OpenBLAS 0.3.27 DYNAMIC_ARCH NO_AFFINITY Haswell MAX_THREADS=64
version=0.0.99
extralib=-lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
Name: openblas
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: ${version}
URL: https://github.com/xianyi/OpenBLAS
Libs: ${{ github.workspace }}/libopenblas.so -Wl,-rpath,${{ github.workspace }}
Libs.private: -lm -lpthread -lgfortran -lquadmath -L${{ github.workspace }} -lopenblas
Cflags: -I${{ github.workspace}}
EOF
cat openblas.pc

- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
elif [ "$RUNNER_OS" = "macOS" ]; then
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s

- name: Build OpenBLAS
run: |
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

- name: Show ccache status
continue-on-error: true
run: ccache -s

- name: Install benchmark dependencies
run: pip install meson ninja numpy pytest pytest-codspeed --user

- name: Build the wrapper
run: |
cd benchmark/pybench
export PKG_CONFIG_PATH=$PWD
meson setup build --prefix=$PWD/build-install
meson install -C build
#
# sanity check
cd build/openblas_wrap
python -c'import _flapack; print(dir(_flapack))'

- name: Run benchmarks under pytest-benchmark
run: |
cd benchmark/pybench
pip install pytest-benchmark
export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py -k 'gesdd'

- name: Run benchmarks
uses: CodSpeedHQ/action@v3
with:
token: ${{ secrets.CODSPEED_TOKEN }}
run: |
cd benchmark/pybench
export PYTHONPATH=$PWD/build-install/lib/python${{matrix.pyver}}/site-packages/
OPENBLAS_NUM_THREADS=1 pytest benchmarks/bench_blas.py --codspeed


+ 0
- 40
.github/workflows/docs.yml View File

@@ -1,40 +0,0 @@
name: Publish docs via GitHub Pages

on:
push:
branches:
- develop
pull_request:
branches:
- develop

jobs:
build:
name: Deploy docs
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
with:
fetch-depth: 0

- uses: actions/setup-python@v5
with:
python-version: "3.10"

- name: Install MkDocs and doc theme packages
run: pip install mkdocs mkdocs-material mkdocs-git-revision-date-localized-plugin mkdocs-mermaid2-plugin

- name: Build docs site
run: mkdocs build

# mkdocs gh-deploy command only builds to the top-level, hence deploying
# with this action instead.
# Deploys to http://www.openmathlib.org/OpenBLAS/docs/
- name: Deploy docs
uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e # v4.0.0
if: ${{ github.ref == 'refs/heads/develop' }}
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
publish_dir: ./site
destination_dir: docs/

+ 0
- 386
.github/workflows/dynamic_arch.yml View File

@@ -1,386 +0,0 @@
name: continuous build

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ${{ matrix.os }}

strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
fortran: [gfortran, flang]
build: [cmake, make]
exclude:
- os: macos-latest
fortran: flang

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Print system information
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
cat /proc/cpuinfo
elif [ "$RUNNER_OS" == "macOS" ]; then
sysctl -a | grep machdep.cpu
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi

- name: Install Dependencies
run: |
if [ "$RUNNER_OS" == "Linux" ]; then
sudo apt-get update
sudo apt-get install -y gfortran cmake ccache
wget http://security.ubuntu.com/ubuntu/pool/universe/n/ncurses/libtinfo5_6.3-2ubuntu0.1_amd64.deb
sudo apt install ./libtinfo5_6.3-2ubuntu0.1_amd64.deb
elif [ "$RUNNER_OS" == "macOS" ]; then
# It looks like "gfortran" isn't working correctly unless "gcc" is re-installed.
brew reinstall gcc
brew install coreutils ccache
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
# GNU make and cmake call the compilers differently. It looks like
# that causes the cache to mismatch. Keep the ccache for both build
# tools separate to avoid polluting each other.
key: ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}-${{ github.sha }}
# Restore a matching ccache cache entry. Prefer same branch and same Fortran compiler.
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.build }}-${{ matrix.fortran }}
ccache-${{ runner.os }}-${{ matrix.build }}

- name: Configure ccache
run: |
if [ "${{ matrix.build }}" = "make" ]; then
# Add ccache to path
if [ "$RUNNER_OS" = "Linux" ]; then
echo "/usr/lib/ccache" >> $GITHUB_PATH
elif [ "$RUNNER_OS" = "macOS" ]; then
echo "$(brew --prefix)/opt/ccache/libexec" >> $GITHUB_PATH
else
echo "::error::$RUNNER_OS not supported"
exit 1
fi
fi
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s
- name: Add gfortran runtime to link path
if: matrix.build == 'make' && runner.os == 'macOS'
run: |
GFORTRAN_LIBDIR=$(gfortran -print-file-name=libgfortran.dylib | xargs dirname)
echo "Using gfortran runtime in $GFORTRAN_LIBDIR"
# Preserve whatever LDFLAGS may already contain
echo "LDFLAGS=${LDFLAGS:+$LDFLAGS }-L$GFORTRAN_LIBDIR" >> "$GITHUB_ENV"

- name: Build OpenBLAS
run: |
if [ "${{ matrix.fortran }}" = "flang" ]; then
# download and install classic flang
cd /usr/
sudo wget -nv https://github.com/flang-compiler/flang/releases/download/flang_20190329/flang-20190329-x86-70.tgz
sudo tar xf flang-20190329-x86-70.tgz
sudo rm flang-20190329-x86-70.tgz
cd -
fi
case "${{ matrix.build }}" in
"make")
make -j$(nproc) DYNAMIC_ARCH=1 USE_OPENMP=0 FC="ccache ${{ matrix.fortran }}"
;;
"cmake")
mkdir build && cd build
cmake -DDYNAMIC_ARCH=1 \
-DNOFORTRAN=0 \
-DBUILD_WITHOUT_LAPACK=0 \
-DCMAKE_VERBOSE_MAKEFILE=ON \
-DCMAKE_BUILD_TYPE=Release \
-DCMAKE_Fortran_COMPILER=${{ matrix.fortran }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..
cmake --build .
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac

- name: Show ccache status
continue-on-error: true
run: ccache -s

- name: Run tests
timeout-minutes: 60
run: |
case "${{ matrix.build }}" in
"make")
MAKE_FLAGS='DYNAMIC_ARCH=1 USE_OPENMP=0'
echo "::group::Tests in 'test' directory"
make -C test $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'ctest' directory"
make -C ctest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
echo "::group::Tests in 'utest' directory"
make -C utest $MAKE_FLAGS FC="ccache ${{ matrix.fortran }}"
echo "::endgroup::"
;;
"cmake")
cd build && ctest
;;
*)
echo "::error::Configuration not supported"
exit 1
;;
esac


msys2:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: windows-latest

strategy:
fail-fast: false
matrix:
msystem: [UCRT64, MINGW32, CLANG64]
idx: [int32, int64]
build-type: [Release]
include:
- msystem: UCRT64
idx: int32
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
- msystem: MINGW32
idx: int32
target-prefix: mingw-w64-i686
fc-pkg: fc
- msystem: CLANG64
idx: int32
target-prefix: mingw-w64-clang-x86_64
fc-pkg: fc
- msystem: UCRT64
idx: int64
idx64-flags: -DBINARY=64 -DINTERFACE64=1
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
- msystem: CLANG64
idx: int64
idx64-flags: -DBINARY=64 -DINTERFACE64=1
target-prefix: mingw-w64-clang-x86_64
fc-pkg: fc
- msystem: UCRT64
idx: int32
target-prefix: mingw-w64-ucrt-x86_64
fc-pkg: fc
build-type: None
exclude:
- msystem: MINGW32
idx: int64

defaults:
run:
# Use MSYS2 bash as default shell
shell: msys2 {0}

env:
CHERE_INVOKING: 1

steps:
- name: Get CPU name
shell: pwsh
run : |
Get-CIMInstance -Class Win32_Processor | Select-Object -Property Name

- name: Install build dependencies
uses: msys2/setup-msys2@v2
with:
msystem: ${{ matrix.msystem }}
update: true
release: false # Use pre-installed version
install: >-
base-devel
${{ matrix.target-prefix }}-cc
${{ matrix.target-prefix }}-${{ matrix.fc-pkg }}
${{ matrix.target-prefix }}-cmake
${{ matrix.target-prefix }}-ninja
${{ matrix.target-prefix }}-ccache

- name: Checkout repository
uses: actions/checkout@v3

- name: Prepare ccache
# Get cache location of ccache
# Create key that is used in action/cache/restore and action/cache/save steps
id: ccache-prepare
run: |
echo "ccachedir=$(cygpath -m $(ccache -k cache_dir))" >> $GITHUB_OUTPUT
# We include the commit sha in the cache key, as new cache entries are
# only created if there is no existing entry for the key yet.
echo "key=ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}-${{ github.sha }}" >> $GITHUB_OUTPUT

- name: Restore ccache
uses: actions/cache/restore@v3
with:
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
key: ${{ steps.ccache-prepare.outputs.key }}
# Restore a matching ccache cache entry. Prefer same branch.
restore-keys: |
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}-${{ github.ref }}
ccache-msys2-${{ matrix.msystem }}-${{ matrix.idx }}-${{ matrix.build-type }}

- name: Configure ccache
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota.
run: |
which ccache
test -d ${{ steps.ccache-prepare.outputs.ccachedir }} || mkdir -p ${{ steps.ccache-prepare.outputs.ccachedir }}
echo "max_size = 250M" > ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
echo "compression = true" >> ${{ steps.ccache-prepare.outputs.ccachedir }}/ccache.conf
ccache -p
ccache -s
echo $HOME
cygpath -w $HOME

- name: Configure OpenBLAS
run: |
mkdir build && cd build
cmake -DCMAKE_BUILD_TYPE=${{ matrix.build-type }} \
-DBUILD_SHARED_LIBS=ON \
-DBUILD_STATIC_LIBS=ON \
-DDYNAMIC_ARCH=ON \
-DUSE_THREAD=ON \
-DNUM_THREADS=64 \
-DTARGET=CORE2 \
${{ matrix.idx64-flags }} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_Fortran_COMPILER_LAUNCHER=ccache \
..

- name: Build OpenBLAS
run: cd build && cmake --build .

- name: Show ccache status
continue-on-error: true
run: ccache -s

- name: Save ccache
# Save the cache after we are done (successfully) building
uses: actions/cache/save@v3
with:
path: ${{ steps.ccache-prepare.outputs.ccachedir }}
key: ${{ steps.ccache-prepare.outputs.key }}

- name: Run tests
id: run-ctest
timeout-minutes: 60
run: cd build && ctest

- name: Re-run tests
if: always() && (steps.run-ctest.outcome == 'failure')
timeout-minutes: 60
run: |
cd build
echo "::group::Re-run ctest"
ctest --rerun-failed --output-on-failure || true
echo "::endgroup::"
echo "::group::Log from these tests"
[ ! -f Testing/Temporary/LastTest.log ] || cat Testing/Temporary/LastTest.log
echo "::endgroup::"


cross_build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-22.04

strategy:
fail-fast: false
matrix:
include:
- target: mips64el
triple: mips64el-linux-gnuabi64
opts: DYNAMIC_ARCH=1 TARGET=GENERIC
- target: riscv64
triple: riscv64-linux-gnu
opts: TARGET=RISCV64_GENERIC
- target: mipsel
triple: mipsel-linux-gnu
opts: TARGET=MIPS1004K
- target: alpha
triple: alpha-linux-gnu
opts: TARGET=EV4

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Install Dependencies
run: |
sudo apt-get update
sudo apt-get install -y ccache gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-${{ matrix.target }}-cross

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}

- name: Configure ccache
run: |
# Limit the maximum size and switch on compression to avoid exceeding the total disk or cache quota (5 GB).
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s


- name: Build OpenBLAS
run: |
make -j$(nproc) HOSTCC="ccache gcc" CC="ccache ${{ matrix.triple }}-gcc" FC="ccache ${{ matrix.triple }}-gfortran" ARCH=${{ matrix.target }} ${{ matrix.opts }}

neoverse_build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-24.04-arm

steps:
- name: Checkout repository
uses: actions/checkout@v3
- name: Install Dependencies
run: |
sudo apt-get update
sudo apt-get install -y gcc gfortran make
- name: Build OpenBLAS
run: |
make -j${nproc}
make -j${nproc} lapack-test

+ 0
- 37
.github/workflows/harmonyos.yml View File

@@ -1,37 +0,0 @@
name: harmonyos

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
env:
OHOS_NDK_CMAKE: $GITHUB_WORKSPACE/ohos-sdk/linux/native/build-tools/cmake/bin/cmake
COMMON_CMAKE_OPTIONS: |
-DCMAKE_TOOLCHAIN_FILE=$GITHUB_WORKSPACE/ohos-sdk/linux/native/build/cmake/ohos.toolchain.cmake \
-DCMAKE_INSTALL_PREFIX=install \
-DCMAKE_BUILD_TYPE=Release \
steps:
- uses: actions/checkout@v4
- name: ndk-install
run: |
wget https://repo.huaweicloud.com/harmonyos/os/4.1.1-Release/ohos-sdk-windows_linux-public.tar.gz
tar -xf ohos-sdk-windows_linux-public.tar.gz
cd ohos-sdk/linux
unzip -q native-linux-x64-4.1.7.8-Release.zip
cd -
- name: build-armv8
run: |
mkdir build && cd build
${{ env.OHOS_NDK_CMAKE }} ${{ env.COMMON_CMAKE_OPTIONS }} -DOHOS_ARCH="arm64-v8a" \
-DTARGET=ARMV8 -DNOFORTRAN=1 ..
${{ env.OHOS_NDK_CMAKE }} --build . -j $(nproc)

+ 0
- 119
.github/workflows/loongarch64.yml View File

@@ -1,119 +0,0 @@
name: loongarch64 qemu test

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-24.04
strategy:
fail-fast: false
matrix:
include:
- target: LOONGSONGENERIC
triple: loongarch64-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
- target: LOONGSON3R5
triple: loongarch64-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
- target: LOONGSON2K1000
triple: loongarch64-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
- target: LA64_GENERIC
triple: loongarch64-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
- target: LA464
triple: loongarch64-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
- target: LA264
triple: loongarch64-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
- target: DYNAMIC_ARCH
triple: loongarch64-linux-gnu
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Install APT deps
run: |
sudo apt-get update && \
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache qemu-user-static \
gcc-14-loongarch64-linux-gnu g++-14-loongarch64-linux-gnu gfortran-14-loongarch64-linux-gnu

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}

- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s

- name: Disable utest dsdot:dsdot_n_1
run: |
echo -n > utest/test_dsdot.c
echo "Due to the current version of qemu causing utest cases to fail,"
echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."

- name: Build OpenBLAS
run: |
make CC='ccache ${{ matrix.triple }}-gcc-14 -static' FC='ccache ${{ matrix.triple }}-gfortran-14 -static' \
RANLIB='ccache ${{ matrix.triple }}-gcc-ranlib-14' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

- name: Test
run: |
qemu-loongarch64-static ./utest/openblas_utest
qemu-loongarch64-static ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat

+ 0
- 141
.github/workflows/loongarch64_clang.yml View File

@@ -1,141 +0,0 @@
name: loongarch64 clang qemu test

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- target: LOONGSONGENERIC
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSONGENERIC
- target: LOONGSON3R5
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON3R5
- target: LOONGSON2K1000
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LOONGSON2K1000
- target: LA64_GENERIC
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA64_GENERIC
- target: LA464
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA464
- target: LA264
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=LA264
- target: DYNAMIC_ARCH
opts: NO_SHARED=1 DYNAMIC_ARCH=1 TARGET=GENERIC

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Install libffi6
run: |
wget https://download.nvidia.com/cumulus/apt.cumulusnetworks.com/pool/upstream/libf/libffi/libffi6_3.2.1-9_amd64.deb
sudo dpkg -i libffi6_3.2.1-9_amd64.deb

- name: Install APT deps
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache libglib2.0-dev

- name: Download and install loongarch64-toolchain
run: |
wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz
wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz
tar -xf clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz -C /opt
tar -xf loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz -C /opt

- name: Checkout qemu
uses: actions/checkout@v3
with:
repository: qemu/qemu
path: qemu
ref: master

- name: Install qemu
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static
make -j$(nproc)
make install

- name: Set env
run: |
echo "PATH=$GITHUB_WORKSPACE:/opt/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10/bin:/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/bin:$PATH" >> $GITHUB_ENV

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}

- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s

- name: Disable utest dsdot:dsdot_n_1
run: |
echo -n > utest/test_dsdot.c
echo "Due to the qemu versions 7.2 causing utest cases to fail,"
echo "the utest dsdot:dsdot_n_1 have been temporarily disabled."

- name: Build OpenBLAS
run: make CC='ccache clang --target=loongarch64-linux-gnu --sysroot=/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/loongarch64-linux-gnu/sysroot/ -static' FC='ccache loongarch64-linux-gnu-gfortran -static' HOSTCC='ccache clang' CROSS_SUFFIX=llvm- NO_SHARED=1 ${{ matrix.opts }} -j$(nproc)

- name: Test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
qemu-loongarch64 ./utest/openblas_utest
qemu-loongarch64 ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat


+ 0
- 123
.github/workflows/mips64.yml View File

@@ -1,123 +0,0 @@
name: mips64 qemu test

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
include:
- target: MIPS64_GENERIC
triple: mips64el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=MIPS64_GENERIC
- target: SICORTEX
triple: mips64el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=SICORTEX
- target: I6400
triple: mipsisa64r6el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=I6400
- target: P6600
triple: mipsisa64r6el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=P6600
- target: I6500
triple: mipsisa64r6el-linux-gnuabi64
opts: NO_SHARED=1 TARGET=I6500

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: install build deps
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
gcc-${{ matrix.triple }} gfortran-${{ matrix.triple }} libgomp1-mips64el-cross libglib2.0-dev

- name: checkout qemu
uses: actions/checkout@v3
with:
repository: qemu/qemu
path: qemu
ref: ae35f033b874c627d81d51070187fbf55f0bf1a7

- name: build qemu
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=mips64el-linux-user --disable-system
make -j$(nproc)
make install

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}

- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s

- name: build OpenBLAS
run: make CC='ccache ${{ matrix.triple }}-gcc -static' FC='ccache ${{ matrix.triple }}-gfortran -static' ${{ matrix.opts }} HOSTCC='ccache gcc' -j$(nproc)

- name: test
run: |
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
qemu-mips64el ./utest/openblas_utest
qemu-mips64el ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-mips64el ./test/zblat3 < ./test/zblat3.dat

+ 0
- 90
.github/workflows/nightly-Homebrew-build.yml View File

@@ -1,90 +0,0 @@
# Only the "head" branch of the OpenBLAS package is tested

on:
push:
paths:
- '**/nightly-Homebrew-build.yml'
pull_request:
branches:
- develop
paths:
- '**/nightly-Homebrew-build.yml'
schedule:
- cron: 45 7 * * *
# This is 7:45 AM UTC daily, late at night in the USA

# Since push and pull_request will still always be building and testing the `develop` branch,
# it only makes sense to test if this file has been changed

name: Nightly-Homebrew-Build

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
build-OpenBLAS-with-Homebrew:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: macos-latest
env:
DEVELOPER_DIR: /Applications/Xcode_11.4.1.app/Contents/Developer
HOMEBREW_DEVELOPER: "ON"
HOMEBREW_DISPLAY_INSTALL_TIMES: "ON"
HOMEBREW_NO_ANALYTICS: "ON"
HOMEBREW_NO_AUTO_UPDATE: "ON"
HOMEBREW_NO_BOTTLE_SOURCE_FALLBACK: "ON"
HOMEBREW_NO_INSTALL_CLEANUP: "ON"
HOMEBREW_NO_INSTALLED_DEPENDENTS_CHECK: "ON"
HOMEBREW_NO_INSTALL_FROM_API: "ON"

steps:
- name: Random delay for cron job
run: |
delay=$(( RANDOM % 600 ))
printf 'Delaying for %s seconds on event %s' ${delay} "${{ github.event_name }}"
sleep ${delay}
if: github.event_name == 'schedule'

- uses: actions/checkout@v2
# This isn't even needed, technically. Homebrew will get `develop` via git

- name: Update Homebrew
if: github.event_name != 'pull_request'
run: brew update || true
- name: Install prerequisites
run: brew install --fetch-HEAD --HEAD --only-dependencies --keep-tmp openblas

- name: Install and bottle OpenBLAS
run: brew install --fetch-HEAD --HEAD --build-bottle --keep-tmp openblas
# the HEAD flags tell Homebrew to build the develop branch fetch via git

- name: Create bottle
run: |
brew bottle -v openblas
mkdir bottles
mv *.bottle.tar.gz bottles

- name: Upload bottle
uses: actions/upload-artifact@v4
with:
name: openblas--HEAD.catalina.bottle.tar.gz
path: bottles

- name: Show linkage
run: brew linkage -v openblas

- name: Test openblas
run: brew test --HEAD --verbose openblas

- name: Audit openblas formula
run: |
brew audit --strict openblas
brew cat openblas

- name: Post logs on failure
if: failure()
run: brew gist-logs --with-hostname -v openblas

+ 0
- 256
.github/workflows/riscv64_vector.yml View File

@@ -1,256 +0,0 @@
name: riscv64 zvl256b qemu test

on: [push, pull_request]

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
TEST:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: ubuntu-latest
env:
triple: riscv64-unknown-linux-gnu
riscv_gnu_toolchain: https://github.com/riscv-collab/riscv-gnu-toolchain
riscv_gnu_toolchain_version: 13.2.0
riscv_gnu_toolchain_nightly_download_path: /releases/download/2025.08.29/riscv64-glibc-ubuntu-22.04-llvm-nightly-2025.08.29-nightly.tar.xz
strategy:
fail-fast: false
matrix:
include:
- target: RISCV64_ZVL128B
opts: TARGET=RISCV64_ZVL128B BINARY=64 ARCH=riscv64
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=128,elen=64
- target: RISCV64_ZVL256B
opts: TARGET=RISCV64_ZVL256B BINARY=64 ARCH=riscv64
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64
- target: DYNAMIC_ARCH=1
opts: TARGET=RISCV64_GENERIC BINARY=64 ARCH=riscv64 DYNAMIC_ARCH=1
qemu_cpu: rv64,g=true,c=true,v=true,vext_spec=v1.0,vlen=256,elen=64

steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: install build deps
run: |
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make \
libgomp1-riscv64-cross ccache
wget ${riscv_gnu_toolchain}/${riscv_gnu_toolchain_nightly_download_path}
tar -xvf $(basename ${riscv_gnu_toolchain_nightly_download_path}) -C /opt

- name: Compilation cache
uses: actions/cache@v3
with:
path: ~/.ccache
key: ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}-${{ github.sha }}
restore-keys: |
ccache-${{ runner.os }}-${{ matrix.target }}-${{ github.ref }}
ccache-${{ runner.os }}-${{ matrix.target }}

- name: Configure ccache
run: |
test -d ~/.ccache || mkdir -p ~/.ccache
echo "max_size = 300M" > ~/.ccache/ccache.conf
echo "compression = true" >> ~/.ccache/ccache.conf
ccache -s

- name: build OpenBLAS libs
run: |
export PATH="/opt/riscv/bin:$PATH"
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
CC='ccache clang --rtlib=compiler-rt -target ${triple} --sysroot /opt/riscv/sysroot --gcc-toolchain=/opt/riscv/lib/gcc/riscv64-unknown-linux-gnu/${riscv_gnu_toolchain_version}/' \
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
RANLIB='ccache ${triple}-ranlib' \
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
HOSTCC=gcc HOSTFC=gfortran -j$(nproc)

- name: build OpenBLAS tests
run: |
export PATH="/opt/riscv/bin:$PATH"
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
CC='${triple}-gcc' \
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
RANLIB='ccache ${triple}-ranlib' \
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) tests

- name: build lapack-netlib tests
working-directory: ./lapack-netlib/TESTING
run: |
export PATH="/opt/riscv/bin:$PATH"
make TARGET=${{ matrix.target }} CFLAGS="-DTARGET=${{ matrix.target }}" \
CC='${triple}-gcc' \
AR='ccache ${triple}-ar' AS='ccache ${triple}-gcc' LD='ccache ${triple}-gcc' \
RANLIB='ccache ${triple}-ranlib' \
FC='ccache ${triple}-gfortran' ${{ matrix.opts }} \
HOSTCC=gcc HOSTFC=gfortran -j$(nproc) \
LIN/xlintsts LIN/xlintstc LIN/xlintstd LIN/xlintstz LIN/xlintstrfs \
LIN/xlintstrfc LIN/xlintstrfd LIN/xlintstrfz LIN/xlintstds \
LIN/xlintstzc EIG/xeigtsts EIG/xeigtstc EIG/xeigtstd EIG/xeigtstz \

- name: OpenBLAS tests
shell: bash
run: |
export PATH="/opt/riscv/bin:$PATH"
export QEMU_CPU=${{ matrix.qemu_cpu }}
rm -rf ./test_out
mkdir -p ./test_out
run_test() { local DIR=$1; local CMD=$2; local DATA=$3; local OUTPUT="./test_out/$DIR.$CMD"; \
echo "`pwd`/$DIR/$CMD $DIR/$DATA" >> $OUTPUT; \
if [[ -z $DATA ]]; then qemu-riscv64 ./$DIR/$CMD |& tee $OUTPUT ; \
else qemu-riscv64 ./$DIR/$CMD < ./$DIR/$DATA |& tee $OUTPUT ; fi ; \
RV=$? ; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi \
}
run_test test cblat1 &
run_test test cblat2 cblat2.dat &
run_test test cblat3 cblat3.dat &
run_test test dblat1 &
run_test test dblat2 dblat2.dat &
run_test test dblat3 dblat3.dat &
run_test test sblat1 &
run_test test sblat2 sblat2.dat &
run_test test sblat3 sblat3.dat &
run_test test zblat1 &
run_test test zblat2 zblat2.dat &
run_test test zblat3 zblat3.dat &
run_test ctest xccblat1 &
run_test ctest xccblat2 cin2 &
run_test ctest xccblat3 cin3 &
run_test ctest xdcblat1 &
run_test ctest xdcblat2 din2 &
run_test ctest xdcblat3 din3 &
run_test ctest xscblat1 &
run_test ctest xscblat2 sin2 &
run_test ctest xscblat3 sin3 &
run_test ctest xzcblat1 &
run_test ctest xzcblat2 zin2 &
run_test ctest xzcblat3 zin3 &
wait
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi

- name: netlib tests
shell: bash
run: |
: # these take a very long time
echo "Skipping netlib tests in CI"
exit 0
: # comment out exit above to enable the tests
: # probably we want to identify a subset to run in CI
export PATH="/opt/riscv/bin:$PATH"
export QEMU_CPU=${{ matrix.qemu_cpu }}
rm -rf ./test_out
mkdir -p ./test_out
run_test() { local OUTPUT="./test_out/$1"; local DATA="./lapack-netlib/TESTING/$2"; local CMD="./lapack-netlib/TESTING/$3"; \
echo "$4" >> $OUTPUT; \
echo "$CMD" >> $OUTPUT; \
qemu-riscv64 $CMD < $DATA |& tee $OUTPUT; \
RV=$? ; if [[ $RV != 0 ]]; then echo "*** FAIL: nonzero exit code $RV" >> $OUTPUT ; fi; \
if grep -q fail $OUTPUT ; then echo "*** FAIL: log contains 'fail'" >> $OUTPUT ; fi ; \
if grep -q rror $OUTPUT | grep -v -q "passed" | grep -v "largest error" ; then echo "*** FAIL: log contains 'error'" >> $OUTPUT ; fi \
}
run_test stest.out stest.in LIN/xlintsts "Testing REAL LAPACK linear equation routines" &
run_test ctest.out ctest.in LIN/xlintstc "Testing COMPLEX LAPACK linear equation routines" &
run_test dtest.out dtest.in LIN/xlintstd "Testing DOUBLE PRECISION LAPACK linear equation routines" &
run_test ztest.out ztest.in LIN/xlintstz "Testing COMPLEX16 LAPACK linear equation routines" &
run_test dstest.out dstest.in LIN/xlintstds "Testing SINGLE-DOUBLE PRECISION LAPACK prototype linear equation routines" &
run_test zctest.out zctest.in LIN/xlintstzc "Testing COMPLEX-COMPLEX16 LAPACK prototype linear equation routines" &
run_test stest_rfp.out stest_rfp.in LIN/xlintstrfs "Testing REAL LAPACK RFP prototype linear equation routines" &
run_test dtest_rfp.out dtest_rfp.in LIN/xlintstrfd "Testing DOUBLE PRECISION LAPACK RFP prototype linear equation routines" &
run_test ctest_rfp.out ctest_rfp.in LIN/xlintstrfc "Testing COMPLEX LAPACK RFP prototype linear equation routines" &
run_test ztest_rfp.out ztest_rfp.in LIN/xlintstrfz "Testing COMPLEX16 LAPACK RFP prototype linear equation routines" &
run_test snep.out nep.in EIG/xeigtsts "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test ssep.out sep.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test sse2.out se2.in EIG/xeigtsts "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test ssvd.out svd.in EIG/xeigtsts "SVD - Testing Singular Value Decomposition routines" &
run_test sec.out sec.in EIG/xeigtsts "SEC - Testing REAL Eigen Condition Routines" &
run_test sed.out sed.in EIG/xeigtsts "SEV - Testing REAL Nonsymmetric Eigenvalue Driver" &
run_test sgg.out sgg.in EIG/xeigtsts "SGG - Testing REAL Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test sgd.out sgd.in EIG/xeigtsts "SGD - Testing REAL Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test ssb.out ssb.in EIG/xeigtsts "SSB - Testing REAL Symmetric Eigenvalue Problem routines" &
run_test ssg.out ssg.in EIG/xeigtsts "SSG - Testing REAL Symmetric Generalized Eigenvalue Problem routines" &
run_test sbal.out sbal.in EIG/xeigtsts "SGEBAL - Testing the balancing of a REAL general matrix" &
run_test sbak.out sbak.in EIG/xeigtsts "SGEBAK - Testing the back transformation of a REAL balanced matrix" &
run_test sgbal.out sgbal.in EIG/xeigtsts "SGGBAL - Testing the balancing of a pair of REAL general matrices" &
run_test sgbak.out sgbak.in EIG/xeigtsts "SGGBAK - Testing the back transformation of a pair of REAL balanced matrices" &
run_test sbb.out sbb.in EIG/xeigtsts "SBB - Testing banded Singular Value Decomposition routines" &
run_test sglm.out glm.in EIG/xeigtsts "GLM - Testing Generalized Linear Regression Model routines" &
run_test sgqr.out gqr.in EIG/xeigtsts "GQR - Testing Generalized QR and RQ factorization routines" &
run_test sgsv.out gsv.in EIG/xeigtsts "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test scsd.out csd.in EIG/xeigtsts "CSD - Testing CS Decomposition routines" &
run_test slse.out lse.in EIG/xeigtsts "LSE - Testing Constrained Linear Least Squares routines" &
run_test cnep.out nep.in EIG/xeigtstc "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test csep.out sep.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test cse2.out se2.in EIG/xeigtstc "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test csvd.out svd.in EIG/xeigtstc "SVD - Testing Singular Value Decomposition routines" &
run_test cec.out cec.in EIG/xeigtstc "CEC - Testing COMPLEX Eigen Condition Routines" &
run_test ced.out ced.in EIG/xeigtstc "CES - Testing COMPLEX Nonsymmetric Schur Form Driver" &
run_test cgg.out cgg.in EIG/xeigtstc "CGG - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test cgd.out cgd.in EIG/xeigtstc "CGD - Testing COMPLEX Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test csb.out csb.in EIG/xeigtstc "CHB - Testing Hermitian Eigenvalue Problem routines" &
run_test csg.out csg.in EIG/xeigtstc "CSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
run_test cbal.out cbal.in EIG/xeigtstc "CGEBAL - Testing the balancing of a COMPLEX general matrix" &
run_test cbak.out cbak.in EIG/xeigtstc "CGEBAK - Testing the back transformation of a COMPLEX balanced matrix" &
run_test cgbal.out cgbal.in EIG/xeigtstc "CGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
run_test cgbak.out cgbak.in EIG/xeigtstc "CGGBAK - Testing the back transformation of a pair of COMPLEX balanced matrices" &
run_test cbb.out cbb.in EIG/xeigtstc "CBB - Testing banded Singular Value Decomposition routines" &
run_test cglm.out glm.in EIG/xeigtstc "GLM - Testing Generalized Linear Regression Model routines" &
run_test cgqr.out gqr.in EIG/xeigtstc "GQR - Testing Generalized QR and RQ factorization routines" &
run_test cgsv.out gsv.in EIG/xeigtstc "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test ccsd.out csd.in EIG/xeigtstc "CSD - Testing CS Decomposition routines" &
run_test clse.out lse.in EIG/xeigtstc "LSE - Testing Constrained Linear Least Squares routines" &
run_test dnep.out nep.in EIG/xeigtstd "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test dsep.out sep.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test dse2.out se2.in EIG/xeigtstd "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test dsvd.out svd.in EIG/xeigtstd "SVD - Testing Singular Value Decomposition routines" &
run_test dec.out dec.in EIG/xeigtstd "DEC - Testing DOUBLE PRECISION Eigen Condition Routines" &
run_test ded.out ded.in EIG/xeigtstd "DEV - Testing DOUBLE PRECISION Nonsymmetric Eigenvalue Driver" &
run_test dgg.out dgg.in EIG/xeigtstd "DGG - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test dgd.out dgd.in EIG/xeigtstd "DGD - Testing DOUBLE PRECISION Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test dsb.out dsb.in EIG/xeigtstd "DSB - Testing DOUBLE PRECISION Symmetric Eigenvalue Problem routines" &
run_test dsg.out dsg.in EIG/xeigtstd "DSG - Testing DOUBLE PRECISION Symmetric Generalized Eigenvalue Problem routines" &
run_test dbal.out dbal.in EIG/xeigtstd "DGEBAL - Testing the balancing of a DOUBLE PRECISION general matrix" &
run_test dbak.out dbak.in EIG/xeigtstd "DGEBAK - Testing the back transformation of a DOUBLE PRECISION balanced matrix" &
run_test dgbal.out dgbal.in EIG/xeigtstd "DGGBAL - Testing the balancing of a pair of DOUBLE PRECISION general matrices" &
run_test dgbak.out dgbak.in EIG/xeigtstd "DGGBAK - Testing the back transformation of a pair of DOUBLE PRECISION balanced matrices" &
run_test dbb.out dbb.in EIG/xeigtstd "DBB - Testing banded Singular Value Decomposition routines" &
run_test dglm.out glm.in EIG/xeigtstd "GLM - Testing Generalized Linear Regression Model routines" &
run_test dgqr.out gqr.in EIG/xeigtstd "GQR - Testing Generalized QR and RQ factorization routines" &
run_test dgsv.out gsv.in EIG/xeigtstd "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test dcsd.out csd.in EIG/xeigtstd "CSD - Testing CS Decomposition routines" &
run_test dlse.out lse.in EIG/xeigtstd "LSE - Testing Constrained Linear Least Squares routines" &
run_test znep.out nep.in EIG/xeigtstz "NEP - Testing Nonsymmetric Eigenvalue Problem routines" &
run_test zsep.out sep.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test zse2.out se2.in EIG/xeigtstz "SEP - Testing Symmetric Eigenvalue Problem routines" &
run_test zsvd.out svd.in EIG/xeigtstz "SVD - Testing Singular Value Decomposition routines" &
run_test zec.out zec.in EIG/xeigtstz "ZEC - Testing COMPLEX16 Eigen Condition Routines" &
run_test zed.out zed.in EIG/xeigtstz "ZES - Testing COMPLEX16 Nonsymmetric Schur Form Driver" &
run_test zgg.out zgg.in EIG/xeigtstz "ZGG - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem routines" &
run_test zgd.out zgd.in EIG/xeigtstz "ZGD - Testing COMPLEX16 Nonsymmetric Generalized Eigenvalue Problem driver routines" &
run_test zsb.out zsb.in EIG/xeigtstz "ZHB - Testing Hermitian Eigenvalue Problem routines" &
run_test zsg.out zsg.in EIG/xeigtstz "ZSG - Testing Symmetric Generalized Eigenvalue Problem routines" &
run_test zbal.out zbal.in EIG/xeigtstz "ZGEBAL - Testing the balancing of a COMPLEX16 general matrix" &
run_test zbak.out zbak.in EIG/xeigtstz "ZGEBAK - Testing the back transformation of a COMPLEX16 balanced matrix" &
run_test zgbal.out zgbal.in EIG/xeigtstz "ZGGBAL - Testing the balancing of a pair of COMPLEX general matrices" &
run_test zgbak.out zgbak.in EIG/xeigtstz "ZGGBAK - Testing the back transformation of a pair of COMPLEX16 balanced matrices" &
run_test zbb.out zbb.in EIG/xeigtstz "ZBB - Testing banded Singular Value Decomposition routines" &
run_test zglm.out glm.in EIG/xeigtstz "GLM - Testing Generalized Linear Regression Model routines" &
run_test zgqr.out gqr.in EIG/xeigtstz "GQR - Testing Generalized QR and RQ factorization routines" &
run_test zgsv.out gsv.in EIG/xeigtstz "GSV - Testing Generalized Singular Value Decomposition routines" &
run_test zcsd.out csd.in EIG/xeigtstz "CSD - Testing CS Decomposition routines" &
run_test zlse.out lse.in EIG/xeigtstz "LSE - Testing Constrained Linear Least Squares routines" &
wait
while IFS= read -r -d $'\0' LOG; do cat $LOG ; FAILURES=1 ; done < <(grep -lZ FAIL ./test_out/*)
python ./lapack-netlib/lapack_testing.py -d ./test_out -e > netlib_summary
TOTALS="$(grep 'ALL PRECISIONS' netlib_summary)"
NUMERICAL_ERRORS=-1
OTHER_ERRORS=-1
. <(awk '/ALL PRECISIONS/{printf "NUMERICAL_ERRORS=%s\nOTHER_ERRORS=%s\n", $5, $7}' netlib_summary
if (( NUMERICAL_ERRORS != 0 )) || (( OTHER_ERRORS != 0 )) ; then cat netlib_summary ; FAILURES=1 ; fi
if [[ ! -z $FAILURES ]]; then echo "==========" ; echo "== FAIL ==" ; echo "==========" ; echo ; exit 1 ; fi

+ 0
- 84
.github/workflows/windows_arm64.yml View File

@@ -1,84 +0,0 @@
name: Windows ARM64 CI

on:
push:
branches:
- develop
pull_request:
branches:
- develop

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

permissions:
contents: read # to fetch code (actions/checkout)

jobs:
build:
if: "github.repository == 'OpenMathLib/OpenBLAS'"
runs-on: windows-11-arm
steps:
- name: Checkout repository
uses: actions/checkout@v3

- name: Install LLVM for Win-ARM64
shell: pwsh
run: |
Invoke-WebRequest https://github.com/llvm/llvm-project/releases/download/llvmorg-20.1.8/LLVM-20.1.8-woa64.exe -UseBasicParsing -OutFile LLVM-woa64.exe
Start-Process -FilePath ".\LLVM-woa64.exe" -ArgumentList "/S" -Wait
echo "C:\Program Files\LLVM\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
#dir "C:\Program Files\LLVM\include\flang"
#rmdir /Q /S "C:/Program Files/Microsoft Visual Studio/2022/Enterprise/VC/Tools/Llvm/ARM64"
- name: Install CMake and Ninja for Win-ARM64
shell: pwsh
run: |
Invoke-WebRequest https://github.com/Kitware/CMake/releases/download/v3.29.4/cmake-3.29.4-windows-arm64.msi -OutFile cmake-arm64.msi
Start-Process msiexec.exe -ArgumentList "/i cmake-arm64.msi /quiet /norestart" -Wait
echo "C:\Program Files\CMake\bin" >> $env:GITHUB_PATH
Invoke-WebRequest https://github.com/ninja-build/ninja/releases/download/v1.13.1/ninja-winarm64.zip -OutFile ninja-winarm64.zip
Expand-Archive ninja-winarm64.zip -DestinationPath ninja
Copy-Item ninja\ninja.exe -Destination "C:\Windows\System32"

- name: Configure OpenBLAS
shell: cmd
run: |
CALL "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsarm64.bat"
set PATH=C:\Program Files\LLVM\bin;%PATH%
mkdir build
cd build
cmake .. -G Ninja ^
-DCMAKE_BUILD_TYPE=Release ^
-DTARGET=ARMV8 ^
-DBINARY=64 ^
-DCMAKE_C_COMPILER=clang-cl ^
-DCMAKE_Fortran_COMPILER=flang-new ^
-DBUILD_SHARED_LIBS=ON ^
-DCMAKE_SYSTEM_PROCESSOR=arm64 ^
-DCMAKE_SYSTEM_NAME=Windows ^
-DCMAKE_INSTALL_PREFIX=C:/opt

- name: Build OpenBLAS
shell: cmd
run: |
cd build
ninja -j16

- name: Install OpenBLAS
shell: cmd
run: |
cd build
cmake --install .

- name: Run ctests
shell: pwsh
run: |
$env:PATH = "C:\opt\bin;$env:PATH"
cd build
ctest



+ 0
- 118
.gitignore View File

@@ -1,118 +0,0 @@
*.obj
*.lib
*.dll
*.dylib
*.def
*.o
*.out
*.tmp
lapack-3.1.1
lapack-3.1.1.tgz
lapack-3.4.1
lapack-3.4.1.tgz
lapack-3.4.2
lapack-3.4.2.tgz
lapack-netlib/make.inc
lapack-netlib/SRC/la_constants.mod
lapack-netlib/SRC/la_xisnan.mod
lapack-netlib/TESTING/testing_results.txt
lapack-netlib/INSTALL/test*
lapack-netlib/TESTING/xeigtstc
lapack-netlib/TESTING/xeigtstd
lapack-netlib/TESTING/xeigtsts
lapack-netlib/TESTING/xeigtstz
lapack-netlib/TESTING/xlintstc
lapack-netlib/TESTING/xlintstd
lapack-netlib/TESTING/xlintstds
lapack-netlib/TESTING/xlintstrfc
lapack-netlib/TESTING/xlintstrfd
lapack-netlib/TESTING/xlintstrfs
lapack-netlib/TESTING/xlintstrfz
lapack-netlib/TESTING/xlintsts
lapack-netlib/TESTING/xlintstz
lapack-netlib/TESTING/xlintstzc
*.so
*.so.*
*.a
.svn
*~
lib.grd
nohup.out
config.h
config_kernel.h
Makefile.conf
Makefile.conf_last
Makefile_kernel.conf
config_last.h
getarch
getarch_2nd
utest/openblas_utest
utest/openblas_utest_ext
ctest/xccblat1
ctest/xccblat2
ctest/xccblat3
ctest/xccblat3_3m
ctest/xdcblat1
ctest/xdcblat2
ctest/xdcblat3
ctest/xdcblat3_3m
ctest/xscblat1
ctest/xscblat2
ctest/xscblat3
ctest/xscblat3_3m
ctest/xzcblat1
ctest/xzcblat2
ctest/xzcblat3
ctest/xzcblat3_3m
exports/linktest.c
exports/linux.def
kernel/setparam_*.c
kernel/kernel_*.h
test/CBLAT2.SUMM
test/CBLAT3.SUMM
test/CBLAT3_3M.SUMM
test/DBLAT2.SUMM
test/DBLAT3.SUMM
test/DBLAT3_3M.SUMM
test/SBLAT2.SUMM
test/SBLAT3.SUMM
test/SBLAT3_3M.SUMM
test/ZBLAT2.SUMM
test/ZBLAT3.SUMM
test/ZBLAT3_3M.SUMM
test/SHBLAT3.SUMM
test/SBBLAT2.SUMM
test/SBBLAT3.SUMM
test/BBLAT2.SUMM
test/BBLAT3.SUMM
test/cblat1
test/cblat2
test/cblat3
test/cblat3_3m
test/dblat1
test/dblat2
test/dblat3
test/dblat3_3m
test/sblat1
test/sblat2
test/sblat3
test/sblat3_3m
test/test_shgemm
test/test_sbgemm
test/test_sbgemv
test/test_bgemm
test/test_bgemv
test/zblat1
test/zblat2
test/zblat3
test/zblat3_3m
build
build.*
*.swp
benchmark/*.goto
benchmark/smallscaling
.vscode
CMakeCache.txt
CMakeFiles/*
.vscode
**/__pycache__

+ 0
- 0
.nojekyll View File


+ 0
- 320
.travis.yml View File

@@ -1,320 +0,0 @@
# XXX: Precise is already deprecated, new default is Trusty.
# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming
dist: focal
sudo: true
language: c

matrix:
include:
- &test-ubuntu
# os: linux
compiler: gcc
addons:
apt:
packages:
- gfortran
# before_script: &common-before
# - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32"
# script:
# - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# - make -C test $COMMON_FLAGS $BTYPE
# - make -C ctest $COMMON_FLAGS $BTYPE
# - make -C utest $COMMON_FLAGS $BTYPE
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64"
#
# - <<: *test-ubuntu
os: linux
arch: ppc64le
before_script: &common-before
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=POWER8 NUM_THREADS=32"
script:
- travis_wait 50 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX
- BTYPE="BINARY=64 USE_OPENMP=1"

- <<: *test-ubuntu
os: linux
arch: s390x
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
- sudo apt-get install --only-upgrade binutils
env:
# for matrix annotation only
- TARGET_BOX=IBMZ_LINUX
- BTYPE="BINARY=64 USE_OPENMP=1"

- <<: *test-ubuntu
os: linux
dist: focal
arch: s390x
compiler: clang
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=Z13 NUM_THREADS=32"
- sudo apt-get install --only-upgrade binutils
env:
# for matrix annotation only
- TARGET_BOX=IBMZ_LINUX
- BTYPE="BINARY=64 USE_OPENMP=0 CC=clang"

# - <<: *test-ubuntu
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64 USE_OPENMP=1"
#
# - <<: *test-ubuntu
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64 INTERFACE64=1"
#
# - <<: *test-ubuntu
# compiler: clang
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64 CC=clang"
#
# - <<: *test-ubuntu
# compiler: clang
# env:
# - TARGET_BOX=LINUX64
# - BTYPE="BINARY=64 INTERFACE64=1 CC=clang"
#
# - <<: *test-ubuntu
# addons:
# apt:
# packages:
# - gcc-multilib
# - gfortran-multilib
# env:
# - TARGET_BOX=LINUX32
# - BTYPE="BINARY=32"
#
- os: linux
arch: ppc64le
dist: bionic
compiler: gcc
before_script:
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
- sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y
script:
- travis_wait 50 make QUIET_MAKE=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX_P9

- os: linux
arch: ppc64le
dist: bionic
compiler: gcc
before_script:
- sudo add-apt-repository 'ppa:ubuntu-toolchain-r/test' -y
- sudo apt-get update
- sudo apt-get install gcc-9 gfortran-9 -y
script:
- travis_wait 50 make QUIET_MAKE=1 BUILD_BFLOAT16=1 BINARY=64 USE_OPENMP=1 CC=gcc-9 FC=gfortran-9
- make -C test $COMMON_FLAGS $BTYPE
- make -C ctest $COMMON_FLAGS $BTYPE
- make -C utest $COMMON_FLAGS $BTYPE
env:
# for matrix annotation only
- TARGET_BOX=PPC64LE_LINUX_P9

# - os: linux
# compiler: gcc
# addons:
# apt:
# packages:
# - binutils-mingw-w64-x86-64
# - gcc-mingw-w64-x86-64
# - gfortran-mingw-w64-x86-64
# before_script: *common-before
# script:
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# env:
# - TARGET_BOX=WIN64
# - BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran"
#
# Build & test on Alpine Linux inside chroot, i.e. on system with musl libc.
# These jobs needs sudo, so Travis runs them on VM-based infrastructure
# which is slower than container-based infrastructure used for jobs
# that don't require sudo.
# - &test-alpine
# os: linux
# dist: trusty
# sudo: true
# language: minimal
# before_install:
# - "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
# && echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
# - alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
# install:
# - sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'
# before_script: *common-before
# script:
# # XXX: Disable some warnings for now to avoid exceeding Travis limit for log size.
# - alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types"
# - alpine make -C test $COMMON_FLAGS $BTYPE
# - alpine make -C ctest $COMMON_FLAGS $BTYPE
# - alpine make -C utest $COMMON_FLAGS $BTYPE
# env:
# - TARGET_BOX=LINUX64_MUSL
# - BTYPE="BINARY=64"

# XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS,
# but only on Travis CI, cannot reproduce it elsewhere.
#- &test-alpine-openmp
# <<: *test-alpine
# env:
# - TARGET_BOX=LINUX64_MUSL
# - BTYPE="BINARY=64 USE_OPENMP=1"

# - <<: *test-alpine
# env:
# - TARGET_BOX=LINUX64_MUSL
# - BTYPE="BINARY=64 INTERFACE64=1"
#
# # Build with the same flags as Alpine do in OpenBLAS package.
# - <<: *test-alpine
# env:
# - TARGET_BOX=LINUX64_MUSL
# - BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=CORE2"

# - &test-cmake
# os: linux
# compiler: clang
# addons:
# apt:
# packages:
# - gfortran
# - cmake
# dist: trusty
# sudo: true
# before_script:
# - COMMON_ARGS="-DTARGET=NEHALEM -DNUM_THREADS=32"
# script:
# - mkdir build
# - CONFIG=Release
# - cmake -Bbuild -H. $CMAKE_ARGS $COMMON_ARGS -DCMAKE_BUILD_TYPE=$CONFIG
# - cmake --build build --config $CONFIG -- -j2
# env:
# - CMAKE=1
# - <<: *test-cmake
# env:
# - CMAKE=1 CMAKE_ARGS="-DNOFORTRAN=1"
# - <<: *test-cmake
# compiler: gcc
# env:
# - CMAKE=1

# - &test-macos
# os: osx
# osx_image: xcode11.5
# before_script:
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
# script:
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# env:
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-9"
#
# - <<: *test-macos
# osx_image: xcode12
# before_script:
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
# - brew update
# script:
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# env:
# - BTYPE="TARGET=HASWELL USE_OPENMP=1 BINARY=64 INTERFACE64=1 CC=gcc-10 FC=gfortran-10"
#
# - <<: *test-macos
# osx_image: xcode12
# before_script:
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
# - brew update
# script:
# - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
# env:
# - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"

# - <<: *test-macos
# osx_image: xcode10
# env:
# - BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"

# - <<: *test-macos
# osx_image: xcode11.5
# before_script:
# - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
# - brew update
# env:
# - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch arm64 -miphoneos-version-min=10.0"
# - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"
# - <<: *test-macos
# osx_image: xcode11.5
# env:
## - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
## - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
# - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
# - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch armv7 -miphoneos-version-min=5.1"
# - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"

- &test-neoversen1
os: linux
arch: arm64
dist: focal
group: edge
virt: lxd
compiler: gcc
addons:
apt:
packages:
- gfortran
script:
- travis_wait 45 make && make lapack-test
env:
- TARGET_BOX=NEOVERSE_N1
- &test-neon1-gcc8
os: linux
arch: arm64
dist: focal
group: edge
virt: lxd
compiler: gcc
addons:
apt:
packages:
- gcc-8
- gfortran-8
script:
- travis_wait 45 make QUIET_MAKE=1 CC=gcc-8 FC=gfortran-8 DYNAMIC_ARCH=1
env:
- TARGET_BOX=NEOVERSE_N1-GCC8
# whitelist
branches:
only:
- master
- develop

notifications:
webhooks:
urls:
- https://webhooks.gitter.im/e/8a6e4470a0cebd090344
on_success: change # options: [always|never|change] default: always
on_failure: always # options: [always|never|change] default: always
on_start: never # options: [always|never|change] default: always

+ 0
- 43
BACKERS.md View File

@@ -1,43 +0,0 @@
Thank you for the support.

### [2019.12/2021.9] [Chan-Zuckerberg Foundation EOSS Initiative](https://chanzuckerberg.com/eoss/)

Between December 2019 and September 2021, development and maintaining of OpenBLAS was funded in part by the Chan-Zuckerberg Foundation in the context of two grants awarded to the NumPy Foundation and managed by NumFocus (Cycles 1 and 3 of the Essential Open Source Software for Science (EOSS) Initiative of the Chan-Zuckerberg Foundation)

### [2013.8] [Testbed for OpenBLAS project](https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project)

https://www.bountysource.com/fundraisers/443-testbed-for-openblas-project/pledges

In chronological order:

* aeberspaecher
* fmolina
* saullocastro
* xianyi
* cuda
* carter
* StefanKarpinski
* staticfloat
* sebastien-villemot
* JeffBezanson
* ihnorton
* simonp0420
* andrioni
* Tim Holy
* ivarne
* johnmyleswhite
* traz
* Jean-Francis Roy
* bkalpert
* Anirban
* pgermain
* alexandre.lacoste.18
* foges
* ssam
* WestleyArgentum
* daniebmariani
* pjpuglia
* albarrentine
* Alexander Vogt



+ 0
- 729
CMakeLists.txt View File

@@ -1,729 +0,0 @@
##
## Author: Hank Anderson <hank@statease.com>
##

cmake_minimum_required(VERSION 3.16.0)

set (CMAKE_ASM_SOURCE_FILE_EXTENSIONS "S")
project(OpenBLAS C ASM)

set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 30.dev)

set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")

# Adhere to GNU filesystem layout conventions
include(GNUInstallDirs)

include(CMakePackageConfigHelpers)

#######
option(BUILD_WITHOUT_LAPACK "Do not build LAPACK and LAPACKE (Only BLAS or CBLAS)" OFF)

option(BUILD_WITHOUT_LAPACKE "Do not build the C interface to LAPACK)" OFF)

option(BUILD_LAPACK_DEPRECATED "When building LAPACK, include also some older, deprecated routines" ON)

set(LAPACK_STRLEN "" CACHE STRING "When building LAPACK, use this type (e.g. \"int\") for character lengths (defaults to size_t)")

option(BUILD_TESTING "Build LAPACK testsuite when building LAPACK" ON)

option(BUILD_BENCHMARKS "Build the collection of BLAS/LAPACK benchmarks" OFF)

option(C_LAPACK "Build LAPACK from C sources instead of the original Fortran" OFF)

option(BUILD_WITHOUT_CBLAS "Do not build the C interface (CBLAS) to the BLAS functions" OFF)

option(DYNAMIC_ARCH "Include support for multiple CPU targets, with automatic selection at runtime (x86/x86_64, aarch64, ppc or RISCV64-RVV1.0 only)" OFF)

option(DYNAMIC_OLDER "Include specific support for older x86 cpu models (Penryn,Dunnington,Atom,Nano,Opteron) with DYNAMIC_ARCH" OFF)

option(BUILD_RELAPACK "Build with ReLAPACK (recursive implementation of several LAPACK functions on top of standard LAPACK)" OFF)

option(USE_LOCKING "Use locks even in single-threaded builds to make them callable from multiple threads" OFF)

option(USE_PERL "Use the older PERL scripts for build preparation instead of universal shell scripts" OFF)

option(NO_WARMUP "Do not run a benchmark on each startup just to find the best location for the memory buffer" ON)

option(FIXED_LIBNAME "Use a non-versioned name for the library and no symbolic linking to variant names" OFF)

set(LIBNAMEPREFIX "" CACHE STRING "Add a prefix to the openblas part of the library name" )
set(LIBNAMESUFFIX "" CACHE STRING "Add a suffix after the openblas part of the library name" )

if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding processes from e.g. R or numpy/scipy to a single core" ON)
else()
set(NO_AFFINITY 1)
endif()

option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)

option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)
option(BUILD_STATIC_LIBS "Build static library" OFF)
option(BUILD_SHARED_LIBS "Build shared library" OFF)
if(NOT BUILD_STATIC_LIBS AND NOT BUILD_SHARED_LIBS)
set(BUILD_STATIC_LIBS ON CACHE BOOL "Build static library" FORCE)
endif()
if((BUILD_STATIC_LIBS AND BUILD_SHARED_LIBS) AND MSVC)
message(WARNING "Could not enable both BUILD_STATIC_LIBS and BUILD_SHARED_LIBS with MSVC, Disable BUILD_SHARED_LIBS")
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build static library" FORCE)
endif()

# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoids conflicts with other BLAS libraries, especially when using
# 64 bit integer interfaces in OpenBLAS.
set(SYMBOLPREFIX "" CACHE STRING "Add a prefix to all exported symbol names in the shared library to avoid conflicts with other BLAS libraries" )

set(SYMBOLSUFFIX "" CACHE STRING "Add a suffix to all exported symbol names in the shared library, e.g. _64 for INTERFACE64 builds" )

if (CMAKE_SYSTEM_NAME MATCHES "Windows" AND BUILD_SHARED_LIBS AND NOT ("${SYMBOLPREFIX}${SYMBOLSUFFIX}" STREQUAL ""))
set (DELETE_STATIC_LIBS "")
if (NOT BUILD_STATIC_LIBS)
message (STATUS "forcing build of a temporary static library for symbol renaming")
set (BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared library" FORCE)
set (BUILD_STATIC_LIBS ON CACHE BOOL "Build static library" FORCE)
set (DELETE_STATIC_LIBS file (REMOVE $<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.lib))
endif ()
endif()


#######
if(BUILD_WITHOUT_LAPACK)
set(NO_LAPACK 1)
set(NO_LAPACKE 1)
endif()

if (BUILD_WITHOUT_LAPACKE)
set(NO_LAPACKE 1)
endif()

if(BUILD_WITHOUT_CBLAS)
set(NO_CBLAS 1)
endif()

#######

if(MSVC AND MSVC_STATIC_CRT)
set(CompilerFlags
CMAKE_CXX_FLAGS
CMAKE_CXX_FLAGS_DEBUG
CMAKE_CXX_FLAGS_RELEASE
CMAKE_C_FLAGS
CMAKE_C_FLAGS_DEBUG
CMAKE_C_FLAGS_RELEASE
)
foreach(CompilerFlag ${CompilerFlags})
string(REPLACE "/MD" "/MT" ${CompilerFlag} "${${CompilerFlag}}")
endforeach()
endif()

message(WARNING "CMake support is experimental. It does not yet support all build options and may not produce the same Makefiles that OpenBLAS ships with.")

include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")

string(FIND "${LIBNAMESUFFIX}" "${SUFFIX64_UNDERSCORE}" HAVE64)
if (${HAVE64} GREATER -1)
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX})
else ()
set(OpenBLAS_LIBNAME ${LIBNAMEPREFIX}openblas${LIBNAMESUFFIX}${SUFFIX64_UNDERSCORE})
endif ()

set(BLASDIRS interface driver/level2 driver/level3 driver/others)

if (NOT DYNAMIC_ARCH)
list(APPEND BLASDIRS kernel)
endif ()

if (DEFINED SANITY_CHECK)
list(APPEND BLASDIRS reference)
endif ()

set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
if(BUILD_RELAPACK)
list(APPEND SUBDIRS relapack/src)
endif()
list(APPEND SUBDIRS lapack)
endif ()

if (NOT DEFINED BUILD_BFLOAT16)
set (BUILD_BFLOAT16 false)
endif ()
if (NOT DEFINED BUILD_HFLOAT16)
set (BUILD_HFLOAT16 false)
endif ()
# set which float types we want to build for
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
# if none are defined, build for all
# set(BUILD_BFLOAT16 true)
set(BUILD_SINGLE true)
set(BUILD_DOUBLE true)
set(BUILD_COMPLEX true)
set(BUILD_COMPLEX16 true)
endif ()

if (NOT DEFINED BUILD_MATGEN)
set(BUILD_MATGEN true)
endif()

set(FLOAT_TYPES "")
if (BUILD_SINGLE)
message(STATUS "Building Single Precision")
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
endif ()

if (BUILD_DOUBLE)
message(STATUS "Building Double Precision")
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
endif ()

if (BUILD_COMPLEX)
message(STATUS "Building Complex Precision")
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
endif ()

if (BUILD_COMPLEX16)
message(STATUS "Building Double Complex Precision")
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif ()

if (BUILD_BFLOAT16)
message(STATUS "Building Half Precision")
# list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
endif ()

if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()

#Set default output directory
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
if(MSVC)
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY_DEBUG ${PROJECT_BINARY_DIR}/lib/Debug)
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY_RELEASE ${PROJECT_BINARY_DIR}/lib/Release)
endif ()

# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html)
set(TARGET_OBJS "")
foreach (SUBDIR ${SUBDIRS})
add_subdirectory(${SUBDIR})
string(REPLACE "/" "_" subdir_obj ${SUBDIR})
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
endforeach ()

# netlib:

# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
if (NOT NO_LAPACK)
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
if (NOT NO_LAPACKE)
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
endif ()
endif ()

# Only generate .def for dll on MSVC and always produce pdb files for debug and release
if(MSVC)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif()
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /Zi")
set(CMAKE_SHARED_LINKER_FLAGS_RELEASE "${CMAKE_SHARED_LINKER_FLAGS_RELEASE} /DEBUG /OPT:REF /OPT:ICF")
endif()

if (${DYNAMIC_ARCH})
add_subdirectory(kernel)
foreach(TARGET_CORE ${DYNAMIC_CORE})
message("${TARGET_CORE}")
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:kernel_${TARGET_CORE}>")
endforeach()
endif ()

# add objects to the openblas lib
if(NOT NO_LAPACK)
add_library(LAPACK_OVERRIDES OBJECT ${LA_SOURCES})
if (USE_OPENMP AND (NOT NOFORTRAN))
# Disable OpenMP for LAPACK Fortran codes on Windows.
if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
target_link_libraries(LAPACK_OVERRIDES OpenMP::OpenMP_Fortran)
endif()
endif()
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACK_OVERRIDES>")
endif()
if(NOT NO_LAPACKE)
add_library(LAPACKE OBJECT ${LAPACKE_SOURCES})
list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:LAPACKE>")
endif()
#if(BUILD_RELAPACK)
# add_library(RELAPACK OBJECT ${RELA_SOURCES})
# list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:RELAPACK>")
#endif()
set(OpenBLAS_LIBS "")
if(BUILD_STATIC_LIBS)
add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
target_include_directories(${OpenBLAS_LIBNAME}_static INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_static)
endif()
if(BUILD_SHARED_LIBS)
add_library(${OpenBLAS_LIBNAME}_shared SHARED ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
target_include_directories(${OpenBLAS_LIBNAME}_shared INTERFACE $<INSTALL_INTERFACE:include/openblas${SUFFIX64}>)
list(APPEND OpenBLAS_LIBS ${OpenBLAS_LIBNAME}_shared)
endif()
if(BUILD_STATIC_LIBS)
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_static)
else()
add_library(${OpenBLAS_LIBNAME} ALIAS ${OpenBLAS_LIBNAME}_shared)
endif()

set_target_properties(${OpenBLAS_LIBS} PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME})

# Android needs to explicitly link against libm
if (${CMAKE_SYSTEM_NAME} MATCHES "AIX|Android|Linux|FreeBSD|OpenBSD|NetBSD|DragonFly|Darwin")
if(BUILD_STATIC_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_static m)
endif()
if(BUILD_SHARED_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_shared m)
endif()
endif()

if (USE_OPENMP)
if(BUILD_STATIC_LIBS)
if(NOFORTRAN)
target_link_libraries(${OpenBLAS_LIBNAME}_static OpenMP::OpenMP_C)
else()
target_link_libraries(${OpenBLAS_LIBNAME}_static OpenMP::OpenMP_C OpenMP::OpenMP_Fortran)
endif()
endif()
if(BUILD_SHARED_LIBS)
if(NOFORTRAN)
target_link_libraries(${OpenBLAS_LIBNAME}_shared OpenMP::OpenMP_C)
else()
target_link_libraries(${OpenBLAS_LIBNAME}_shared OpenMP::OpenMP_C OpenMP::OpenMP_Fortran)
endif()
endif()
endif()

# Fix "Argument list too long" for macOS with POWERPC or Intel CPUs
if(APPLE AND (NOT CMAKE_HOST_SYSTEM_PROCESSOR STREQUAL "arm64"))
# Use response files
set(CMAKE_C_USE_RESPONSE_FILE_FOR_OBJECTS 1)
# Always build static library first
if(BUILD_STATIC_LIBS)
set(STATIC_PATH "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/lib${OpenBLAS_LIBNAME}.a")
else()
add_library(${OpenBLAS_LIBNAME}_static STATIC ${TARGET_OBJS} ${OpenBLAS_DEF_FILE})
set(STATIC_PATH "lib${OpenBLAS_LIBNAME}.a")
endif()
set(CREATE_STATIC_LIBRARY_COMMAND
"sh -c 'cat ${CMAKE_BINARY_DIR}/CMakeFiles/${OpenBLAS_LIBNAME}_static.dir/objects*.rsp | xargs -n 1024 ${CMAKE_AR} -ru ${STATIC_PATH} && exit 0' "
"sh -c '${CMAKE_AR} -rs ${STATIC_PATH} ${CMAKE_BINARY_DIR}/driver/others/CMakeFiles/driver_others.dir/xerbla.c.o && exit 0' ")
if(BUILD_SHARED_LIBS)
add_dependencies(${OpenBLAS_LIBNAME}_shared ${OpenBLAS_LIBNAME}_static)
set(SHARED_PATH "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}/libopenblas.${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.dylib")
endif()
if(USE_OPENMP)
get_target_property(OMP_LIB OpenMP::OpenMP_C INTERFACE_LINK_LIBRARIES)
else()
set(OMP_LIB "")
endif()
if(NOT NOFORTRAN)
set(CMAKE_Fortran_USE_RESPONSE_FILE_FOR_OBJECTS 1)
set(CMAKE_Fortran_CREATE_STATIC_LIBRARY ${CREATE_STATIC_LIBRARY_COMMAND})
if(BUILD_SHARED_LIBS)
set(CMAKE_Fortran_CREATE_SHARED_LIBRARY
"sh -c 'echo \"\" | ${CMAKE_Fortran_COMPILER} -o dummy.o -c -x f95-cpp-input - '"
"sh -c '${CMAKE_Fortran_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,${STATIC_PATH} dummy.o -o ${SHARED_PATH} ${OMP_LIB}'")
endif()
else()
set(CMAKE_C_CREATE_STATIC_LIBRARY ${CREATE_STATIC_LIBRARY_COMMAND})
if(BUILD_SHARED_LIBS)
set(CMAKE_C_CREATE_SHARED_LIBRARY
"sh -c '${CMAKE_C_COMPILER} -fpic -shared -Wl,-all_load -Wl,-force_load,${STATIC_PATH} -o ${SHARED_PATH} ${OMP_LIB}'")
endif()
endif()
endif()

# Handle MSVC exports
if(MSVC AND BUILD_SHARED_LIBS)
if (${CMAKE_MAJOR_VERSION}.${CMAKE_MINOR_VERSION} VERSION_LESS 3.4)
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake")
else()
# Creates verbose .def file (51KB vs 18KB)
set_target_properties(${OpenBLAS_LIBNAME}_shared PROPERTIES WINDOWS_EXPORT_ALL_SYMBOLS true)
endif()
endif()

# Set output for libopenblas
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d")
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES EXPORT_NAME "OpenBLAS")

foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES})
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG )

set_target_properties( ${OpenBLAS_LIBS} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
set_target_properties( ${OpenBLAS_LIBS} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib/${OUTPUTCONFIG} )
endforeach()

enable_testing()

if (USE_THREAD)
# Add threading library to linker
find_package(Threads)
if (THREADS_HAVE_PTHREAD_ARG)
set_target_properties(${OpenBLAS_LIBS} PROPERTIES
COMPILE_OPTIONS "-pthread"
INTERFACE_COMPILE_OPTIONS "-pthread"
)
endif()
if(BUILD_STATIC_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_static ${CMAKE_THREAD_LIBS_INIT})
endif()
if(BUILD_SHARED_LIBS)
target_link_libraries(${OpenBLAS_LIBNAME}_shared ${CMAKE_THREAD_LIBS_INIT})
endif()
endif()

#if (MSVC OR NOT NOFORTRAN)
if (NOT NO_CBLAS)
if (NOT ONLY_CBLAS)
# Broken without fortran on unix
add_subdirectory(utest)
endif()
endif()

if (NOT NOFORTRAN)
if (NOT ONLY_CBLAS)
# Build test and ctest
add_subdirectory(test)
endif()
if (BUILD_TESTING AND NOT BUILD_WITHOUT_LAPACK)
add_subdirectory(lapack-netlib/TESTING)
endif()
endif()
if(NOT NO_CBLAS)
if (NOT ONLY_CBLAS)
add_subdirectory(ctest)
endif()
endif()
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
add_subdirectory(cpp_thread_test)
endif()

if (NOT FIXED_LIBNAME)
set_target_properties(${OpenBLAS_LIBS} PROPERTIES
VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
SOVERSION ${OpenBLAS_MAJOR_VERSION}
)
endif()
if (BUILD_SHARED_LIBS AND BUILD_RELAPACK)
if (NOT MSVC)
target_link_libraries(${OpenBLAS_LIBNAME}_shared "-Wl,-allow-multiple-definition")
else()
set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /FORCE:MULTIPLE")
endif()
endif()

if (BUILD_SHARED_LIBS OR DELETE_STATIC_LIBS AND NOT ${SYMBOLPREFIX}${SYMBOLSUFFIX} STREQUAL "")
if (NOT DEFINED ARCH)
set(ARCH_IN "x86_64")
else()
set(ARCH_IN ${ARCH})
endif()

if (${CORE} STREQUAL "generic")
set(ARCH_IN "GENERIC")
endif ()

if (NOT DEFINED EXPRECISION)
set(EXPRECISION_IN 0)
else()
set(EXPRECISION_IN ${EXPRECISION})
endif()

if (NOT DEFINED NO_CBLAS)
set(NO_CBLAS_IN 0)
else()
set(NO_CBLAS_IN ${NO_CBLAS})
endif()

if (NOT DEFINED NO_LAPACK)
set(NO_LAPACK_IN 0)
else()
set(NO_LAPACK_IN ${NO_LAPACK})
endif()

if (NOT DEFINED NO_LAPACKE)
set(NO_LAPACKE_IN 0)
else()
set(NO_LAPACKE_IN ${NO_LAPACKE})
endif()

if (NOT DEFINED NEED2UNDERSCORES)
set(NEED2UNDERSCORES_IN 0)
else()
set(NEED2UNDERSCORES_IN ${NEED2UNDERSCORES})
endif()

if (NOT DEFINED ONLY_CBLAS)
set(ONLY_CBLAS_IN 0)
else()
set(ONLY_CBLAS_IN ${ONLY_CBLAS})
endif()

if (NOT DEFINED BU)
set(BU _)
endif()

if (NOT ${SYMBOLPREFIX} STREQUAL "")
message(STATUS "adding prefix ${SYMBOLPREFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
message(STATUS "adding suffix ${SYMBOLSUFFIX} to names of exported symbols in ${OpenBLAS_LIBNAME}")
endif()

if (${BUILD_LAPACK_DEPRECATED})
set (BLD 1)
else ()
set (BLD 0)
endif()
if (${BUILD_BFLOAT16})
set (BBF16 1)
else ()
set (BBF16 0)
endif()
if (${BUILD_SINGLE})
set (BS 1)
else ()
set (BS 0)
endif()
if (${BUILD_DOUBLE})
set (BD 1)
else ()
set (BD 0)
endif()
if (${BUILD_COMPLEX})
set (BC 1)
else ()
set (BC 0)
endif()
if (${BUILD_COMPLEX16})
set (BZ 1)
else ()
set (BZ 0)
endif()

if (CMAKE_SYSTEM_NAME MATCHES "Windows")
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
if (CMAKE_BUILD_TYPE MATCHES "Debug")
set (CRTLIB msvcrtd)
set (PDBOPT -debug -pdb:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.pdb)
set (PDB_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)
else ()
set (CRTLIB msvcrt)
set (PDBOPT "")
endif()
#if (USE_PERL)
message(STATUS "adding postbuild instruction to rename syms")
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_static POST_BUILD
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "win2k" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/renamesyms.def
COMMAND ${CMAKE_C_COMPILER} ${CMAKE_C_FLAGS} -I${PROJECT_SOURCE_DIR} -I${PROJECT_BINARY_DIR} -c -o ${PROJECT_BINARY_DIR}/dllinit.o ${PROJECT_SOURCE_DIR}/exports/dllinit.c
COMMAND lld-link -nodefaultlib:libcmt -defaultlib:${CRTLIB} ${CMAKE_LINKER_FLAGS} -errorlimit:0 -def:${PROJECT_BINARY_DIR}/renamesyms.def ${PROJECT_BINARY_DIR}/dllinit.o $<TARGET_FILE:${OpenBLAS_LIBNAME}_static> -wholearchive:$<TARGET_FILE:${OpenBLAS_LIBNAME}_static> -dll -out:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.dll -implib:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.dll.a ${PDBOPT}
#COMMAND lld-link -nodefaultlib:libcmt -defaultlib:msvcrt ${CMAKE_LINKER_FLAGS} -errorlimit:0 -def:${PROJECT_BINARY_DIR}/renamesyms.def ${PROJECT_BINARY_DIR}/dllinit.o $<TARGET_FILE:${OpenBLAS_LIBNAME}_static> -wholearchive:$<TARGET_FILE:${OpenBLAS_LIBNAME}_static> -dll -out:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.dll -implib:$<TARGET_FILE_DIR:${OpenBLAS_LIBNAME}_static>/${OpenBLAS_LIBNAME}.dll.a
${REMOVE_STATIC_LIB} VERBATIM
)
#endif ()
else ()
if (NOT USE_PERL)
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
COMMAND sh ${PROJECT_SOURCE_DIR}/exports/gensymbol "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)
else()
add_custom_command(TARGET ${OpenBLAS_LIBNAME}_shared POST_BUILD
COMMAND perl ${PROJECT_SOURCE_DIR}/exports/gensymbol.pl "objcopy" "${ARCH}" "${BU}" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" \"${SYMBOLPREFIX}\" \"${SYMBOLSUFFIX}\" "${BLD}" "${BBF16}" "${BS}" "${BD}" "${BC}" "${BZ}" > ${PROJECT_BINARY_DIR}/objcopy.def
COMMAND objcopy --redefine-syms ${PROJECT_BINARY_DIR}/objcopy.def ${PROJECT_BINARY_DIR}/lib/lib${OpenBLAS_LIBNAME}.so
COMMENT "renaming symbols"
)
endif()
endif()
endif()

if (BUILD_BENCHMARKS)
#find_package(OpenMP REQUIRED)
file(GLOB SOURCES "benchmark/*.c")
if (NOT USE_OPENMP)
file(GLOB REMFILE "benchmark/smallscaling.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
endif()
if (BUILD_WITHOUT_LAPACK)
file(GLOB REMFILE "benchmark/cholesky.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/geev.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/gesv.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/getri.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/potrf.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/spmv.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/symv.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
file(GLOB REMFILE "benchmark/linpack.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
endif()
if (NOT USE_GEMM3M)
file(GLOB REMFILE "benchmark/gemm3m.c")
list(REMOVE_ITEM SOURCES ${REMFILE})
endif()
foreach(source ${SOURCES})
get_filename_component(name ${source} NAME_WE)
if ((NOT ${name} STREQUAL "zdot-intel") AND (NOT ${name} STREQUAL "cula_wrapper"))
set(defines DEFAULT COMPLEX DOUBLE "COMPLEX\;DOUBLE")
foreach(define ${defines})
set(target_name "benchmark_${name}")
if (NOT "${define}" STREQUAL "DEFAULT")
string(JOIN "_" define_str ${define})
set(target_name "${target_name}_${define_str}")
endif()
if ((NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imax_COMPLEX_DOUBLE") AND
(NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_imin_COMPLEX_DOUBLE") AND
(NOT ${target_name} STREQUAL "benchmark_max_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_max_COMPLEX_DOUBLE") AND
(NOT ${target_name} STREQUAL "benchmark_min_COMPLEX") AND (NOT ${target_name} STREQUAL "benchmark_min_COMPLEX_DOUBLE"))
add_executable(${target_name} ${source})
target_include_directories(${target_name} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} )
# target_link_libraries(${target_name} ${OpenBLAS_LIBNAME} OpenMP::OpenMP_C)
if (NOT "${define}" STREQUAL "DEFAULT")
target_compile_definitions(${target_name} PRIVATE ${define})
endif()
endif()
endforeach()
endif()
endforeach()
endif()


# Install project

# Install libraries
if(BUILD_SHARED_LIBS AND BUILD_STATIC_LIBS)
install(TARGETS ${OpenBLAS_LIBNAME}_shared
EXPORT "OpenBLAS${SUFFIX64}Targets"
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
install(TARGETS ${OpenBLAS_LIBNAME}_static
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
else()
install(TARGETS ${OpenBLAS_LIBS}
EXPORT "OpenBLAS${SUFFIX64}Targets"
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} )
endif()

# Install headers
set(CMAKE_INSTALL_INCLUDEDIR ${CMAKE_INSTALL_INCLUDEDIR}/openblas${SUFFIX64})
set(CMAKE_INSTALL_FULL_INCLUDEDIR ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})

message(STATUS "Generating openblas_config.h in ${CMAKE_INSTALL_INCLUDEDIR}")

set(OPENBLAS_CONFIG_H ${CMAKE_BINARY_DIR}/openblas_config.h)
file(WRITE ${OPENBLAS_CONFIG_H} "#ifndef OPENBLAS_CONFIG_H\n")
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_CONFIG_H\n")
file(STRINGS ${PROJECT_BINARY_DIR}/config.h __lines)
foreach(line ${__lines})
string(REPLACE "#define " "" line ${line})
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_${line}\n")
endforeach()
file(APPEND ${OPENBLAS_CONFIG_H} "#define OPENBLAS_VERSION \"OpenBLAS ${OpenBLAS_VERSION}\"\n")
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/openblas_config_template.h OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS)
file(APPEND ${OPENBLAS_CONFIG_H} "${OPENBLAS_CONFIG_TEMPLATE_H_CONTENTS}\n")
file(APPEND ${OPENBLAS_CONFIG_H} "#endif /* OPENBLAS_CONFIG_H */\n")
install (FILES ${OPENBLAS_CONFIG_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

if(NOT NOFORTRAN)
message(STATUS "Generating f77blas.h in ${CMAKE_INSTALL_INCLUDEDIR}")

set(F77BLAS_H ${CMAKE_BINARY_DIR}/generated/f77blas.h)
file(WRITE ${F77BLAS_H} "#ifndef OPENBLAS_F77BLAS_H\n")
file(APPEND ${F77BLAS_H} "#define OPENBLAS_F77BLAS_H\n")
file(APPEND ${F77BLAS_H} "#include \"openblas_config.h\"\n")
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/common_interface.h COMMON_INTERFACE_H_CONTENTS)
file(APPEND ${F77BLAS_H} "${COMMON_INTERFACE_H_CONTENTS}\n")
file(APPEND ${F77BLAS_H} "#endif")
install (FILES ${F77BLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()

if(NOT NO_CBLAS)
message (STATUS "Generating cblas.h in ${CMAKE_INSTALL_INCLUDEDIR}")
set(CBLAS_H ${CMAKE_BINARY_DIR}/generated/cblas.h)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/cblas.h CBLAS_H_CONTENTS)
string(REPLACE "common" "openblas_config" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
if (NOT ${SYMBOLPREFIX} STREQUAL "")
string(REPLACE " cblas" " ${SYMBOLPREFIX}cblas" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REPLACE " openblas" " ${SYMBOLPREFIX}openblas" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
string (REPLACE " ${SYMBOLPREFIX}openblas_complex" " openblas_complex" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REPLACE " goto" " ${SYMBOLPREFIX}goto" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
endif()
if (NOT ${SYMBOLSUFFIX} STREQUAL "")
string(REGEX REPLACE "(cblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REGEX REPLACE "(openblas[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
string(REGEX REPLACE "(openblas_complex[^ ]*)${SYMBOLSUFFIX}" "\\1" CBLAS_H_CONTENTS "${CBLAS_H_CONTENTS_NEW}")
string(REGEX REPLACE "(goto[^ (]*)" "\\1${SYMBOLSUFFIX}" CBLAS_H_CONTENTS_NEW "${CBLAS_H_CONTENTS}")
endif()
file(WRITE ${CBLAS_H} "${CBLAS_H_CONTENTS_NEW}")
install (FILES ${CBLAS_H} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()

if(NOT NO_LAPACKE)
message (STATUS "Copying LAPACKE header files to ${CMAKE_INSTALL_INCLUDEDIR}")
if(BUILD_STATIC_LIBS)
add_dependencies( ${OpenBLAS_LIBNAME}_static genlapacke)
endif()
if(BUILD_SHARED_LIBS)
add_dependencies( ${OpenBLAS_LIBNAME}_shared genlapacke)
endif()
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/*.h")
install (FILES ${INCLUDE_FILES} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

ADD_CUSTOM_TARGET(genlapacke
COMMAND ${CMAKE_COMMAND} -E copy ${CMAKE_CURRENT_SOURCE_DIR}/lapack-netlib/LAPACKE/include/lapacke_mangling_with_flags.h.in "${CMAKE_BINARY_DIR}/lapacke_mangling.h"
)
install (FILES ${CMAKE_BINARY_DIR}/lapacke_mangling.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
endif()

# Install pkg-config files
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc @ONLY)
install (FILES ${PROJECT_BINARY_DIR}/openblas${SUFFIX64}.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig/)


set(PN OpenBLAS)
set(CMAKECONFIG_INSTALL_DIR "${CMAKE_INSTALL_LIBDIR}/cmake/${PN}${SUFFIX64}")
configure_package_config_file(cmake/${PN}Config.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake"
INSTALL_DESTINATION ${CMAKECONFIG_INSTALL_DIR})
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
VERSION ${${PN}_VERSION}
COMPATIBILITY AnyNewerVersion)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}${SUFFIX64}Config.cmake
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/${PN}ConfigVersion.cmake
RENAME ${PN}${SUFFIX64}ConfigVersion.cmake
DESTINATION ${CMAKECONFIG_INSTALL_DIR})
install(EXPORT "${PN}${SUFFIX64}Targets"
NAMESPACE "${PN}${SUFFIX64}::"
DESTINATION ${CMAKECONFIG_INSTALL_DIR})


+ 1
- 0
CNAME View File

@@ -0,0 +1 @@
openmathlib.org

+ 0
- 269
CONTRIBUTORS.md View File

@@ -1,269 +0,0 @@
# Contributions to the OpenBLAS project

## Creator & Maintainer

* Zhang Xianyi <traits.zhang@gmail.com>

## Active Developers

* Wang Qian <traz0824@gmail.com>
* Optimize BLAS3 on ICT Loongson 3A.
* Optimize BLAS3 on Intel Sandy Bridge.

* Werner Saar <wernsaar@googlemail.com>
* [2013-03-04] Optimize AVX and FMA4 DGEMM on AMD Bulldozer
* [2013-04-27] Optimize AVX and FMA4 TRSM on AMD Bulldozer
* [2013-06-09] Optimize AVX and FMA4 SGEMM on AMD Bulldozer
* [2013-06-11] Optimize AVX and FMA4 ZGEMM on AMD Bulldozer
* [2013-06-12] Optimize AVX and FMA4 CGEMM on AMD Bulldozer
* [2013-06-16] Optimize dgemv_n kernel on AMD Bulldozer
* [2013-06-20] Optimize ddot, daxpy kernel on AMD Bulldozer
* [2013-06-21] Optimize dcopy kernel on AMD Bulldozer
* Porting and Optimization on ARM Cortex-A9
* Optimization on AMD Piledriver
* Optimization on Intel Haswell

* Chris Sidebottom <chris.sidebottom@arm.com>
* Optimizations and other improvements targeting AArch64

* Annop Wongwathanarat <annop.wongwathanarat@arm.com>
* Optimizations and other improvements targeting AArch64

## Previous Developers

* Zaheer Chothia <zaheer.chothia@gmail.com>
* Improve the compatibility about complex number
* Build LAPACKE: C interface to LAPACK
* Improve the windows build.

* Chen Shaohu <huhumartinwar@gmail.com>
* Optimize GEMV on the Loongson 3A processor.

* Luo Wen
* Intern. Test Level-2 BLAS.

## Contributors

In chronological order:

* pipping <http://page.mi.fu-berlin.de/pipping>
* [2011-06-11] Make USE_OPENMP=0 disable openmp.

* Stefan Karpinski <stefan@karpinski.org>
* [2011-12-28] Fix a bug about SystemStubs on Mac OS X.

* Alexander Eberspächer <https://github.com/aeberspaecher>
* [2012-05-02] Add note on patch for segfaults on Linux kernel 2.6.32.

* Mike Nolta <mike@nolta.net>
* [2012-05-19] Fix building bug on FreeBSD and NetBSD.

* Sylvestre Ledru <https://github.com/sylvestre>
* [2012-07-01] Improve the detection of sparc. Fix building bug under
Hurd and kfreebsd.

* Jameson Nash <https://github.com/vtjnash>
* [2012-08-20] Provide support for passing CFLAGS, FFLAGS, PFLAGS, FPFLAGS to
make on the command line.

* Alexander Nasonov <alnsn@yandex.ru>
* [2012-11-10] Fix NetBSD build.

* Sébastien Villemot <sebastien@debian.org>
* [2012-11-14] Fix compilation with TARGET=GENERIC. Patch applied to Debian package.
* [2013-08-28] Avoid failure on qemu guests declaring an Athlon CPU without 3dnow!

* Kang-Che Sung <Explorer09@gmail.com>
* [2013-05-17] Fix typo in the document. Re-order the architecture list in getarch.c.

* Kenneth Hoste <kenneth.hoste@gmail.com>
* [2013-05-22] Adjust Makefile about downloading LAPACK source files.

* Lei WANG <https://github.com/wlbksy>
* [2013-05-22] Fix a bug about wget.

* Dan Luu <http://www.linkedin.com/in/danluu>
* [2013-06-30] Add Intel Haswell support (using sandybridge optimizations).

* grisuthedragon <https://github.com/grisuthedragon>
* [2013-07-11] create openblas_get_parallel to retrieve information which parallelization
model is used by OpenBLAS.

* Elliot Saba <staticfloat@gmail.com>
* [2013-07-22] Add in return value for `interface/trtri.c`

* Sébastien Fabbro <bicatali@gentoo.org>
* [2013-07-24] Modify makefile to respect user's LDFLAGS
* [2013-07-24] Add stack markings for GNU as arch-independent for assembler files

* Viral B. Shah <viral@mayin.org>
* [2013-08-21] Patch LAPACK XLASD4.f as discussed in JuliaLang/julia#2340

* Lars Buitinck <https://github.com/larsmans>
* [2013-08-28] get rid of the generated cblas_noconst.h file
* [2013-08-28] Missing threshold in gemm.c
* [2013-08-28] fix default prefix handling in makefiles

* yieldthought <https://github.com/yieldthought>
* [2013-10-08] Remove -Wl,--retain-symbols-file from dynamic link line to fix tool support

* Keno Fischer <https://github.com/loladiro>
* [2013-10-23] Use FC instead of CC to link the dynamic library on OS X

* Christopher Meng <cickumqt@gmail.com>
* [2013-12-09] Add DESTDIR support for easier building on RPM based distros.
Use install command instead of cp to install files with permissions control.

* Lucas Beyer <lucasb.eyer.be@gmail.com>
* [2013-12-10] Added support for NO_SHARED in make install.

* carlkl <https://github.com/carlkl>
* [2013-12-13] Fixed LAPACKE building bug on Windows

* Isaac Dunham <https://github.com/idunham>
* [2014-08-03] Fixed link error on Linux/musl

* Dave Nuechterlein
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1).
ARMv8 support.

* Jerome Robert <jeromerobert@gmx.com>
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478)
* [2015-12-23] `stack_check` in `gemv.c` (bug #722)
* [2015-12-28] Allow to force the number of parallel make job
* [2015-12-28] Fix detection of AMD E2-3200 detection
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what expected
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731)
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742)
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760)
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727)

* Dan Kortschak
* [2015-01-07] Added test for drotmg bug #484.

* Ton van den Heuvel <https://github.com/ton>
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity().

* Martin Koehler <https://github.com/grisuthedragon/>
* [2015-09-07] Improved imatcopy

* Ashwin Sekhar T K <https://github.com/ashwinyes/>
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8)
* [2015-11-20] lapack-test fixes for Cortex-A57
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57

* theoractice <https://github.com/theoractice/>
* [2016-03-20] Fix compiler error in VisualStudio with CMake
* [2016-03-22] Fix access violation on Windows while static linking

* Paul Mustière <https://github.com/buffer51/>
* [2016-02-04] Fix Android build on ARMV7
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8

* Shivraj Patil <https://github.com/sva-img/>
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA

* Kaustubh Raste <https://github.com/ksraste/>
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA

* Abdelrauf <https://github.com/quickwritereader>
* [2017-01-01] dgemm and dtrmm kernels for IBM z13
* [2017-02-26] ztrmm kernel for IBM z13
* [2017-03-13] strmm and ctrmm kernel for IBM z13
* [2017-09-01] initial Blas Level-1,2 (double precision) for IBM z13
* [2018-03-07] added missing Blas Level 1-2 (double precision) simd codes
* [2019-02-01] added missing Blas Level-1,2 (single precision) simd codes
* [2019-03-14] power9 dgemm/dtrmm kernel
* [2019-04-29] power9 sgemm/strmm kernel

* Jiachen Wang <https://github.com/wjc404>
* [2019-07-29] optimize AVX2 DGEMM
* [2019-10-20] AVX512 DGEMM kernel (4x8)
* [2019-11-06] optimize AVX512 SGEMM
* [2019-11-12] AVX512 CGEMM & ZGEMM kernels
* [2019-12-23] optimize AVX2 CGEMM and ZGEMM
* [2019-12-30] AVX2 CGEMM3M & ZGEMM3M kernels
* [2020-01-07] optimize AVX2 SGEMM and STRMM

* Rajalakshmi Srinivasaraghavan <https://github.com/RajalakshmiSR>
* [2020-04-15] Half-precision GEMM for bfloat16

* Marius Hillenbrand <https://github.com/mhillenibm>
* [2020-05-12] Revise dynamic architecture detection for IBM z
* [2020-05-12] Add new sgemm and strmm kernel for IBM z14
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support

* Danfeng Zhang <https://github.com/craft-zhang>
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53

* PingTouGe Semiconductor Co., Ltd.
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910

* Jake Arkinstall <https://github.com/jake-arkinstall>
* [2021-02-10] Remove in-source configure_file to enable builds in read-only contexts (issue #3100, PR #3101)

* River Dillon <oss@outerpassage.net>
* [2021-07-10] fix compilation with musl libc

* Bine Brank <https://github.com/binebrank>
* [2021-10-27] Add vector-length-agnostic DGEMM kernels for Arm SVE
* [2021-11-20] Vector-length-agnostic Arm SVE copy routines for DGEMM, DTRMM, DSYMM
* [2021-11-12] SVE kernels for SGEMM, STRMM and corresponding SVE copy functions
* [2022-01-06] SVE kernels for CGEMM, ZGEMM, CTRMM, ZTRMM and corresponding SVE copy functions
* [2022-01-18] SVE kernels and copy functions for TRSM

* Ilya Kurdyukov <https://github.com/ilyakurdyukov>
* [2021-02-21] Add basic support for the Elbrus E2000 architecture

* PLCT Lab, Institute of Software Chinese Academy of Sciences
* [2022-03] Support RISC-V Vector Intrinisc 1.0 version.
* Pablo Romero <https://github.com/pablorcum>
* [2022-08] Fix building from sources for QNX

* Mark Seminatore <https://github.com/mseminatore>
* [2023-11-09] Improve Windows threading performance scaling
* [2024-02-09] Introduce MT_TRACE facility and improve code consistency

* Dirreke <https://github.com/mseminatore>
* [2024-01-16] Add basic support for the CSKY architecture

* Christopher Daley <https://github.com/cdaley>
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems

* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32>
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE

* Annop Wongwathanarat <annop.wongwathanarat@arm.com>
* [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1
* [2025-01-21] Optimize gemv_t_sve_v1x3 kernel
* [2025-02-26] Add sbgemv_t_bfdot kernel
* [2025-03-12] Fix aarch64 sbgemv_t compilation error for GCC < 13
* [2025-03-12] Optimize aarch64 sgemm_ncopy

* Marek Michalowski <marek.michalowski@arm.com>
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`
* [2025-02-18] Add thread throttling profile for SGEMM on `NEOVERSEV2`
* [2025-02-19] Add thread throttling profile for SGEMV on `NEOVERSEV2`

* Ye Tao <ye.tao@arm.com>
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1
* [2025-02-27] Add sbgemv_n_neon kernel
* [2025-05-17] Impl prototype of BGEMM inferface

* Abhishek Kumar <https://github.com/abhishek-iitmadras>
* [2025-04-22] Optimise dot kernel for NEOVERSE V1
* [2025-07-23] ARM64-Enable bfloat16 kernels by default

* Sharif Inamdar <sharif.inamdar@arm.com>
* [2025-06-05] Optimize gemv_n_sve_v1x3 kernel

* Guoyuan Li <https://github.com/guoyuanplct>
* [2025-04-11] Optimise gemv kernel for RISCV64_ZVL256B
* [2025-05-01] Optimise zgemv kernel for RISCV64_ZVL256B
* [2025-05-17] Optimise omatcopy/zomatcopy kernel for RISCV64_ZVL256B
* [2025-05-29] Optimise axpby kernel for RISCV64_ZVL256B
* [2025-06-05] Optimise hbmv kernel for RISCV64_ZVL256B


+ 0
- 2402
Changelog.txt
File diff suppressed because it is too large
View File


+ 0
- 32
GotoBLAS_00License.txt View File

@@ -1,32 +0,0 @@

Copyright 2009, 2010 The University of Texas at Austin.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.

THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT AUSTIN ``AS IS''
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT
AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

The views and conclusions contained in the software and documentation
are those of the authors and should not be interpreted as representing
official policies, either expressed or implied, of The University of
Texas at Austin.

+ 0
- 93
GotoBLAS_01Readme.txt View File

@@ -1,93 +0,0 @@
Optimized GotoBLAS2 libraries version 1.13

By Kazushige Goto <kgoto@tacc.utexas.edu>

# This is the last update and done on 5th Feb. 2010.

0. License

See 00TACC_Research_License.txt.

1. Supported OS

Linux
FreeBSD(Also it may work on NetBSD)
OSX
Soralis
Windows 2k, XP, Server 2003 and 2008(both 32bit and 64bit)
AIX
Tru64 UNIX

2. Supported Architecture

X86 : Pentium3 Katmai
Coppermine
Athlon (not well optimized, though)
PentiumM Banias, Yonah
Pentium4 Northwood
Nocona (Prescott)
Core 2 Woodcrest
Core 2 Penryn
Nehalem-EP Corei{3,5,7}
Atom
AMD Opteron
AMD Barlcelona, Shanghai, Istanbul
VIA NANO

X86_64: Pentium4 Nocona
Core 2 Woodcrest
Core 2 Penryn
Nehalem
Atom
AMD Opteron
AMD Barlcelona, Shanghai, Istanbul
VIA NANO

IA64 : Itanium2

Alpha : EV4, EV5, EV6

POWER : POWER4
PPC970/PPC970FX
PPC970MP
CELL (PPU only)
POWER5
PPC440 (QCDOC)
PPC440FP2(BG/L)
POWERPC G4(PPC7450)
POWER6

SPARC : SPARC IV
SPARC VI, VII (Fujitsu chip)

MIPS64/32: Sicortex

3. Supported compiler

C compiler : GNU CC
Cygwin, MinGW
Other commercial compiler(especially for x86/x86_64)

Fortran Compiler : GNU G77, GFORTRAN
G95
Open64
Compaq
F2C
IBM
Intel
PathScale
PGI
SUN
Fujitsu

4. Supported precision

Now x86/x86_64 version support 80bit FP precision in addition to
normal double presicion and single precision. Currently only
gfortran supports 80bit FP with "REAL*10".


5. How to build library?

Please see 02QuickInstall.txt or just type "make".


+ 0
- 118
GotoBLAS_02QuickInstall.txt View File

@@ -1,118 +0,0 @@
Quick installation for GotoBLAS2

***************************************************************************
***************************************************************************
** **
** **
** Just type "make" <<return>>. **
** **
** If you're not satisfied with this library, **
** please read following instruction and customize it. **
** **
** **
***************************************************************************
***************************************************************************


1. REALLY REALLY quick way to build library

Type "make" or "gmake".

$shell> make

The script will detect Fortran compiler, number of cores and
architecture which you're using. If default gcc binary type is
64bit, 64 bit library will be created. Otherwise 32 bit library
will be created.

After finishing compile, you'll find various information about
generated library.

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

GotoBLAS2 build complete.

OS ... Linux
Architecture ... x86_64
BINARY ... 64bit
C compiler ... GCC (command line : gcc)
Fortran compiler ... PATHSCALE (command line : pathf90)
Library Name ... libgoto_barcelonap-r1.27.a (Multi threaded; Max
num-threads is 16)

=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=


2. Specifying 32bit or 64bit library

If you need 32bit binary,

$shell> make BINARY=32

If you need 64bit binary,

$shell> make BINARY=64


3. Specifying target architecture

If you need library for different architecture, you can use TARGET
option. You can find current available options in top of getarch.c.
For example, if you need library for Intel core2 architecture,
you'll find FORCE_CORE2 option in getarch.c. Therefore you can
specify TARGET=CORE2 (get rid of FORCE_) with make.

$shell> make TARGET=CORE2

Also if you want GotoBLAS2 to support multiple architecture,

$shell> make DYNAMIC_ARCH=1

All kernel will be included in the library and dynamically switched
the best architecutre at run time.


4. Specifying for enabling multi-threaded

Script will detect number of cores and will enable multi threaded
library if number of cores is more than two. If you still want to
create single threaded library,

$shell> make USE_THREAD=0

Or if you need threaded library by force,

$shell> make USE_THREAD=1


5. Specifying target OS

Target architecture will be determined by the CC. If you
specify cross compiler for MIPS, you can create library for
MIPS architecture.

$shell> make CC=mips64el-linux-gcc TARGET=SICORTEX

Or you can specify your favorite C compiler with absolute path.

$shell> make CC=/opt/intel/cc/32/10.0.026/bin/icc TARGET=BARCELONA

Binary type (32bit/64bit) is determined by checking CC, you
can control binary type with this option.

$shell> make CC="pathcc -m32"

In this case, 32bit library will be created.


6. Specifying Fortran compiler

If you need to support other Fortran compiler, you can specify with
FC option.

$shell> make FC=gfortran


7. Other useful options

You'll find other useful options in Makefile.rule.

+ 0
- 128
GotoBLAS_03FAQ.txt View File

@@ -1,128 +0,0 @@
GotoBLAS2 FAQ

1. General

1.1 Q Can I find useful paper about GotoBLAS2?

A You may check following URL.

http://www.cs.utexas.edu/users/flame/Publications/index.htm

11. Kazushige Goto and Robert A. van de Geijn, " Anatomy of
High-Performance Matrix Multiplication," ACM Transactions on
Mathematical Software, accepted.

15. Kazushige Goto and Robert van de Geijn, "High-Performance
Implementation of the Level-3 BLAS." ACM Transactions on
Mathematical Software, submitted.


1.2 Q Does GotoBLAS2 work with Hyperthread (SMT)?

A Yes, it will work. GotoBLAS2 detects Hyperthread and
avoid scheduling on the same core.


1.3 Q When I type "make", following error occured. What's wrong?

$shell> make
"./Makefile.rule", line 58: Missing dependency operator
"./Makefile.rule", line 61: Need an operator
...

A This error occurs because you didn't use GNU make. Some binary
packages install GNU make as "gmake" and it's worth to try.


1.4 Q Function "xxx" is slow. Why?

A Generally GotoBLAS2 has many well optimized functions, but it's
far and far from perfect. Especially Level 1/2 function
performance depends on how you call BLAS. You should understand
what happends between your function and GotoBLAS2 by using profile
enabled version or hardware performance counter. Again, please
don't regard GotoBLAS2 as a black box.


1.5 Q I have a commercial C compiler and want to compile GotoBLAS2 with
it. Is it possible?

A All function that affects performance is written in assembler
and C code is just used for wrapper of assembler functions or
complicated functions. Also I use many inline assembler functions,
unfortunately most of commercial compiler can't handle inline
assembler. Therefore you should use gcc.


1.6 Q I use OpenMP compiler. How can I use GotoBLAS2 with it?

A Please understand that OpenMP is a compromised method to use
thread. If you want to use OpenMP based code with GotoBLAS2, you
should enable "USE_OPENMP=1" in Makefile.rule.


1.7 Q Could you tell me how to use profiled library?

A You need to build and link your application with -pg
option. After executing your application, "gmon.out" is
generated in your current directory.

$shell> gprof <your application name> gmon.out

Each sample counts as 0.01 seconds.
% cumulative self self total
time seconds seconds calls Ks/call Ks/call name
89.86 975.02 975.02 79317 0.00 0.00 .dgemm_kernel
4.19 1020.47 45.45 40 0.00 0.00 .dlaswp00N
2.28 1045.16 24.69 2539 0.00 0.00 .dtrsm_kernel_LT
1.19 1058.03 12.87 79317 0.00 0.00 .dgemm_otcopy
1.05 1069.40 11.37 4999 0.00 0.00 .dgemm_oncopy
....

I think profiled BLAS library is really useful for your
research. Please find bottleneck of your application and
improve it.

1.8 Q Is number of thread limited?

A Basically, there is no limitation about number of threads. You
can specify number of threads as many as you want, but larger
number of threads will consume extra resource. I recommend you to
specify minimum number of threads.

1.9 Q I have segfaults when I compile with USE_OPENMP=1. What's wrong?

A This may be related to a bug in the Linux kernel 2.6.32. Try applying
the patch segaults.patch using

patch < segfaults.patch

and see if the crashes persist. Note that this patch will lead to many
compiler warnings.

2. Architecture Specific issue or Implementation

2.1 Q GotoBLAS2 seems to support any combination with OS and
architecture. Is it possible?

A Combination is limited by current OS and architecture. For
examble, the combination OSX with SPARC is impossible. But it
will be possible with slight modification if these combination
appears in front of us.


2.2 Q I have POWER architecture systems. Do I need extra work?

A Although POWER architecture defined special instruction
like CPUID to detect correct architecture, it's privileged
and can't be accessed by user process. So you have to set
the architecture that you have manually in getarch.c.


2.3 Q I can't create DLL on Cygwin (Error 53). What's wrong?

A You have to make sure if lib.exe and mspdb80.dll are in Microsoft
Studio PATH. The easiest way is to use 'which' command.

$shell> which lib.exe
/cygdrive/c/Program Files/Microsoft Visual Studio/VC98/bin/lib.exe

+ 0
- 13
GotoBLAS_04FAQ.txt View File

@@ -1,13 +0,0 @@

Quick guide to build library for Windows 64bit.

1. What you need

a. Windows Server 2003 or later
b. Cygwin environment(make, gcc, g77, perl, sed, wget)
c. MinGW64 compiler
d. Microsoft Visual Studio (lib.exe and mspdb80.dll are required to create dll)

2. Do ./quickbuild.win64

Good luck

+ 0
- 53
GotoBLAS_05LargePage.txt View File

@@ -1,53 +0,0 @@
To enhance perfomance, I'd recommend you to enable large page on
your OS (root account is required).

A) Linux

x86 32bit ... (number of core) * 4 pages
x86 64bit ... (number of core) * 8 pages
POWER 32/64bit ... (number of core) * 1 pages

If you want to allocate 64 large pages,

$shell> echo 0 > /proc/sys/vm/nr_hugepages # need to be reset
$shell> echo 65 > /proc/sys/vm/nr_hugepages # add 1 extra page
$shell> echo 3355443200 > /proc/sys/kernel/shmmax # just large number
$shell> echo 3355443200 > /proc/sys/kernel/shmall

Also may add a few lines into /etc/security/limits.conf file.

* hard memlock unlimited
* soft memlock unlimited

Then restart sshd (/etc/init.d/sshd restart).

B) Solaris

You don't have to set up.

C) Windows (Windows Server 2003 or later, XP 64bit)

You have to assign memory lock operation to your account.

Control Panel -> Administrative Tools -> Local Security Policy ->
Local Policies -> User Rights Assignment -> Lock pages in memory

D) AIX

Ask your administrator

E) Tru64 UNIX

Assign shared memory at boot time.

F) Other aarchitecture which doesn't have Large TLB enhancement

If you have root permission, please install device driver which
located in drivers/mapper.

$shell> cd drivers/mapper
$shell> make
$shell> insmod mapper.ko
$shell> ./device_setup

Then enable DEVICEDRIVER_ALLOCATION = 1 in Makefile.rule.

+ 0
- 22
GotoBLAS_06WeirdPerformance.txt View File

@@ -1,22 +0,0 @@
Weird Performance

1. If you see serious performance loss (extremely low performance),
probably you created too many threads or process. Basically GotoBLAS
assumes that available cores that you specify are exclusively for
BLAS computation. Even one small thread/process conflicts with BLAS
threads, performance will become worse.

The best solution is to reduce your number of threads or insert
some synchronization mechanism and suspend your threads until BLAS
operation is finished.


2. Similar problem may happen under virtual machine. If supervisor
allocates different cores for each scheduling, BLAS performnace
will be bad. This is because BLAS also utilizes all cache,
unexpected re-schedule for different core may result of heavy
performance loss.


Anyway, if you see any weird performance loss, it means your code or
algorithm is not optimal.

+ 0
- 14
Jenkinsfile View File

@@ -1,14 +0,0 @@
pipeline {
agent {
docker {
image 'osuosl/ubuntu-s390x'
}
}
stages {
stage('Build') {
steps {
sh 'make clean && make'
}
}
}
}

+ 0
- 16
Jenkinsfile.pwr View File

@@ -1,16 +0,0 @@
pipeline {
agent {
docker {
image 'osuosl/ubuntu-ppc64le:18.04'
}
}
stages {
stage('Build') {
steps {
sh 'sudo apt update'
sh 'sudo apt install gfortran -y'
sh 'make clean && make'
}
}
}
}

+ 0
- 29
LICENSE View File

@@ -1,29 +0,0 @@
Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+ 0
- 459
Makefile View File

@@ -1,459 +0,0 @@
TOPDIR = .
include ./Makefile.system
LNCMD = ln -fs
ifeq ($(FIXED_LIBNAME), 1)
LNCMD = true
endif

BLASDIRS = interface driver/level2 driver/level3 driver/others

ifneq ($(DYNAMIC_ARCH), 1)
BLASDIRS += kernel
endif

ifdef SANITY_CHECK
BLASDIRS += reference
endif

SUBDIRS = $(BLASDIRS)
ifneq ($(NO_LAPACK), 1)
SUBDIRS += lapack
endif

RELA =
ifeq ($(BUILD_RELAPACK), 1)
RELA = re_lapack
endif

ifeq ($(NO_FORTRAN), 1)
define NOFORTRAN
1
endef
ifneq ($(NO_LAPACK), 1)
define C_LAPACK
1
endef
endif
export NOFORTRAN
export NO_LAPACK
export C_LAPACK
endif

ifeq ($(F_COMPILER),CRAY)
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -Og -Os,$(LAPACK_FFLAGS))
else
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast -O -Og -Os,$(LAPACK_FFLAGS))
endif

ifdef LAPACK_STRLEN
LAPACK_FFLAGS += -DLAPACK_STRLEN=$(LAPACK_STRLEN)
endif

SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench cpp_thread_test

.PHONY : all libs netlib $(RELA) test ctest shared install
.NOTPARALLEL : shared

all :: tests
@echo
@echo " OpenBLAS build complete. ($(LIB_COMPONENTS))"
@echo
@echo " OS ... $(OSNAME) "
@echo " Architecture ... $(ARCH) "
ifndef BINARY64
@echo " BINARY ... 32bit "
else
@echo " BINARY ... 64bit "
endif

ifdef INTERFACE64
ifneq ($(INTERFACE64), 0)
@echo " Use 64 bits int (equivalent to \"-i8\" in Fortran) "
endif
endif
@$(CC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
cverinfo=`$(CC) --version | sed -n '1p'`; \
if [ -z "$${cverinfo}" ]; then \
cverinfo=`$(CC) --version | sed -n '2p'`; \
fi; \
echo " C compiler ... $(C_COMPILER) (cmd & version : $${cverinfo})";\
else \
echo " C compiler ... $(C_COMPILER) (command line : $(CC))";\
fi
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
@$(FC) --version > /dev/null 2>&1;\
if [ $$? -eq 0 ]; then \
fverinfo=`$(FC) --version | sed -n '1p'`; \
if [ -z "$${fverinfo}" ]; then \
fverinfo=`$(FC) --version | sed -n '2p'`; \
fi; \
echo " Fortran compiler ... $(F_COMPILER) (cmd & version : $${fverinfo})";\
else \
echo " Fortran compiler ... $(F_COMPILER) (command line : $(FC))";\
fi
endif

ifeq ($(OSNAME), WINNT)
@-$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif

ifneq ($(OSNAME), AIX)
@echo -n " Library Name ... $(LIBNAME)"
else
@echo " Library Name ... $(LIBNAME)"
endif

ifndef SMP
@echo " (Single-threading) "
else
@echo " (Multi-threading; Max num-threads is $(NUM_THREADS))"
endif

ifeq ($(DYNAMIC_ARCH), 1)
@echo " Supporting multiple $(ARCH) cpu models with minimum requirement for the common code being $(CORE)"
endif

ifeq ($(USE_OPENMP), 1)
@echo
@echo " Use OpenMP in the multithreading. Because of ignoring OPENBLAS_NUM_THREADS and GOTO_NUM_THREADS flags, "
@echo " you should use OMP_NUM_THREADS environment variable to control the number of threads."
@echo
endif

ifeq ($(OSNAME), Darwin)
@echo "WARNING: If you plan to use the dynamic library $(LIBDYNNAME), you must run:"
@echo
@echo "\"make PREFIX=/your_installation_path/ install\"."
@echo
@echo "(or set PREFIX in Makefile.rule and run make install."
@echo
@echo "Note that any flags passed to make during build should also be passed to make install"
@echo "to circumvent any install errors."
@echo
@echo "If you want to move the .dylib to a new location later, make sure you change"
@echo "the internal name of the dylib with:"
@echo
@echo "install_name_tool -id /new/absolute/path/to/$(LIBDYNNAME) $(LIBDYNNAME)"
endif
@echo
@echo "To install the library, you can run \"make PREFIX=/path/to/your/installation install\"."
@echo
@echo "Note that any flags passed to make during build should also be passed to make install"
@echo "to circumvent any install errors."
@echo

shared : libs netlib $(RELA)
ifneq ($(NO_SHARED), 1)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
@$(MAKE) -C exports so
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif
ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
@$(MAKE) -C exports so
@$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), Darwin)
@$(MAKE) -C exports dyn
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib
@$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
endif
ifeq ($(OSNAME), WINNT)
@$(MAKE) -C exports dll
endif
ifeq ($(OSNAME), CYGWIN_NT)
@$(MAKE) -C exports dll
endif
ifeq ($(OSNAME), AIX)
@$(MAKE) -C exports so
endif
endif

tests : shared
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
touch $(LIBNAME)
ifndef NO_FBLAS
$(MAKE) -C test all
endif
endif
ifneq ($(ONLY_CBLAS), 1)
$(MAKE) -C utest all
endif
ifneq ($(NO_CBLAS), 1)
ifneq ($(ONLY_CBLAS), 1)
$(MAKE) -C ctest all
endif
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
$(MAKE) -C cpp_thread_test all
endif
endif

libs :
ifeq ($(CORE), UNKNOWN)
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.)
endif
ifeq ($(NOFORTRAN), 1)
$(info OpenBLAS: Detecting fortran compiler failed. Can only compile BLAS and f2c-converted LAPACK.)
endif
ifeq ($(NO_STATIC), 1)
ifeq ($(NO_SHARED), 1)
$(error OpenBLAS: neither static nor shared are enabled.)
endif
endif
@for d in $(SUBDIRS) ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
fi; \
done
#Save the config files for installation
@cp Makefile.conf Makefile.conf_last
@cp config.h config_last.h
ifdef QUAD_PRECISION
@echo "#define QUAD_PRECISION">> config_last.h
endif
ifeq ($(EXPRECISION), 1)
@echo "#define EXPRECISION">> config_last.h
endif
##
ifeq ($(DYNAMIC_ARCH), 1)
@$(MAKE) -C kernel commonlibs || exit 1
@for d in $(DYNAMIC_CORE) ; \
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
done
@echo DYNAMIC_ARCH=1 >> Makefile.conf_last
ifeq ($(DYNAMIC_OLDER), 1)
@echo DYNAMIC_OLDER=1 >> Makefile.conf_last
endif
endif
@echo TARGET=$(CORE) >> Makefile.conf_last
ifdef USE_THREAD
@echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last
endif
ifdef SMP
ifdef NUM_THREADS
@echo NUM_THREADS=$(NUM_THREADS) >> Makefile.conf_last
else
@echo NUM_THREADS=$(NUM_CORES) >> Makefile.conf_last
endif
endif
ifeq ($(USE_OPENMP),1)
@echo USE_OPENMP=1 >> Makefile.conf_last
endif
ifeq ($(INTERFACE64),1)
@echo INTERFACE64=1 >> Makefile.conf_last
endif
@echo THELIBNAME=$(LIBNAME) >> Makefile.conf_last
@echo THELIBSONAME=$(LIBSONAME) >> Makefile.conf_last
@-$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
@touch lib.grd

prof : prof_blas prof_lapack

prof_blas :
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
for d in $(SUBDIRS) ; \
do if test -d $$d; then \
$(MAKE) -C $$d prof || exit 1 ; \
fi; \
done
ifeq ($(DYNAMIC_ARCH), 1)
$(MAKE) -C kernel commonprof || exit 1
endif

blas :
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
for d in $(BLASDIRS) ; \
do if test -d $$d; then \
$(MAKE) -C $$d libs || exit 1 ; \
fi; \
done

hpl :
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
for d in $(BLASDIRS) ../laswp exports ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
fi; \
done
ifeq ($(DYNAMIC_ARCH), 1)
$(MAKE) -C kernel commonlibs || exit 1
for d in $(DYNAMIC_CORE) ; \
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\
done
endif

hpl_p :
$(LNCMD) $(LIBNAME_P) $(LIBPREFIX)_p.$(LIBSUFFIX)
for d in $(SUBDIRS) ../laswp exports ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
fi; \
done

netlib : lapack_prebuild
ifneq ($(NO_LAPACK), 1)
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapacklib
@$(MAKE) -C $(NETLIB_LAPACK_DIR) tmglib
endif
ifneq ($(NO_LAPACKE), 1)
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapackelib
endif

ifeq ($(NO_LAPACK), 1)
re_lapack :

else
re_lapack :
@$(MAKE) -C relapack
endif

prof_lapack : lapack_prebuild
@$(MAKE) -C $(NETLIB_LAPACK_DIR) lapack_prof

lapack_prebuild :
ifeq ($(NO_LAPACK), $(filter 0,$(NO_LAPACK)))
-@echo "FC = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(F_COMPILER), GFORTRAN)
-@echo "override FFLAGS = $(LAPACK_FFLAGS) -fno-tree-vectorize" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "override FFLAGS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "FFLAGS_DRV = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1)
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
else
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGIBM1)
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
endif
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "ARFLAGS = $(ARFLAGS) -ru" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "RANLIB = $(RANLIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKLIB = ../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "TMGLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "BLASLIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKELIB = ../../../$(LIBNAME)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LAPACKLIB_P = ../$(LIBNAME_P)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(F_COMPILER), GFORTRAN)
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
ifdef SMP
ifeq ($(OSNAME), WINNT)
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
else ifeq ($(OSNAME), Haiku)
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
else
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
else
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_LAPACK_DEPRECATED), 1)
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_SINGLE), 1)
-@echo "BUILD_SINGLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_DOUBLE), 1)
-@echo "BUILD_DOUBLE = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX), 1)
-@echo "BUILD_COMPLEX = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
ifeq ($(BUILD_COMPLEX16), 1)
-@echo "BUILD_COMPLEX16 = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "LAPACKE_WITH_TMG = 1" >> $(NETLIB_LAPACK_DIR)/make.inc
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif

large.tgz :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
if [ ! -a $< ]; then
-wget http://www.netlib.org/lapack/timing/large.tgz;
fi
endif

timing.tgz :
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
if [ ! -a $< ]; then
-wget http://www.netlib.org/lapack/timing/timing.tgz;
fi
endif

lapack-timing : large.tgz timing.tgz
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING)
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz )
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING
endif


lapack-test :
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out)
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/EIG xeigtstc xeigtstd xeigtsts xeigtstz
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING/LIN xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc
ifneq ($(CROSS), 1)
( cd $(NETLIB_LAPACK_DIR)/INSTALL; $(MAKE) all; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING)
endif

lapack-runtest: lapack-test
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \
./testsecond; ./testdsecnd; ./testieee; ./testversion )
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r -b TESTING )


blas-test:
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && rm -f x* *.out)

$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing
(cd $(NETLIB_LAPACK_DIR)/BLAS/TESTING && cat *.out)


dummy :

install :
$(MAKE) -f Makefile.install install

install_tests :
$(MAKE) -f Makefile.install install_tests

clean ::
@for d in $(SUBDIRS_ALL) ; \
do if test -d $$d; then \
$(MAKE) -C $$d $(@F) || exit 1 ; \
fi; \
done
#ifdef DYNAMIC_ARCH
@$(MAKE) -C kernel clean
#endif
@$(MAKE) -C reference clean
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf $(LIBPREFIX).$(LIBSUFFIX) $(LIBPREFIX)_p.$(LIBSUFFIX) $(LIBPREFIX).so.$(MAJOR_VERSION) *.lnk myconfig.h *.so.renamed *.a.renamed *.so.0
ifeq ($(OSNAME), Darwin)
@rm -rf getarch.dSYM getarch_2nd.dSYM
endif
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib
@rm -f cblas.tmp cblas.tmp2
@touch $(NETLIB_LAPACK_DIR)/make.inc
@$(MAKE) -C $(NETLIB_LAPACK_DIR) clean
@rm -f $(NETLIB_LAPACK_DIR)/make.inc
@$(MAKE) -C relapack clean
@rm -f *.grd Makefile.conf_last config_last.h
@(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out testing_results.txt)
@echo Done.

+ 0
- 39
Makefile.alpha View File

@@ -1,39 +0,0 @@
ifneq ($(COMPILER), NATIVE)
# GCC User
ifeq ($(CORE), EV4)
CCOMMON_OPT += -mcpu=ev4
endif
ifeq ($(CORE), EV5)
CCOMMON_OPT += -mcpu=ev5
endif
ifeq ($(CORE), EV6)
CCOMMON_OPT += -mcpu=ev6
endif
else
# Compaq Compiler User
ifeq ($(CORE), EV4)
CCOMMON_OPT += -tune ev4 -arch ev4
endif
ifeq ($(CORE), EV5)
CCOMMON_OPT += -tune ev5 -arch ev5
endif
ifeq ($(CORE), EV6)
CCOMMON_OPT += -tune ev6 -arch ev6
endif
endif

ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -mieee
endif

ifeq ($(F_COMPILER), G77)
FCOMMON_OPT += -mieee
endif

ifndef SMP
LIBCXML = -lcxml -lots -lm
LIBATLAS = -L/usr/lib/atlas3.7.8 -lf77blas -latlas -lm
else
LIBCXML = -lcxmlp -lots -lm
LIBATLAS = -L/usr/lib/atlas3.7.8p -llapack -lptcblas -lptf77blas -latlas -lpthread -lm
endif

+ 0
- 19
Makefile.arm View File

@@ -1,19 +0,0 @@
ifeq ($(CORE), $(filter $(CORE),ARMV7 CORTEXA9 CORTEXA15))
ifeq ($(OSNAME), Android)
CCOMMON_OPT += -mfpu=neon -march=armv7-a
FCOMMON_OPT += -mfpu=neon -march=armv7-a
else
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a
endif
endif

ifeq ($(CORE), ARMV6)
CCOMMON_OPT += -mfpu=vfp
FCOMMON_OPT += -mfpu=vfp
endif

ifdef HAVE_NEON
CCOMMON_OPT += -mfpu=neon
FCOMMON_OPT += -mfpu=neon
endif

+ 0
- 429
Makefile.arm64 View File

@@ -1,429 +0,0 @@
###############################################################################
# Copyright (c) 2025, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

ifneq ($(C_COMPILER), PGI)

ifeq ($(C_COMPILER), CLANG)
ISCLANG=1
endif
ifeq ($(C_COMPILER), FUJITSU)
ISCLANG=1
endif
ifneq (1, $(filter 1,$(GCCVERSIONGT4) $(ISCLANG)))
CCOMMON_OPT += -march=armv8-a
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a
endif


else


ifeq ($(CORE), ARMV8)
CCOMMON_OPT += -march=armv8-a
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a
endif
endif

ifeq ($(CORE), ARMV8SVE)
CCOMMON_OPT += -march=armv8-a+sve
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a+sve
endif
endif

ifeq ($(CORE), ARMV9SME)
CCOMMON_OPT += -march=armv9-a+sve2+sme
FCOMMON_OPT += -march=armv9-a+sve2
endif

ifeq ($(CORE), CORTEXA53)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
endif
endif

ifeq ($(CORE), CORTEXA57)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a57
endif
endif

ifeq ($(CORE), CORTEXA72)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif

ifeq ($(CORE), CORTEXA73)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a73
endif
endif

ifeq ($(CORE), CORTEXA76)
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a76
endif
endif

ifeq ($(CORE), FT2000)
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif

# Use a72 tunings because Neoverse-N1 is only available
# in GCC>=9
ifeq ($(CORE), NEOVERSEN1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
endif
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
endif

# Use a72 tunings because Neoverse-V1 is only available
# in GCC>=10.4
ifeq ($(CORE), NEOVERSEV1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.4-a+sve+bf16
ifeq (1, $(ISCLANG))
CCOMMON_OPT += -mtune=cortex-x1
else
CCOMMON_OPT += -mtune=neoverse-v1
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-v1
endif
else
CCOMMON_OPT += -march=armv8.4-a+sve+bf16
ifneq ($(CROSS), 1)
CCOMMON_OPT += -mtune=native
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a
ifneq ($(CROSS), 1)
FCOMMON_OPT += -mtune=native
endif
endif
endif
else
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
else
CCOMMON_OPT += -march=armv8-a+sve -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
endif

# Use a72 tunings because Neoverse-N2 is only available
# in GCC>=10.4
ifeq ($(CORE), NEOVERSEN2)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ11) $(ISCLANG)))
ifneq ($(OSNAME), Darwin)
CCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
else
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.5-a+sve+sve2+bf16 -mtune=neoverse-n2
endif
else
CCOMMON_OPT += -march=armv8.5-a+sve+bf16
ifneq ($(CROSS), 1)
CCOMMON_OPT += -mtune=native
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.5-a
ifneq ($(CROSS), 1)
FCOMMON_OPT += -mtune=native
endif
endif
endif
else
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
else
CCOMMON_OPT += -march=armv8-a+sve+bf16 -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a72
endif
endif
endif

# Detect ARM Neoverse V2.
ifeq ($(CORE), NEOVERSEV2)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ13) $(ISCLANG)))
CCOMMON_OPT += -mcpu=neoverse-v2
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mcpu=neoverse-v2
endif
else
CCOMMON_OPT += -march=armv8.2-a+sve+bf16 -mtune=neoverse-n1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=neoverse-n1
endif
endif
endif

# Detect Ampere AmpereOne(ampere1,ampere1a) processors.
ifeq ($(CORE), AMPERE1)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.6-a+crypto+crc+fp16+sha3+rng
endif
endif
endif

# Use a53 tunings because a55 is only available in GCC>=8.1
ifeq ($(CORE), CORTEXA55)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ7) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ8) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a55
endif
else
CCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a53
endif
endif
else
CCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=cortex-a53
endif
endif
endif

ifeq ($(CORE), THUNDERX)
CCOMMON_OPT += -march=armv8-a -mtune=thunderx
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=thunderx
endif
endif

ifeq ($(CORE), FALKOR)
CCOMMON_OPT += -march=armv8-a -mtune=falkor
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=falkor
endif
endif

ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
endif

ifeq ($(CORE), THUNDERX3T110)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.3-a
ifeq (0, $(ISCLANG))
CCOMMON_OPT += -mtune=thunderx3t110
else
CCOMMON_OPT += -mtune=thunderx2t99
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.3-a -mtune=thunderx3t110
endif
else
CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99
endif
endif
endif

ifeq ($(CORE), VORTEX)
CCOMMON_OPT += -march=armv8.3-a
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.3-a
endif
endif

ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
ifeq ($(CORE), TSV110)
CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110
endif
endif
endif

ifeq (1, $(filter 1,$(GCCVERSIONGTEQ9) $(ISCLANG)))
ifeq ($(CORE), EMAG8180)
CCOMMON_OPT += -march=armv8-a
ifeq ($(ISCLANG), 0)
CCOMMON_OPT += -mtune=emag
endif
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8-a -mtune=emag
endif
endif
endif

ifeq ($(CORE), A64FX)
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ10) $(ISCLANG)))
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ3) $(GCCVERSIONGTEQ11) $(ISCLANG)))
CCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a+sve -mtune=a64fx
endif
else
CCOMMON_OPT += -march=armv8.4-a+sve -mtune=neoverse-n1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a -mtune=neoverse-n1
endif
endif
endif
endif

ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX1)
CCOMMON_OPT += -march=armv8.2-a
ifeq (1, $(filter 1,$(GCCMINORVERSIONGTEQ4) $(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -mtune=cortex-x1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-x1
endif
else
CCOMMON_OPT += -mtune=cortex-a72
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.2-a -mtune=cortex-a72
endif
endif
endif
endif

ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXX2)
CCOMMON_OPT += -march=armv8.4-a+sve
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -mtune=cortex-x2
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mtune=cortex-x2
endif
endif
endif
endif

#ifeq (1, $(filter 1,$(ISCLANG)))
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXA510)
CCOMMON_OPT += -march=armv8.4-a+sve
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
endif
endif

ifeq (1, $(filter 1,$(GCCVERSIONGTEQ11) $(ISCLANG)))
ifeq ($(CORE), CORTEXA710)
CCOMMON_OPT += -march=armv8.4-a+sve
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=armv8.4-a+sve
endif
ifeq (1, $(filter 1,$(GCCVERSIONGTEQ12) $(ISCLANG)))
CCOMMON_OPT += -mtune=cortex-a710
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mtune=cortex-a710
endif
endif
endif
endif

endif

else
# NVIDIA HPC options necessary to enable SVE in the compiler
ifeq ($(CORE), THUNDERX2T99)
CCOMMON_OPT += -tp=thunderx2t99
FCOMMON_OPT += -tp=thunderx2t99
endif
ifeq ($(CORE), NEOVERSEN1)
CCOMMON_OPT += -tp=neoverse-n1
FCOMMON_OPT += -tp=neoverse-n1
endif
ifeq ($(CORE), NEOVERSEV1)
CCOMMON_OPT += -tp=neoverse-v1
FCOMMON_OPT += -tp=neoverse-v1
endif
ifeq ($(CORE), NEOVERSEV2)
CCOMMON_OPT += -tp=neoverse-v2
FCOMMON_OPT += -tp=neoverse-v2
endif
ifeq ($(CORE), ARMV8SVE)
CCOMMON_OPT += -tp=neoverse-v2
FCOMMON_OPT += -tp=neoverse-v2
endif
ifeq ($(CORE), ARMV9SVE)
CCOMMON_OPT += -tp=neoverse-v2
FCOMMON_OPT += -tp=neoverse-v2
endif

endif

+ 0
- 4
Makefile.csky View File

@@ -1,4 +0,0 @@
ifeq ($(CORE), CK860FV)
CCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
FCOMMON_OPT += -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float -static
endif

+ 0
- 1
Makefile.e2k View File

@@ -1 +0,0 @@
COPT = -Wall -O2 # -DGEMMTEST

+ 0
- 1
Makefile.generic View File

@@ -1 +0,0 @@
COPT = -Wall -O2 # -DGEMMTEST

+ 0
- 22
Makefile.ia64 View File

@@ -1,22 +0,0 @@
CCOMMON_COPT += # -DUSE64BITINT # -DGEMMTEST

# CCOMMON_OPT += -DPARAMTEST
FLAMEPATH = $(HOME)/flame/lib/ia64

ifndef SMP
LIBMKL = -L$(MKLPATH)/64 -Wl,-rpath,$(MKLPATH)/64 -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lguide -lpthread -lm
else
LIBMKL = -L$(MKLPATH)/64 -Wl,-rpath,$(MKLPATH)/64 -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lguide -lpthread -lm
endif

LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame $(TOPDIR)/$(LIBNAME) -lgfortran -lpthread -lm

LIBMLIB = ../../level1/others/libmisc.a -L/opt/intel/fc/ia64/9.1.040/lib -L/opt/mlib/lib \
-llapack -lguide -lifcore -lm -lpthread
LIBSCSL = -L/opt/scsl/1.4.1.0/lib -Wl,-rpath,/opt/scsl/1.4.1.0/lib -lscs

ifndef SMP
LIBATLAS = -L/usr/lib/atlas3.6.0 -lf77blas -latlas -lm
else
LIBATLAS = -L$(HOME)/misc/lib -L/usr/lib/atlas3.6.0p -llapack -lptcblas -lptf77blas -latlas -lpthread -lm
endif

+ 0
- 345
Makefile.install View File

@@ -1,345 +0,0 @@
TOPDIR = .
export GOTOBLAS_MAKEFILE = 1
-include $(TOPDIR)/Makefile.conf_last
include ./Makefile.system
LNCMD = ln -fs

ifdef THELIBNAME
LIBNAME=$(THELIBNAME)
LIBSONAME=$(THELIBSONAME)
endif
ifeq ($(FIXED_LIBNAME), 1)
LNCMD = true
endif
ifeq ($(INTERFACE64),1)
USE_64BITINT=1
endif
ifeq ($(USE_OPENMP),1)
FOMP_OPT:= -fopenmp
endif

PREFIX ?= /opt/OpenBLAS

OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
OPENBLAS_BINARY_DIR := $(PREFIX)/bin
OPENBLAS_BUILD_DIR := $(CURDIR)
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/$(LIBSONAMEBASE)
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
PKG_EXTRALIB := $(EXTRALIB)
ifeq ($(INTERFACE64),1)
SUFFIX64=64
endif
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"

ifeq ($(USE_OPENMP), 1)
ifeq ($(C_COMPILER), PGI)
PKG_EXTRALIB += -lomp
else
PKG_EXTRALIB += -lgomp
endif
endif

.PHONY : install
.NOTPARALLEL : install

lib.grd :
$(error OpenBLAS: Please run "make" firstly)

install : lib.grd
@-mkdir -p "$(DESTDIR)$(PREFIX)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)"
@-mkdir -p "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
#for inc
@echo \#ifndef OPENBLAS_CONFIG_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#define OPENBLAS_CONFIG_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@cat openblas_config_template.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h"

@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@echo \#ifndef OPENBLAS_F77BLAS_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#define OPENBLAS_F77BLAS_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#include \"openblas_config.h\" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@cat common_interface.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"
@echo \#endif >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h"

ifneq ($(NO_CBLAS),1)
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@cp cblas.h cblas.tmp
ifdef SYMBOLPREFIX
@sed 's/cblas[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp > cblas.tmp2
@sed 's/openblas[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
#change back any openblas_complex_float and double that got hit
@sed 's/$(SYMBOLPREFIX)openblas_complex_/openblas_complex_/g' cblas.tmp > cblas.tmp2
@sed 's/goto[^() ]*/$(SYMBOLPREFIX)&/g' cblas.tmp2 > cblas.tmp
endif
ifdef SYMBOLSUFFIX
@sed 's/cblas[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp > cblas.tmp2
@sed 's/openblas[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
#change back any openblas_complex_float and double that got hit
@sed 's/\(openblas_complex_\)\([^ ]*\)$(SYMBOLSUFFIX)/\1\2 /g' cblas.tmp > cblas.tmp2
@sed 's/goto[^() ]*/&$(SYMBOLSUFFIX)/g' cblas.tmp2 > cblas.tmp
endif
@sed 's/common/openblas_config/g' cblas.tmp > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h"
endif

ifneq ($(OSNAME), AIX)
ifneq ($(NO_LAPACKE), 1)
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
@-install -m644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
endif

#for install static library
ifneq ($(NO_STATIC),1)
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@install -m644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifneq ($(NO_SHARED),1)
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS Android Haiku FreeBSD DragonFly))
@install -m755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif

ifeq ($(OSNAME), $(filter $(OSNAME),OpenBSD NetBSD))
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so
endif
ifeq ($(OSNAME), Darwin)
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@-install_name_tool -id "$(OPENBLAS_LIBRARY_DIR)/$(LIBPREFIX).$(MAJOR_VERSION).dylib" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).dylib ; \
$(LNCMD) $(LIBDYNNAME) $(LIBPREFIX).$(MAJOR_VERSION).dylib
endif
ifeq ($(OSNAME), WINNT)
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
endif
ifeq ($(OSNAME), CYGWIN_NT)
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)"
@-cp $(IMPLIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
endif
endif

else
#install on AIX has different options syntax
ifneq ($(NO_LAPACKE), 1)
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapack.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapack.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h"
@-installbsd -c -m 644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h"
endif

#for install static library
ifneq ($(NO_STATIC),1)
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX)
endif
#for install shared library
ifneq ($(NO_SHARED),1)
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)
@installbsd -c -m 755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)"
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so ; \
$(LNCMD) $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION)
endif

endif

#Generating openblas.pc
ifeq ($(INTERFACE64),1)
SUFFIX64=64
endif
PKGFILE="$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE)$(SUFFIX64).pc"

@echo Generating $(LIBSONAMEBASE)$(SUFFIX64).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(PKGFILE)"
@echo 'libprefix='$(LIBNAMEPREFIX) >> "$(PKGFILE)"
@echo 'libnamesuffix='$(LIBNAMESUFFIX) >> "$(PKGFILE)"
@echo 'libsuffix='$(SYMBOLSUFFIX) >> "$(PKGFILE)"
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(PKGFILE)"
@echo 'omp_opt='$(FOMP_OPT) >> "$(PKGFILE)"
@echo 'openblas_config= USE_64BITINT='$(INTERFACE64) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(TARGET) 'MAX_THREADS='$(NUM_THREADS)>> "$(PKGFILE)"
@echo 'version='$(VERSION) >> "$(PKGFILE)"
@echo 'extralib='$(PKG_EXTRALIB) >> "$(PKGFILE)"
@cat openblas.pc.in >> "$(PKGFILE)"


#Generating OpenBLASConfig.cmake
ifneq ($(origin _OpenBLAS_ROOT_DIR),"undefined")

@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "file(REAL_PATH \"../../..\" _OpenBLAS_ROOT_DIR BASE_DIRECTORY \$${CMAKE_CURRENT_LIST_DIR} )" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_INCLUDE_DIRS \$${_OpenBLAS_ROOT_DIR}/include)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"

ifneq ($(NO_SHARED),1)
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/bin/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
ifeq ($(OSNAME), Darwin)
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
@echo "add_library(OpenBLAS::OpenBLAS SHARED IMPORTED)"
@echo "target_include_directories(OpenBLAS::OpenBLAS INTERFACE \$${OpenBLAS_INCLUDE_DIRS})"
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "set_property(TARGET OpenBLAS::OpenBLAS PROPERTY IMPORTED_LOCATION \$${OpenBLAS_LIBRARIES})"
@echo "set_property(TARGET OpenBLAS::OpenBLAS PROPERTY IMPORTED_IMPLIB \$${_OpenBLAS_ROOT_DIR}/lib/libopenblas.lib)"
endif
else
#only static
@echo "SET(OpenBLAS_LIBRARIES \$${_OpenBLAS_ROOT_DIR}/lib/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
else
echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
ifneq ($(NO_SHARED),1)
#ifeq logical or
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD OpenBSD DragonFly))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX)$(SYMBOLSUFFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT))
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
ifeq ($(OSNAME), Darwin)
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
else
#only static
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)"
endif
endif
endif

#Generating OpenBLASConfigVersion.cmake
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR)
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "else ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)"
@echo Install OK!

install_tests : lib.grd
ifneq ($(ONLY_CBLAS), 1)
@install -m 666 utest/openblas_utest $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 utest/openblas_utest_ext $(DESTDIR)$(OPENBLAS_BINARY_DIR)
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifndef NO_FBLAS
ifeq ($(BUILD_BFLOAT16),1)
@install -m 666 test/test_sbgemm $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
ifeq ($(BUILD_SINGLE),1)
@install -m 666 test/sblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/sblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/sblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/sblat2.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/sblat3.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
ifeq ($(BUILD_DOUBLE),1)
@install -m 666 test/dblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/dblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/dblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/dblat2.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/dblat3.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
ifeq ($(BUILD_COMPLEX),1)
@install -m 666 test/cblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/cblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/cblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/cblat2.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/cblat3.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
ifeq ($(ARCH), filter($(ARCH), x86 x86_64 ia64 MIPS))
@install -m 666 test/cblat3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/cblat3_3m.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
endif
ifeq ($(BUILD_COMPLEX16),1)
@install -m 666 test/zblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/zblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/zblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/zblat2.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/zblat3.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
ifeq ($(ARCH), filter($(ARCH), x86 x86_64 ia64 MIPS))
@install -m 666 test/zblat3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 test/zblat3_3m.dat $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
endif
endif
endif
ifneq ($(ONLY_CBLAS), 1)
ifeq ($(BUILD_SINGLE),1)
@install -m 666 ctest/xscblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/xscblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/xscblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/sin2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/sin3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
ifeq ($(BUILD_DOUBLE),1)
@install -m 666 ctest/xdcblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/xdcblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/xdcblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/din2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/din3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
ifeq ($(BUILD_COMPLEX),1)
@install -m 666 ctest/xccblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/xccblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/xccblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/cin2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/cin3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
ifeq ($(ARCH), filter($(ARCH), x86 x86_64 ia64 MIPS))
@install -m 666 ctest/xccblat3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/cin3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
endif
ifeq ($(BUILD_COMPLEX16),1)
@install -m 666 ctest/xzcblat1 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/xzcblat2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/xzcblat3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/zin2 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/zin3 $(DESTDIR)$(OPENBLAS_BINARY_DIR)
ifeq ($(ARCH), filter($(ARCH), x86 x86_64 ia64 MIPS))
@install -m 666 ctest/xzcblat3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 ctest/zin3_3m $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
endif

endif
ifeq ($(CPP_THREAD_SAFETY_TEST), 1)
@install -m 666 cpp_thread_test/dgemm_tester $(DESTDIR)$(OPENBLAS_BINARY_DIR)
@install -m 666 cpp_thread_test/dgemv_tester $(DESTDIR)$(OPENBLAS_BINARY_DIR)
endif
endif


+ 0
- 3
Makefile.loongarch64 View File

@@ -1,3 +0,0 @@
ifdef BINARY64
else
endif

+ 0
- 4
Makefile.mips View File

@@ -1,4 +0,0 @@
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
ifdef BINARY64
else
endif

+ 0
- 4
Makefile.mips64 View File

@@ -1,4 +0,0 @@
MSA_FLAGS = -mmsa -mfp64 -mload-store-pairs
ifdef BINARY64
else
endif

+ 0
- 224
Makefile.power View File

@@ -1,224 +0,0 @@

ifdef USE_THREAD
ifeq ($(USE_THREAD), 0)
USE_OPENMP = 0
else
USE_OPENMP = 1
endif
else
USE_OPENMP = 1
endif

ifeq ($(CORE), POWER10)
ifneq ($(C_COMPILER), PGI)
ifeq ($(C_COMPILER), GCC)
ifeq ($(GCCVERSIONGTEQ10), 1)
CCOMMON_OPT += -O3 -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
else ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
CCOMMON_OPT += -O3 -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
else
$(warning your compiler is too old to fully support POWER10, getting a newer version of gcc is recommended)
CCOMMON_OPT += -O3 -mcpu=power9 -mtune=power9 -mvsx -fno-fast-math
endif
else
CCOMMON_OPT += -O3 -mcpu=power10 -mtune=power10 -mvsx -fno-fast-math
endif
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr10 -qtune=pwr10 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power10 -mtune=power10 -fno-fast-math
endif
endif
endif

ifeq ($(CORE), POWER9)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -O3 -mvsx -fno-fast-math
ifeq ($(C_COMPILER), GCC)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
CCOMMON_OPT += -mcpu=power8 -mtune=power8
else
CCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
CCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
else
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
ifneq ($(F_COMPILER), PGI)
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr9 -qtune=pwr9 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -fno-fast-math -mcpu=power9 -mtune=power9
endif

ifeq ($(F_COMPILER), GFORTRAN)
ifneq ($(GCCVERSIONGT4), 1)
$(warning your compiler is too old to fully support POWER9, getting a newer version of gcc is recommended)
FCOMMON_OPT += -mcpu=power8 -mtune=power8
else
FCOMMON_OPT += -mcpu=power9 -mtune=power9
endif
endif
else
FCOMMON_OPT += -O2 -Mrecursive
endif
endif

ifeq ($(CORE), POWER8)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -O3 -mcpu=power8 -mtune=power8 -mvsx -fno-fast-math
else
CCOMMON_OPT += -fast -Mvect=simd -Mcache_align
endif
ifneq ($(F_COMPILER), PGI)
ifeq ($(OSNAME), AIX)
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O1 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
else
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -O2 -qrecur -qnosave -qarch=pwr8 -qtune=pwr8 -qfloat=nomaf -qzerosize
else
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -fno-fast-math
endif
endif
else
FCOMMON_OPT += -O2 -Mrecursive
endif
endif

ifeq ($(USE_OPENMP), 1)
ifneq ($(C_COMPILER), PGI)
CCOMMON_OPT += -DUSE_OPENMP -fopenmp
else
CCOMMON_OPT += -DUSE_OPENMP -mp
endif
ifeq ($(F_COMPILER), IBM)
FCOMMON_OPT += -DUSE_OPENMP
else
ifneq ($(F_COMPILER), PGI)
FCOMMON_OPT += -DUSE_OPENMP -fopenmp
else
FCOMMON_OPT += -DUSE_OPENMP -mp
endif
endif
endif

ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -fno-integrated-as
endif
# workaround for C->FORTRAN ABI violation in LAPACKE
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -fno-optimize-sibling-calls
endif

FLAMEPATH = $(HOME)/flame/lib

#ifeq ($(CORE), CELL)
#CELL_SDK_ROOT = /opt/IBM/cell-sdk-1.1/sysroot/usr
#SPU_CC = spu-gcc
#EXTRALIB += -lspe
#endif

ifeq ($(OSNAME), Linux)
ifdef BINARY64
# COMPILER_PREFIX = powerpc64-linux-
else
# COMPILER_PREFIX = powerpc-linux-
endif
endif

#Either uncomment below line or run make with `USE_MASS=1` to enable support of MASS library
#USE_MASS = 1

ifeq ($(USE_MASS), 1)
# Path to MASS libs, change it if the libs are installed at any other location
MASSPATH = /opt/ibm/xlmass/8.1.5/lib
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS
EXTRALIB += -L$(MASSPATH) -lmass -lmassvp8 -lmass_simdp8
endif

ifdef BINARY64


ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), GCCIBMAIX)
$(error Using GCC and XLF on AIX is not a supported combination.)
endif
ifeq ($(C_COMPILER)$(F_COMPILER)$(OSNAME), CLANGGFORTRANAIX)
$(error Using Clang and gFortran on AIX is not a supported combination.)
endif

ifeq ($(OSNAME), AIX)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mpowerpc64 -maix64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mpowerpc64 -maix64
endif
ifeq ($(F_COMPILER), GFORTRAN)
FCOMMON_OPT += -mpowerpc64 -maix64
endif
ifeq ($(COMPILER_F77), xlf)
FCOMMON_OPT += -q64
endif
ARFLAGS = -X 64
ASFLAGS = -a64
endif
else
ifeq ($(OSNAME), AIX)
CCOMMON_OPT += -Wa,-a32
ARFLAGS = -X 32
ASFLAGS = -a32
endif
endif

# CCOMMON_OPT += -maltivec -mabi=altivec

LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame-lapack -lflame-base $(LIBS)

ifeq ($(OSNAME), Darwin)
CCOMMON_OPT += -force_cpusubtype_ALL
endif


ifndef BINARY64
ifeq ($(OSNAME), Linux)
ESSLPATH = -L/opt/ibmcmp/lib -L/opt/ibmcmp/xlf/11.1/lib -Wl,-rpath,/opt/ibmcmp/lib -Wl,-rpath,/opt/ibmcmp/xlf/11.1/lib -lxlf90_r -lxlomp_ser -lxlfmath -lxl -lpthread
else
ESSLPATH = -lxlf90_r
endif


LIBVECLIB = -framework VecLib
ifndef SMP
LIBATLAS = -L/usr/lib/atlas3.7.11 -lf77blas -latlas -lg2c -lm
LIBESSL = -lessl $(ESSLPATH) ../../level1/others/libmisc.a -lm
else
LIBATLAS = -L/usr/lib/atlas3.7.11p -lptf77blas -latlas -lm -lpthread
LIBESSL = -lesslsmp $(ESSLPATH) ../../level1/others/libmisc.a -lm
endif
else
ifeq ($(OSNAME), Linux)
ESSLPATH = -L/opt/ibmcmp/lib64 -Wl,-rpath,/opt/ibmcmp/lib64 -L/opt/ibmcmp/xlf/11.1/lib64 -Wl,-rpath,/opt/ibmcmp/xlf/11.1/lib64 -lxlf90_r -lxlomp_ser
else
ESSLPATH = -lxlf90_r
endif

LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib

ifndef SMP
LIBATLAS = -L/usr/lib64/atlas3.7.11 -lf77blas -latlas -lg2c -lm
LIBESSL = -lessl $(ESSLPATH) -lm
else
LIBATLAS = -L/usr/lib64/atlas3.7.11p -lptf77blas -latlas -lm -lpthread
LIBESSL = -lesslsmp $(ESSLPATH) -lxlsmp -lm
endif
endif

+ 0
- 113
Makefile.prebuild View File

@@ -1,113 +0,0 @@
# This is triggered by Makefile.system and runs before any of the code is built.

export BINARY
export USE_OPENMP

ifdef DYNAMIC_ARCH
override HOST_CFLAGS += -DDYNAMIC_ARCH
endif

ifdef TARGET_CORE
TARGET_MAKE = Makefile_kernel.conf
TARGET_CONF = config_kernel.h
else
TARGET_MAKE = Makefile.conf
TARGET_CONF = config.h
endif

ifdef USE_PERL
SCRIPTSUFFIX = .pl
else
SCRIPTSUFFIX =
endif

# CPUIDEMU = ../../cpuid/table.o

ifdef CPUIDEMU
EXFLAGS = -DCPUIDEMU -DVENDOR=99
endif

ifeq ($(TARGET), MIPS24K)
TARGET_FLAGS = -mips32r2
endif

ifeq ($(TARGET), MIPS1004K)
TARGET_FLAGS = -mips32r2
endif

ifeq ($(TARGET), P5600)
TARGET_FLAGS = -mips32r5
endif

ifeq ($(TARGET), I6400)
TARGET_FLAGS = -mips64r6
endif

ifeq ($(TARGET), P6600)
TARGET_FLAGS = -mips64r6
endif

ifeq ($(TARGET), I6500)
TARGET_FLAGS = -mips64r6
endif

ifeq ($(TARGET), C910V)
TARGET_FLAGS = -march=rv64gcv0p7_zfh_xtheadc -mabi=lp64d
endif

ifeq ($(TARGET), CK860FV)
TARGET_FLAGS = -march=ck860v -mcpu=ck860fv -mfdivdu -mhard-float
endif

ifeq ($(TARGET), x280)
TARGET_FLAGS = -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d
endif

ifeq ($(TARGET), RISCV64_ZVL256B)
TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
endif

ifeq ($(TARGET), RISCV64_ZVL128B)
TARGET_FLAGS = -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
endif

ifeq ($(TARGET), RISCV64_GENERIC)
TARGET_FLAGS = -march=rv64imafdc -mabi=lp64d
endif

all: getarch_2nd
./getarch_2nd 0 >> $(TARGET_MAKE)
./getarch_2nd 1 >> $(TARGET_CONF)

$(TARGET_CONF): c_check$(SCRIPTSUFFIX) f_check$(SCRIPTSUFFIX) getarch
./c_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(CC)" $(TARGET_FLAGS) $(CFLAGS)
ifneq ($(ONLY_CBLAS), 1)
./f_check$(SCRIPTSUFFIX) $(TARGET_MAKE) $(TARGET_CONF) "$(FC)" $(TARGET_FLAGS)
else
#When we only build CBLAS, we set NOFORTRAN=2
echo "NOFORTRAN=2" >> $(TARGET_MAKE)
echo "NO_FBLAS=1" >> $(TARGET_MAKE)
echo "F_COMPILER=GFORTRAN" >> $(TARGET_MAKE)
echo "BU=_" >> $(TARGET_MAKE)
echo "#define BUNDERSCORE _" >> $(TARGET_CONF)
echo "#define NEEDBUNDERSCORE 1" >> $(TARGET_CONF)
endif
./getarch 0 >> $(TARGET_MAKE)
./getarch 1 >> $(TARGET_CONF)


getarch : getarch.c cpuid.S dummy $(CPUIDEMU)
avx512=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_AVX512); \
rv64gv=$$(./c_check$(SCRIPTSUFFIX) - - "$(CC)" $(TARGET_FLAGS) $(CFLAGS) | grep NO_RV64GV); \
$(HOSTCC) $(HOST_CFLAGS) $(EXFLAGS) $${avx512:+-D$${avx512}} $${rv64gv:+-D$${rv64gv}} -o $(@F) getarch.c cpuid.S $(CPUIDEMU)

getarch_2nd : getarch_2nd.c $(TARGET_CONF) dummy
ifndef TARGET_CORE
$(HOSTCC) -I. $(HOST_CFLAGS) -o $(@F) getarch_2nd.c
else
$(HOSTCC) -I. $(HOST_CFLAGS) -DBUILD_KERNEL -o $(@F) getarch_2nd.c
endif

dummy:

.PHONY: dummy

+ 0
- 20
Makefile.riscv64 View File

@@ -1,20 +0,0 @@
ifeq ($(CORE), C910V)
CCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920
FCOMMON_OPT += -march=rv64imafdcv0p7_zfh_xtheadc -mabi=lp64d -mtune=c920 -static
endif
ifeq ($(CORE), x280)
CCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh_zvl512b -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdcv_zba_zbb_zfh -mabi=lp64d -static
endif
ifeq ($(CORE), RISCV64_ZVL256B)
CCOMMON_OPT += -march=rv64imafdcv_zvl256b_zvfh_zfh -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
endif
ifeq ($(CORE), RISCV64_ZVL128B)
CCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdcv_zvfh_zfh -mabi=lp64d
endif
ifeq ($(CORE), RISCV64_GENERIC)
CCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
FCOMMON_OPT += -march=rv64imafdc -mabi=lp64d
endif

+ 0
- 337
Makefile.rule View File

@@ -1,337 +0,0 @@
#
# Beginning of user configuration
#

# This library's version
VERSION = 0.3.30.dev

# If you set this prefix, the library name will be lib$(LIBNAMESUFFIX)openblas.a
# and lib$(LIBNAMESUFFIX)openblas.so, with a matching soname in the shared library
#
# LIBNAMEPREFIX = scipy

# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library
# is libopenblas_$(LIBNAMESUFFIX).so.0.
# LIBNAMESUFFIX = omp

# You can specify the target architecture, otherwise it's
# automatically detected.
# TARGET = PENRYN

# If you want to support multiple architecture in one binary
# DYNAMIC_ARCH = 1

# If you want the full list of x86_64 architectures supported in DYNAMIC_ARCH
# mode (including individual optimizied codes for PENRYN, DUNNINGTON, OPTERON,
# OPTERON_SSE3, ATOM and NANO rather than fallbacks to older architectures)
# DYNAMIC_OLDER = 1

# C compiler including binary type(32bit / 64bit). Default is gcc.
# Don't use Intel Compiler or PGI, it won't generate right codes as I expect.
# CC = gcc

# Fortran compiler. Default is g77.
# FC = gfortran

# Even you can specify cross compiler. Meanwhile, please set HOSTCC.

# cross compiler for Windows
# CC = x86_64-w64-mingw32-gcc
# FC = x86_64-w64-mingw32-gfortran

# cross compiler for 32bit ARM
# CC = arm-linux-gnueabihf-gcc
# FC = arm-linux-gnueabihf-gfortran

# cross compiler for 64bit ARM
# CC = aarch64-linux-gnu-gcc
# FC = aarch64-linux-gnu-gfortran


# If you use the cross compiler, please set this host compiler.
# HOSTCC = gcc

# If you need 32bit binary, define BINARY=32, otherwise define BINARY=64
# Please note that AVX is not available on 32-bit.
# Setting BINARY=32 disables AVX/AVX2/AVX-512.
# BINARY=64

# About threaded BLAS. It will be automatically detected if you don't
# specify it.
# For force setting for single threaded, specify USE_THREAD = 0
# For force setting for multi threaded, specify USE_THREAD = 1
# USE_THREAD = 0

# If you want to build a single-threaded OpenBLAS, but expect to call this
# from several concurrent threads in some other program, comment this in for
# thread safety. (This is done automatically for USE_THREAD=1 , and should not
# be necessary when USE_OPENMP=1)
# USE_LOCKING = 1

# If you're going to use this library with OpenMP, please comment it in.
# This flag is always set for POWER8. Don't set USE_OPENMP = 0 if you're targeting POWER8.
# USE_OPENMP = 1

# The OpenMP scheduler to use - by default this is "static" and you
# will normally not want to change this unless you know that your main
# workload will involve tasks that have highly unbalanced running times
# for individual threads. Changing away from "static" may also adversely
# affect memory access locality in NUMA systems. Setting to "runtime" will
# allow you to select the scheduler from the environment variable OMP_SCHEDULE
# CCOMMON_OPT += -DOMP_SCHED=dynamic

# You can define the maximum number of threads. Basically it should be less
# than or equal to the number of CPU threads. If you don't specify one, it's
# automatically detected by the build system.
# If SMT (aka. HT) is enabled on the system, it may or may not be beneficial to
# restrict NUM_THREADS to the number of physical cores. By default, the automatic
# detection includes logical CPUs, thus allowing the use of SMT.
# Users may opt at runtime to use less than NUM_THREADS threads.
#
# Note for package maintainers: you can build OpenBLAS with a large NUM_THREADS
# value (eg. 32-256) if you expect your users to use that many threads. Due to the way
# some internal structures are allocated, using a large NUM_THREADS value has a RAM
# footprint penalty, even if users reduce the actual number of threads at runtime.
# NUM_THREADS = 24

# If you have enabled USE_OPENMP and your application would call
# OpenBLAS's calculation API from multiple threads, please comment this in.
# This flag defines how many instances of OpenBLAS's calculation API can actually
# run in parallel. If more than NUM_PARALLEL threads call OpenBLAS's calculation API,
# they need to wait for the preceding API calls to finish or risk data corruption.
# NUM_PARALLEL = 2

# When multithreading, OpenBLAS needs to use a memory buffer for communicating
# and collating results for individual subranges of the original matrix. Since
# the original GotoBLAS of the early 2000s, the default size of this buffer has
# been set at a value of 32<<20 (which is 32MB) on x86_64 , twice that on PPC.
# If you expect to handle large problem sizes (beyond about 30000x30000) uncomment
# this line and adjust the (32<<n) factor if necessary. Usually an insufficient value
# manifests itself as a crash in the relevant scal kernel (sscal_k, dscal_k etc)
# BUFFERSIZE = 25

# If you don't need to install the static library, please comment this in.
# NO_STATIC = 1

# If you don't need to generate the shared library, please comment this in.
# NO_SHARED = 1

# If you don't need the CBLAS interface, please comment this in.
# NO_CBLAS = 1

# If you only want the CBLAS interface without installing a Fortran compiler,
# please comment this in.
# ONLY_CBLAS = 1

# If you don't need LAPACK, please comment this in.
# If you set NO_LAPACK=1, the build system automatically sets NO_LAPACKE=1.
# NO_LAPACK = 1

# If you don't need LAPACKE (C Interface to LAPACK), please comment this in.
# NO_LAPACKE = 1

# Build LAPACK Deprecated functions since LAPACK 3.6.0
BUILD_LAPACK_DEPRECATED = 1

# The variable type assumed for the length of character arguments when passing
# data between Fortran LAPACK and C BLAS (defaults to "size_t", but older GCC
# versions used "int"). Mismatches will not cause runtime failures but may result
# in build warnings or errors when building with link-time optimization (LTO)
# LAPACK_STRLEN=int

# Build RecursiveLAPACK on top of LAPACK
# BUILD_RELAPACK = 1
# Have RecursiveLAPACK actually replace standard LAPACK routines instead of
# just adding its equivalents with a RELAPACK_ prefix
# RELAPACK_REPLACE = 1

# If you want to use the legacy threaded Level 3 implementation.
# USE_SIMPLE_THREADED_LEVEL3 = 1

# If you want to use the new, still somewhat experimental code that uses
# thread-local storage instead of a central memory buffer in memory.c
# Note that if your system uses GLIBC, it needs to have at least glibc 2.21
# for this to work.
# USE_TLS = 1

# If you want to drive whole 64bit region by BLAS. Not all Fortran
# compilers support this. It's safe to keep this commented out if you
# are not sure. (This is equivalent to the "-i8" ifort option).
# INTERFACE64 = 1

# Unfortunately most of kernel won't give us high quality buffer.
# BLAS tries to find the best region before entering main function,
# but it will consume time. If you don't like it, you can disable one.
NO_WARMUP = 1

# Comment this in if you want to disable OpenBLAS's CPU/Memory affinity handling.
# This feature is only implemented on Linux, and is always disabled on other platforms.
# Enabling affinity handling may improve performance, especially on NUMA systems, but
# it may conflict with certain applications that also try to manage affinity.
# This conflict can result in threads of the application calling OpenBLAS ending up locked
# to the same core(s) as OpenBLAS, possibly binding all threads to a single core.
# For this reason, affinity handling is disabled by default. Can be safely enabled if nothing
# else modifies affinity settings.
# Note: enabling affinity has been known to cause problems with NumPy and R
NO_AFFINITY = 1

# If you are compiling for Linux and you have more than 16 numa nodes or more than 256 cpus
# BIGNUMA = 1

# If you are compiling for an embedded system ("bare metal") like Cortex M series
# Note that you will have to provide implementations of malloc() and free() in this case
# EMBEDDED = 1

# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers
# and OS. However, the performance is low.
# NO_AVX = 1

# Don't use Haswell optimizations if binutils is too old (e.g. RHEL6)
# NO_AVX2 = 1

# Don't use SkylakeX optimizations if binutils or compiler are too old (the build
# system will try to determine this automatically)
# NO_AVX512 = 1

# Don't use parallel make.
# NO_PARALLEL_MAKE = 1

# Force number of make jobs. The default is the number of logical CPU of the host.
# This is particularly useful when using distcc.
# A negative value will disable adding a -j flag to make, allowing to use a parent
# make -j value. This is useful to call OpenBLAS make from an other project
# makefile
# MAKE_NB_JOBS = 2

# If you would like to know minute performance report of GotoBLAS.
# FUNCTION_PROFILE = 1

# Support for IEEE quad precision(it's *real* REAL*16)( under testing)
# This option should not be used - it is a holdover from unfinished code present
# in the original GotoBLAS2 library that may be usable as a starting point but
# is not even expected to compile in its present form.
# QUAD_PRECISION = 1

# Theads are still working for a while after finishing BLAS operation
# to reduce thread activate/deactivate overhead. You can determine
# time out to improve performance. This number should be from 4 to 30
# which corresponds to (1 << n) cycles. For example, if you set to 26,
# thread will be running for (1 << 26) cycles(about 25ms on 3.0GHz
# system). Also you can control this number by THREAD_TIMEOUT
# CCOMMON_OPT += -DTHREAD_TIMEOUT=26

# Using special device driver for mapping physically contiguous memory
# to the user space. If bigphysarea is enabled, it will use it.
# DEVICEDRIVER_ALLOCATION = 1

# Use large page allocation (called hugepage support in Linux context)
# for the thread buffers (with access by shared memory operations)
# HUGETLB_ALLOCATION = 1

# Use large page allocation called hugepages in Linux) based on mmap accessing
# a memory-backed pseudofile (requires hugetlbfs to be mounted in the system,
# the example below has it mounted on /hugepages. OpenBLAS will create the backing
# file as gotoblas.processid in that path)
# HUGETLBFILE_ALLOCATION = /hugepages

# If you need to synchronize FP CSR between threads (for x86/x86_64 and aarch64 only).
# CONSISTENT_FPCSR = 1

# If any gemm argument m, n or k is less or equal this threshold, gemm will be execute
# with single thread. (Actually in recent versions this is a factor proportional to the
# number of floating point operations necessary for the given problem size, no longer
# an individual dimension). You can use this setting to avoid the overhead of multi-
# threading in small matrix sizes. The default value is 4, but values as high as 50 have
# been reported to be optimal for certain workloads (50 is the recommended value for Julia).
# GEMM_MULTITHREAD_THRESHOLD = 4

# If you need sanity check by comparing results to reference BLAS. It'll be very
# slow (Not implemented yet).
# SANITY_CHECK = 1

# The installation directory.
# PREFIX = /opt/OpenBLAS

# Common Optimization Flag;
# The default -O2 is enough.
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT
# COMMON_OPT = -O2

# gfortran option for LAPACK to improve thread-safety
# It is enabled by default in Makefile.system for gfortran
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT
# FCOMMON_OPT = -frecursive

# Profiling flags
COMMON_PROF = -pg

# Build Debug version
# DEBUG = 1

# Set maximum stack allocation.
# The default value is 2048. 0 disable stack allocation a may reduce GER and GEMV
# performance. For details, https://github.com/xianyi/OpenBLAS/pull/482
#
# MAX_STACK_ALLOC = 0

# Add a prefix or suffix to all exported symbol names in the shared library.
# Avoid conflicts with other BLAS libraries, especially when using
# 64 bit integer interfaces in OpenBLAS.
# For details, https://github.com/xianyi/OpenBLAS/pull/459
#
# The same prefix and suffix are also added to the library name,
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas
#
# SYMBOLPREFIX=
# SYMBOLSUFFIX=

# Run a C++ based thread safety tester after the build is done.
# This is mostly intended as a developer feature to spot regressions, but users and
# package maintainers can enable this if they have doubts about the thread safety of
# the library, given the configuration in this file.
# By default, the thread safety tester launches 52 concurrent calculations at the same
# time.
#
# Please note that the test uses ~1300 MiB of RAM for the DGEMM test.
#
# The test requires CBLAS to be built, a C++11 capable compiler and the presence of
# an OpenMP implementation. If you are cross-compiling this test will probably not
# work at all.
#
# CPP_THREAD_SAFETY_TEST = 1
#
# use this to run only the less memory-hungry GEMV test
# CPP_THREAD_SAFETY_GEMV = 1


# If you want to enable the experimental BFLOAT16 support
# BUILD_BFLOAT16 = 1

# If you want to enable the experimental HFLOAT16 support
# BUILD_HFLOAT16 = 1

# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
# will be allocated on the heap rather than the stack. (This array alone requires
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu
# counts, but obviously it is not the only item that ends up on the stack.
# The default value of 32 ensures that the overall requirement is compatible
# with the default 1MB stacksize imposed by having the Java VM loaded without use
# of its -Xss parameter.
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the java
# VM e.g. in Octave or with the java-based libhdfs in numpy or scipy code
# BLAS3_MEM_ALLOC_THRESHOLD = 160



# By default the library contains BLAS functions (and LAPACK if selected) for all input types.
# To build a smaller library supporting e.g. only single precision real (SGEMM etc.) or only
# the functions for complex numbers, uncomment the desired type(s) below
# BUILD_SINGLE = 1
# BUILD_DOUBLE = 1
# BUILD_COMPLEX = 1
# BUILD_COMPLEX16 = 1
#
# End of user configuration
#

+ 0
- 48
Makefile.sparc View File

@@ -1,48 +0,0 @@
CPP = $(CC) -E
RANLIB = ranlib

ifdef BINARY64

ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9 -m64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9 -m64
endif
ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -m64
endif
else

ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9
else
CCOMMON_OPT += -xarch=v9
endif

ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9
endif
ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -xarch=v8plusb
endif

endif

LIBNAME = $(LIBPREFIX).a

ifndef SMP
LIBCXML = -L/opt/SUNWspro/lib/v9
LIBATLAS = -L$(HOME)/misc/lib -lf77blas -latlas -lm
else
LIBCXML = -lcxmlp -lots -lm
endif
ifdef BINARY64
LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \
-Wl,-R,/opt/SUNWspro/lib/v9 -lsunperf -lompstubs -lfui -lfsu -lsunmath
else
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
endif

+ 0
- 1971
Makefile.system
File diff suppressed because it is too large
View File


+ 0
- 658
Makefile.tail View File

@@ -1,658 +0,0 @@
###############################################################################
# Copyright (c) 2025, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.
###############################################################################

BBLASOBJS_P = $(BBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
SBBLASOBJS_P = $(SBBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
SHBLASPBJS_P = $(SHBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
SBLASOBJS_P = $(SBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
DBLASOBJS_P = $(DBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX))
SBEXTOBJS_P = $(SBEXTOBJS:.$(SUFFIX)=.$(PSUFFIX))

COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX))

HPLOBJS_P = $(HPLOBJS:.$(SUFFIX)=.$(PSUFFIX))

BLASOBJS = $(SHBLASOBJS) $(BBLASOBJS) $(SBEXTOBJS) $(SBBLASOBJS) $(SBLASOBJS) $(DBLASOBJS) $(CBLASOBJS) $(ZBLASOBJS) $(CBAUXOBJS)
BLASOBJS_P = $(SHBLASPBJS_P) $(BBLASOBJS_P) $(SBEXTOBJS_P) $(SBBLASOBJS_P) $(SBLASOBJS_P) $(DBLASOBJS_P) $(CBLASOBJS_P) $(ZBLASOBJS_P) $(CBAUXOBJS_P)

ifdef EXPRECISION
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif

ifdef QUAD_PRECISION
BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif

$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHFLOAT16 -UDOUBLE -UCOMPLEX
$(BBLASOBJS) $(BBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -DBGEMM -UDOUBLE -UCOMPLEX
$(SBBLASOBJS) $(SBBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
$(SBEXTOBJS) $(SBEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX

$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(BBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBEXTOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)

libs :: $(BLASOBJS) $(COMMONOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

prof :: $(BLASOBJS_P) $(COMMONOBJS_P)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME_P) $^

hpl :: $(HPLOBJS) $(COMMONOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

hpl_p :: $(HPLOBJS_P) $(COMMONOBJS_P)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME_P) $^

kernel :: $(BLASOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

commonlibs :: $(COMMONOBJS)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^

commonprof :: $(COMMONOBJS_P)
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME_P) $^

quick :
$(MAKE) -C $(TOPDIR) libs

bms.$(SUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F)

bmd.$(SUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)

bmd-k.$(SUFFIX):bm-k.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)

ifdef QUAD_PRECISION
bmq.$(SUFFIX):bmq.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)

bmx.$(SUFFIX):bmx.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F)
else
bmq.$(SUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)

bmx.$(SUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F)
endif

bmc.$(SUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F)

bmz.$(SUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -DCOMPLEX -c $< -o $(@F)

bmd_nn.$(SUFFIX):bm_special.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -DNN -c $< -o $(@F)

bmd_nt.$(SUFFIX):bm_special.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -DNT -c $< -o $(@F)

bmd_tn.$(SUFFIX):bm_special.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -DTN -c $< -o $(@F)

bmd_tt.$(SUFFIX):bm_special.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -DTT -c $< -o $(@F)

bm-phy.$(SUFFIX):bm-phy.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)

bms.$(PSUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(PFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F)

bmd.$(PSUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(PFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)

ifdef QUAD_PRECISION
bmq.$(PSUFFIX):bmq.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(PFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)

bmx.$(PSUFFIX):bmx.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(PFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F)
else
bmq.$(PSUFFIX):bm.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(PFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)

bmx.$(PSUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(PFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F)
endif

bmc.$(PSUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(PFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F)

bmz.$(PSUFFIX):bmz.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(PFLAGS) -DDOUBLE -DCOMPLEX -c $< -o $(@F)

bms : bms.$(SUFFIX) $(SBLASOBJS) $(COMMONOBJS) $(SOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmd : bmd.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) -lm

bmd-k : bmd-k.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) -lm

bmq : bmq.$(SUFFIX) $(QBLASOBJS) $(COMMONOBJS) $(QOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmc : bmc.$(SUFFIX) $(CBLASOBJS) $(COMMONOBJS) $(COBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB) $(FEXTRALIB)

bmz : bmz.$(SUFFIX) $(ZBLASOBJS) $(COMMONOBJS) $(ZOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmx : bmx.$(SUFFIX) $(XBLASOBJS) $(COMMONOBJS) $(XOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmd_nn : bmd_nn.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmd_nt : bmd_nt.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmd_tn : bmd_tn.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmd_tt : bmd_tt.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bm-phy:bm-phy.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmcc : bmcc.$(SUFFIX) $(CBLASOBJS) $(COMMONOBJS) $(COBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmzc : bmzc.$(SUFFIX) $(ZBLASOBJS) $(COMMONOBJS) $(ZOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bms.prof : bms.$(PSUFFIX) $(SBLASOBJS_P) $(COMMONOBJS_P) $(SOBJS) $(OBJS) $(LIBS_P)
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmd.prof : bmd.$(PSUFFIX) $(DBLASOBJS_P) $(COMMONOBJS_P) $(DOBJS) $(OBJS) $(LIBS_P)
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmq.prof : bmq.$(PSUFFIX) $(QBLASOBJS_P) $(COMMONOBJS_P) $(QOBJS) $(OBJS) $(LIBS_P)
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmc.prof : bmc.$(PSUFFIX) $(CBLASOBJS_P) $(COMMONOBJS) $(COBJS) $(OBJS) $(LIBS_P)
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmz.prof : bmz.$(PSUFFIX) $(ZBLASOBJS_P) $(COMMONOBJS) $(ZOBJS) $(OBJS) $(LIBS_P)
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bmx.prof : bmz.$(PSUFFIX) $(XBLASOBJS_P) $(COMMONOBJS) $(XOBJS) $(OBJS) $(LIBS_P)
$(CC) -o $(@F) $(PFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bms.cxml : bms.$(SUFFIX) $(SOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML)

bmd.cxml : bmd.$(SUFFIX) $(DOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML)

bmc.cxml : bmc.$(SUFFIX) $(COBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML)

bmz.cxml : bmz.$(SUFFIX) $(ZOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML)

bms.scsl : bms.$(SUFFIX) $(SOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL)

bmd.scsl : bmd.$(SUFFIX) $(DOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL)

bmc.scsl : bmc.$(SUFFIX) $(COBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL)

bmz.scsl : bmz.$(SUFFIX) $(ZOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL)

bms.acml : bms.$(SUFFIX) $(SOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML)

bmd.acml : bmd.$(SUFFIX) $(DOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML)

bmc.acml : bmc.$(SUFFIX) $(COBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML)

bmz.acml : bmz.$(SUFFIX) $(ZOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML)

bms.sun : bms.$(SUFFIX) $(SOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB)

bmd.sun : bmd.$(SUFFIX) $(DOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB)

bmc.sun : bmc.$(SUFFIX) $(COBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB)

bmz.sun : bmz.$(SUFFIX) $(ZOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB)

bms.atlas : bms.$(SUFFIX) $(SOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS)

bmd.atlas : bmd.$(SUFFIX) $(DOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS)

bmc.atlas : bmc.$(SUFFIX) $(COBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS)

bmz.atlas : bmz.$(SUFFIX) $(ZOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS)

bms.essl : bms.$(SUFFIX) $(SOBJS) $(OBJS)
$(CC) $(FCOMMON_OPT) -o $(@F) $^ $(LIBESSL)

bmd.essl : bmd.$(SUFFIX) $(DOBJS) $(OBJS)
$(CC) $(CCOMMON_OPT) -o $(@F) $^ $(LIBESSL)

bmc.essl : bmc.$(SUFFIX) $(COBJS) $(OBJS)
$(F77) $(CCOMMON_OPT) -o $(@F) $^ $(LIBESSL)

bmz.essl : bmz.$(SUFFIX) $(ZOBJS) $(OBJS)
$(CC) $(CCOMMON_OPT) -o $(@F) $^ $(LIBESSL)

bms.flame : bms.$(SUFFIX) $(SOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME)

bmd.flame : bmd.$(SUFFIX) $(DOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME)

bmc.flame : bmc.$(SUFFIX) $(COBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME)

bmz.flame : bmz.$(SUFFIX) $(ZOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME)

bms.flame.prof : bms.$(SUFFIX) $(SOBJS) $(OBJS_P)
$(F77) -o $(@F) $(PFLAGS) $^ $(LIBFLAME)

bmd.flame.prof : bmd.$(SUFFIX) $(DOBJS) $(OBJS_P)
$(F77) -o $(@F) $(PFLAGS) $^ $(LIBFLAME)

bmc.flame.prof : bmc.$(SUFFIX) $(COBJS) $(OBJS_P)
$(F77) -o $(@F) $(PFLAGS) $^ $(LIBFLAME)

bmz.flame.prof : bmz.$(SUFFIX) $(ZOBJS) $(OBJS_P)
$(F77) -o $(@F) $(PFLAGS) $^ $(LIBFLAME)

bms.mkl : bms.$(SUFFIX) $(SOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)

bmd.mkl : bmd.$(SUFFIX) $(DOBJS) $(OBJS)
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)

bmc.mkl : bmc.$(SUFFIX) $(COBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)

bmz.mkl : bmz.$(SUFFIX) $(ZOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)

bmq.mkl : bmq.$(SUFFIX) $(QOBJS) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)

bms.mkl.prof : bms.$(PSUFFIX) $(SOBJS) $(OBJS)
$(CC) -o $(@F) $(PFLAGS) $^ $(LIBMKL)

bmd.mkl.prof : bmd.$(PSUFFIX) $(DOBJS) $(OBJS)
$(CC) -o $(@F) $(PFLAGS) $^ $(LIBMKL)

bmc.mkl.prof : bmc.$(PSUFFIX) $(COBJS) $(OBJS)
$(CC) -o $(@F) $(PFLAGS) $^ $(LIBMKL)

bmz.mkl.prof : bmz.$(PSUFFIX) $(ZOBJS) $(OBJS)
$(CC) -o $(@F) $(PFLAGS) $^ $(LIBMKL)

bms.mlib : bms.$(SUFFIX) $(SOBJS) $(OBJS)
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB)

bmd.mlib : bmd.$(SUFFIX) $(DOBJS) $(OBJS)
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB)

bmc.mlib : bmc.$(SUFFIX) $(COBJS) $(OBJS)
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB)

bmz.mlib : bmz.$(SUFFIX) $(ZOBJS) $(OBJS)
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB)

bms.veclib : bms.$(SUFFIX) $(SOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB)

bmd.veclib : bmd.$(SUFFIX) $(DOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB)

bmc.veclib : bmc.$(SUFFIX) $(COBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB)

bmz.veclib : bmz.$(SUFFIX) $(ZOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB)

bms.fuji : bms.$(SUFFIX) $(SOBJS)
ifndef SMP
fcc -KV9FMADD -SSL2 -o $(@F) $^
else
fcc -KV9FMADD -SSL2BLAMP -o $(@F) $^
endif

bmd.fuji : bmd.$(SUFFIX) $(DOBJS)
ifndef SMP
fcc -KV9FMADD -SSL2 -o $(@F) $^
else
fcc -KV9FMADD -SSL2BLAMP -o $(@F) $^
endif

bmc.fuji : bmc.$(SUFFIX) $(COBJS)
ifndef SMP
fcc -KV9FMADD -SSL2 -o $(@F) $^
else
fcc -KV9FMADD -SSL2BLAMP -o $(@F) $^
endif

bmz.fuji : bmz.$(SUFFIX) $(ZOBJS)
ifndef SMP
fcc -KV9FMADD -SSL2 -o $(@F) $^
else
fcc -KV9FMADD -SSL2BLAMP -o $(@F) $^
endif

bench: bench.$(SUFFIX) $(BLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

bench.$(SUFFIX): bench.c
$(CC) -c -o $(@F) $(CFLAGS) $^

bench_old: bench_old.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

kbench: kbench.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

prebench: prebench.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

kbench_rank_k: kbench_rank_k.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

smallbench: smallbench.$(SUFFIX) $(BLASOBJS) $(COMMONOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

smallbench.mkl: smallbench.$(SUFFIX)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)

bench.sun: bench.$(SUFFIX) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF) $(EXTRALIB) $(CEXTRALIB)

bench.cxml: bench.$(SUFFIX) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBCXML)

bench.atlas: bench.$(SUFFIX) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS)

bench.essl: bench.$(SUFFIX) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL) ../../level1/others/libmisc.$(LIBSUFFIX)

bench.scsl: bench.$(SUFFIX) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSCSL) $(EXTRALIB) $(CEXTRALIB)

bench.acml: bench.$(SUFFIX) $(OBJS)
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBACML) $(EXTRALIB) $(CEXTRALIB)

bench.flame: bench.$(SUFFIX) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB)

kbench.mkl: kbench.$(SUFFIX) $(OBJS)
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)

bench.mkl: bench.$(SUFFIX) $(OBJS)
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)

bench_old.mkl: bench_old.$(SUFFIX) $(OBJS)
$(CC) -static -o $(@F) $(CFLAGS) $^ $(LIBMKL) $(EXTRALIB) $(CEXTRALIB)

bench.mlib: bench.$(SUFFIX) $(OBJS)
$(F77) -o $(@F) $(CFLAGS) $^ $(LIBMLIB)

bench.veclib: bench.$(SUFFIX) $(OBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBVECLIB)

params : params.$(SUFFIX) $(SBLASOBJS) $(COMMONOBJS) $(SOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramd : paramd.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramq : paramq.$(SUFFIX) $(QBLASOBJS) $(COMMONOBJS) $(QOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramc : paramc.$(SUFFIX) $(CBLASOBJS) $(COMMONOBJS) $(COBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramz : paramz.$(SUFFIX) $(ZBLASOBJS) $(COMMONOBJS) $(ZOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramx : paramx.$(SUFFIX) $(XBLASOBJS) $(COMMONOBJS) $(XOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

params-ex : params-ex.$(SUFFIX) $(SBLASOBJS) $(COMMONOBJS) $(SOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramd-ex : paramd-ex.$(SUFFIX) $(DBLASOBJS) $(COMMONOBJS) $(DOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramq-ex : paramq-ex.$(SUFFIX) $(QBLASOBJS) $(COMMONOBJS) $(QOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramc-ex : paramc-ex.$(SUFFIX) $(CBLASOBJS) $(COMMONOBJS) $(COBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramz-ex : paramz-ex.$(SUFFIX) $(ZBLASOBJS) $(COMMONOBJS) $(ZOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

paramx-ex : paramx-ex.$(SUFFIX) $(XBLASOBJS) $(COMMONOBJS) $(XOBJS) $(OBJS) $(LIBS)
$(CC) -o $(@F) $(CFLAGS) $^ $(EXTRALIB) $(CEXTRALIB)

params.atlas : params.$(SUFFIX) $(OBJS) $(SOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS)

paramd.atlas : paramd.$(SUFFIX) $(OBJS) $(DOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS)

paramc.atlas : paramc.$(SUFFIX) $(OBJS) $(COBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS)

paramz.atlas : paramz.$(SUFFIX) $(OBJS) $(ZOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBATLAS)

params.sun : params.$(SUFFIX) $(OBJS) $(SOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF)

paramd.sun : paramd.$(SUFFIX) $(OBJS) $(DOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF)

paramc.sun : paramc.$(SUFFIX) $(OBJS) $(COBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF)

paramz.sun : paramz.$(SUFFIX) $(OBJS) $(ZOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBSUNPERF)

params.essl : params.$(SUFFIX) $(OBJS) $(SOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL)

paramd.essl : paramd.$(SUFFIX) $(OBJS) $(DOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL)

paramc.essl : paramc.$(SUFFIX) $(OBJS) $(COBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL)

paramz.essl : paramz.$(SUFFIX) $(OBJS) $(ZOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBESSL)

params.mkl : params.$(SUFFIX) $(OBJS) $(SOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL)

paramd.mkl : paramd.$(SUFFIX) $(OBJS) $(DOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL)

paramc.mkl : paramc.$(SUFFIX) $(OBJS) $(COBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL)

paramz.mkl : paramz.$(SUFFIX) $(OBJS) $(ZOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBMKL)

params.acml : params.$(SUFFIX) $(OBJS) $(SOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML)

paramd.acml : paramd.$(SUFFIX) $(OBJS) $(DOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML)

paramc.acml : paramc.$(SUFFIX) $(OBJS) $(COBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML)

paramz.acml : paramz.$(SUFFIX) $(OBJS) $(ZOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBACML)

params.flame : params.$(SUFFIX) $(OBJS) $(SOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB)

paramd.flame : paramd.$(SUFFIX) $(OBJS) $(DOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB)

paramc.flame : paramc.$(SUFFIX) $(OBJS) $(COBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB)

paramz.flame : paramz.$(SUFFIX) $(OBJS) $(ZOBJS)
$(CC) -o $(@F) $(CFLAGS) $^ $(LIBFLAME) $(EXTRALIB) $(CEXTRALIB)

params.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F)

paramd.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)

paramq.$(SUFFIX):param.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)

paramc.$(SUFFIX):paramz.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F)

paramz.$(SUFFIX):paramz.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -DCOMPLEX -c $< -o $(@F)

paramx.$(SUFFIX):paramz.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F)

params-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -UDOUBLE -UCOMPLEX -c $< -o $(@F)

paramd-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -UCOMPLEX -c $< -o $(@F)

paramq-ex.$(SUFFIX):param-ex.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DXDOUBLE -UCOMPLEX -c $< -o $(@F)

paramc-ex.$(SUFFIX):paramz-ex.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -UDOUBLE -DCOMPLEX -c $< -o $(@F)

paramz-ex.$(SUFFIX):paramz-ex.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DDOUBLE -DCOMPLEX -c $< -o $(@F)

paramx-ex.$(SUFFIX):paramz-ex.c $(TOPDIR)/../bench/bmcommon.h
$(CC) $(CFLAGS) -DXDOUBLE -DCOMPLEX -c $< -o $(@F)

gen_insn_flash.c :
echo '#include <stdio.h>' > gen_insn_flash.c
echo '#include <stdlib.h>' >> gen_insn_flash.c
echo '#define ICACHE_SIZE ( 256 << 10)' >> gen_insn_flash.c
echo 'int main(void){' >> gen_insn_flash.c
echo 'int i;' >> gen_insn_flash.c
echo '#ifdef __alpha' >> gen_insn_flash.c
echo 'printf(".set noat;.set noreorder;\n");' >> gen_insn_flash.c
echo 'printf(".arch ev6;.text;.align 5\n");' >> gen_insn_flash.c
echo 'printf(".globl insn_flash\n");' >> gen_insn_flash.c
echo 'printf(".ent insn_flash\n");' >> gen_insn_flash.c
echo 'printf("insn_flash:\n");' >> gen_insn_flash.c
echo 'for (i = 0; i < ICACHE_SIZE / 4; i++)' >> gen_insn_flash.c
echo 'printf("br 1f\n 1:\n");' >> gen_insn_flash.c
echo 'printf(".align 5;ret;.end insn_flash\n");'>> gen_insn_flash.c
echo '#else' >> gen_insn_flash.c
echo 'printf(".text;.align 32\n");' >> gen_insn_flash.c
echo 'printf(".globl insn_flash\n");' >> gen_insn_flash.c
echo 'printf("insn_flash:\n");' >> gen_insn_flash.c
echo 'for (i = 0; i < ICACHE_SIZE / 2; i++)' >> gen_insn_flash.c
echo 'printf("jmp 1f\n 1:\n");' >> gen_insn_flash.c
echo 'printf(".align 32;ret\n");' >> gen_insn_flash.c
echo '#endif' >> gen_insn_flash.c
echo 'return 0;' >> gen_insn_flash.c
echo '}' >> gen_insn_flash.c

insn_flash.$(SUFFIX) : gen_insn_flash
./gen_insn_flash > temp.s
$(AS) -o $(@F) temp.s
rm -f temp.s

dummy :

clean ::
@if test -d $(ARCH); then \
(cd $(ARCH) && $(MAKE) clean) \
fi
@find . -name '*.o' | xargs rm -rf
@rm -rf *.a *.s *.po *.obj *.i *.so core core.* gmon.out *.cso \
*.csx *.is *~ *.exe *.flame *.pdb *.dwf \
gen_insn_flash.c gen_insn_flash *.stackdump *.dll *.exp *.lib \
*.pc *.pcl *.def *.i *.prof linktest.c \
bms bmd bmc bmz bmq bmx \
params paramd paramc paramz paramq paramx \
params-ex paramd-ex paramc-ex paramz-ex paramq-ex paramx-ex \
bench tpp kbench kbench2 \
*.mkl *.sun *.acml *.cxml *.essl *.atlas *.scsl *.mlib *.veclib *.fuji

+ 0
- 89
Makefile.x86 View File

@@ -1,89 +0,0 @@
# COMPILER_PREFIX = mingw32-

ifneq ($(DYNAMIC_ARCH),1)
ADD_CPUFLAGS = 1
else
ifdef TARGET_CORE
ADD_CPUFLAGS = 1
endif
endif

ifdef ADD_CPUFLAGS
ifdef HAVE_SSE
CCOMMON_OPT += -msse
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -msse
endif
endif
endif

ifeq ($(OSNAME), Interix)
ARFLAGS = -m x86
endif

ifndef SMP
LIBMKL = -L$(MKLPATH)/32 -Wl,-rpath,$(MKLPATH)/32 -lmkl_intel -lmkl_sequential -lmkl_core -lguide -lpthread -lm
else
LIBMKL = -L$(MKLPATH)/32 -Wl,-rpath,$(MKLPATH)/32 -lmkl_intel -lmkl_intel_thread -lmkl_core -lguide -lpthread -lm
endif

# LIBMKL = -L$(MKLPATH)/32 -lmkl_lapack -lmkl_ia32 -lguide -lpthread -lm

ifndef SMP
LIBATLAS = -L$(ATLAS) -lf77blas -latlas -lg2c -lm
else
LIBATLAS = -L$(ATLAS) -lptf77blas -latlas -lpthread -lg2c -lm
endif

ifeq ($(COMPILER_F77), g77)
LIBACML = -L$(ACMLPATH)/gnu32/lib -Wl,-rpath,$(ACMLPATH)/gnu32/lib -lacml -lg2c
endif

LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame-lapack -lflame-base $(LIBS)

ifeq ($(F_COMPILER), GFORTRAN)
ifndef SMP
LIBACML = -L$(ACMLPATH)/gfortran32/lib -Wl,-rpath,$(ACMLPATH)/gfortran32/lib -lacml -lgfortran -lm
else
LIBACML = -L$(ACMLPATH)/gfortran32_mp/lib -Wl,-rpath,$(ACMLPATH)/gfortran32_mp/lib -lacml_mp -lgfortran -lgomp -lm
endif
endif

ifeq ($(COMPILER_F77), pgf77)
LIBACML = -L$(ACMLPATH)/pgi32/lib -lacml -L/opt/pgi/linux86-64/5.2/lib -lpgftnrtl -lnspgc -lpgc
endif

ifeq ($(F_COMPILER), PATHSCALE)
ifndef SMP
LIBACML = -L$(ACMLPATH)/pathscale32/lib -Wl,-rpath,$(ACMLPATH)/pathscale32/lib -lacml -Wl,-rpath,$(PATHSCALEPATH) -L$(PATHSCALEPATH) -lpathfortran -lm
else
LIBACML = -L$(ACMLPATH)/pathscale32_mp/lib -Wl,-rpath,$(ACMLPATH)/pathscale32_mp/lib -lacml_mp -Wl,-rpath,$(PATHSCALEPATH) -L$(PATHSCALEPATH) -lopenmp -lpathfortran -lm
endif
endif

LIBSUNPERF = -L/opt/SUNWspro/lib/sse2 -Wl,-R,/opt/SUNWspro/lib/sse2 -lsunperf

LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib

ifndef SMP
LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
else
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
endif
ifdef HAVE_SSE2
CCOMMON_OPT += -msse2
FCOMMON_OPT += -msse2
endif
ifdef HAVE_SSE3
CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3
ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3
FCOMMON_OPT += -mssse3
endif
ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1
FCOMMON_OPT += -msse4.1
endif
endif


+ 0
- 299
Makefile.x86_64 View File

@@ -1,299 +0,0 @@
# CCOMMON_OPT += -DFASTCPU

ifeq ($(OSNAME), SunOS)
ifdef BINARY64
ifeq ($(F_COMPILER), SUN)
FCOMMON_OPT += -m64
endif
endif
endif

ifeq ($(C_COMPILER), CLANG)
ifeq ($(findstring icx,$(CC)),icx)
CCOMMON_OPT += -fp-model=consistent
endif
endif

ifneq ($(DYNAMIC_ARCH),1)
ADD_CPUFLAGS = 1
else
ifdef TARGET_CORE
ADD_CPUFLAGS = 1
endif
endif

ifdef ADD_CPUFLAGS
ifdef HAVE_SSE3
CCOMMON_OPT += -msse3
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -msse3
endif
endif
ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mssse3
endif
endif
ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -msse4.1
endif
endif
ifndef OLDGCC
ifdef HAVE_AVX
CCOMMON_OPT += -mavx
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mavx
endif
endif
endif
ifndef NO_AVX2
ifdef HAVE_AVX2
CCOMMON_OPT += -mavx2
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -mavx2
endif
endif
endif

ifeq ($(CORE), SKYLAKEX)
ifndef NO_AVX512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif

ifeq ($(CORE), COOPERLAKE)
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# cooperlake support was added in 10.1
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
CCOMMON_OPT += -march=cooperlake
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=cooperlake
endif
else # gcc not support, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
else ifeq ($(C_COMPILER), CLANG)
# cooperlake support was added in clang 9
ifeq ($(CLANGVERSIONGTEQ9), 1)
CCOMMON_OPT += -march=cooperlake
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=cooperlake
endif
else # not supported in clang, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif

ifeq ($(CORE), SAPPHIRERAPIDS)
ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC)
# sapphire rapids support was added in 11
ifeq ($(GCCVERSIONGTEQ11), 1)
CCOMMON_OPT += -march=sapphirerapids
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=sapphirerapids
endif
else # gcc not support, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
else ifeq ($(C_COMPILER), CLANG)
# sapphire rapids support was added in clang 12
ifeq ($(CLANGVERSIONGTEQ12), 1)
CCOMMON_OPT += -march=sapphirerapids
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=sapphirerapids
endif
else # not supported in clang, fallback to avx512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
endif
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif

ifeq ($(CORE), ZEN)
ifdef HAVE_AVX512VL
ifndef NO_AVX512
CCOMMON_OPT += -march=skylake-avx512
ifneq ($(F_COMPILER), NAG)
FCOMMON_OPT += -march=skylake-avx512
endif
ifeq ($(OSNAME), CYGWIN_NT)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
ifeq ($(OSNAME), WINNT)
ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -fno-asynchronous-unwind-tables
FCOMMON_OPT += -fno-asynchronous-unwind-tables
endif
endif
endif
endif
endif


ifdef HAVE_AVX2
ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
CCOMMON_OPT += -mavx2
endif
else
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -mavx2
endif
endif
ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
ifeq ($(F_COMPILER), GFORTRAN)
# AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
FCOMMON_OPT += -mavx2
endif
else
ifeq ($(F_COMPILER), FLANG)
FCOMMON_OPT += -mavx2
endif
endif
endif
endif
endif

endif


ifeq ($(OSNAME), Interix)
ARFLAGS = -m x64
endif

ifeq ($(OSNAME), Darwin)
ifndef SMP
LIBMKL = -L$(MKLPATH)/em64t -Wl,-rpath,$(MKLPATH)/em64t -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -lguide -lpthread -lm
else
LIBMKL = -L$(MKLPATH)/em64t -Wl,-rpath,$(MKLPATH)/em64t -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -lguide -lpthread -lm
endif
else
ifndef SMP
LIBMKL = -L$(MKLPATH)/em64t -Wl,-rpath,$(MKLPATH)/em64t -Wl,--start-group -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -Wl,--end-group -lguide -lpthread -lm
else
LIBMKL = -L$(MKLPATH)/em64t -Wl,-rpath,$(MKLPATH)/em64t -Wl,--start-group -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core -Wl,--end-group -lguide -lpthread -lm
endif
endif


ifndef SMP
LIBATLAS = -L$(ATLASPATH)64 -llapack -lcblas -lf77blas -latlas -lm
else
LIBATLAS = -L$(ATLASPATH)64 -llapack -lptcblas -lptf77blas -latlas -lpthread -lm
endif

LIBFLAME = -L$(FLAMEPATH) -llapack2flame -lflame $(TOPDIR)/$(LIBNAME) -lgfortran -lpthread -lm


ifeq ($(F_COMPILER), g77)
LIBACML = -L$(ACMLPATH)/gnu64/lib -Wl,-rpath,$(ACMLPATH)/gnu64/lib -lacml -lacml_mv -lg2c -lm
endif

ifeq ($(F_COMPILER), GFORTRAN)
ifndef SMP
LIBACML = -L$(ACMLPATH)/gfortran64/lib -Wl,-rpath,$(ACMLPATH)/gfortran64/lib -lacml -lacml_mv -lgfortran -lm
else
LIBACML = -L$(ACMLPATH)/gfortran64_mp/lib -Wl,-rpath,$(ACMLPATH)/gfortran64_mp/lib -lacml_mp -lacml_mv -lgfortran -lgomp -lm
endif
endif

ifeq ($(F_COMPILER), INTEL)
ifndef SMP
LIBACML = -L$(ACMLPATH)/ifort64/lib -Wl,-rpath,$(ACMLPATH)/ifort64/lib -lacml -lacml_mv -lifcoremt_pic -lirc -lm -lpthread -ldl
else
LIBACML = -L$(ACMLPATH)/ifort64_mp/lib -Wl,-rpath,$(ACMLPATH)/ifort64_mp/lib -lacml_mp -lacml_mv -lifcoremt_pic -liomp5 -lirc -lm -lpthread -ldl
endif
endif

ifeq ($(F_COMPILER), OPEN64)
ifndef SMP
LIBACML = -L$(ACMLPATH)/open64/lib -Wl,-rpath,$(ACMLPATH)/open64/lib -lacml -lacml_mv -lm
else
LIBACML = -L$(ACMLPATH)/open64_mp/lib -Wl,-rpath,$(ACMLPATH)/open64_mp/lib -lacml_mp -lacml_mv -lm -lpthread
endif
endif

ifeq ($(F_COMPILER), pgf77)
ifndef SMP
LIBACML = -L$(ACMLPATH)/pgi64/lib -Wl,-rpath,$(ACMLPATH)/pgi64/lib -lacml -lacml_mv -L$(PGIPATH) -Wl,-rpath,$(PGIPATH) -lpgftnrtl -lnspgc -lpgmp -lpgc
else
LIBACML = -L$(ACMLPATH)/pgi64_mp/lib -Wl,-rpath,$(ACMLPATH)/pgi64_mp/lib -lacml -lacml_mv -L$(PGIPATH) -Wl,-rpath,$(PGIPATH) -lpgftnrtl -lnspgc -lpgmp -lpgc
endif
endif

ifeq ($(F_COMPILER), PATHSCALE)
ifndef SMP
LIBACML = -L$(ACMLPATH)/pathscale64/lib -Wl,-rpath,$(ACMLPATH)/pathscale64/lib -lacml -lacml_mv -Wl,-rpath,$(PATHSCALEPATH) -L$(PATHSCALEPATH) -lpathfortran -lm
else
LIBACML = -L$(ACMLPATH)/pathscale64_mp/lib -Wl,-rpath,$(ACMLPATH)/pathscale64_mp/lib -lacml_mp -lacml_mv -Wl,-rpath,$(PATHSCALEPATH) -L$(PATHSCALEPATH) -lopenmp -lpathfortran -lm
endif
endif

ifeq ($(F_COMPILER), f90)
LIBACML = -L$(ACMLPATH)/sun64/lib -Wl,-R,$(ACMLPATH)/sun64/lib -L$(SUNPATH)/lib/amd64 -Wl,-R,$(SUNPATH)/lib/amd64 -lacml -lacml_mv -lfsu
endif

LIBSUNPERF = -L$(SUNPATH)/lib/amd64 -L$(SUNPATH)/rtlibs/amd64 -Wl,-R,$(SUNPATH)/lib/amd64 -Wl,-R,$(SUNPATH)/rtlibs/amd64 -lsunperf -lfui -lfsu -lmtsk

LIBVECLIB = /System/Library/Frameworks/vecLib.framework/Versions/Current/vecLib

+ 0
- 16
Makefile.zarch View File

@@ -1,16 +0,0 @@

ifeq ($(CORE), Z13)
CCOMMON_OPT += -march=z13 -mzvector
FCOMMON_OPT += -march=z13 -mzvector
endif

ifeq ($(CORE), Z14)
CCOMMON_OPT += -march=z14 -mzvector -O3
FCOMMON_OPT += -march=z14 -mzvector
endif

# Enable floating-point expression contraction for clang, since it is the
# default for gcc
ifeq ($(C_COMPILER), CLANG)
CCOMMON_OPT += -ffp-contract=on
endif

+ 0
- 370
README.md View File

@@ -1,370 +0,0 @@
# OpenBLAS

[![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

Cirrus CI: [![Build Status](https://api.cirrus-ci.com/github/xianyi/OpenBLAS.svg?branch=develop)](https://cirrus-ci.com/github/xianyi/OpenBLAS)



[![Build Status](https://dev.azure.com/xianyi/OpenBLAS/_apis/build/status/xianyi.OpenBLAS?branchName=develop)](https://dev.azure.com/xianyi/OpenBLAS/_build/latest?definitionId=1&branchName=develop)

OSUOSL POWERCI [![Build Status](https://powerci.osuosl.org/buildStatus/icon?job=OpenBLAS_gh%2Fdevelop)](http://powerci.osuosl.org/job/OpenBLAS_gh/job/develop/)

OSUOSL IBMZ-CI [![Build Status](http://ibmz-ci.osuosl.org/buildStatus/icon?job=OpenBLAS-Z%2Fdevelop)](http://ibmz-ci.osuosl.org/job/OpenBLAS-Z/job/develop/)
## Introduction

OpenBLAS is an optimized BLAS (Basic Linear Algebra Subprograms) library based on GotoBLAS2 1.13 BSD version.

For more information about OpenBLAS, please see:

- The documentation at [openmathlib.org/OpenBLAS/docs/](http://www.openmathlib.org/OpenBLAS/docs),
- The home page at [openmathlib.org/OpenBLAS/](http://www.openmathlib.org/OpenBLAS).

For a general introduction to the BLAS routines, please refer to the extensive documentation of their reference implementation hosted at netlib:
<https://www.netlib.org/blas>. On that site you will likewise find documentation for the reference implementation of the higher-level library LAPACK - the **L**inear **A**lgebra **Pack**age that comes included with OpenBLAS. If you are looking for a general primer or refresher on Linear Algebra, the set of six
20-minute lecture videos by Prof. Gilbert Strang on either MIT OpenCourseWare [here](https://ocw.mit.edu/resources/res-18-010-a-2020-vision-of-linear-algebra-spring-2020/) or YouTube [here](https://www.youtube.com/playlist?list=PLUl4u3cNGP61iQEFiWLE21EJCxwmWvvek) may be helpful.

## Binary Packages

We provide official binary packages for the following platform:

* Windows x86/x86_64
* Windows arm64 (woa)

You can download them from [file hosting on sourceforge.net](https://sourceforge.net/projects/openblas/files/) or from the [Releases section of the GitHub project page](https://github.com/OpenMathLib/OpenBLAS/releases).

OpenBLAS is also packaged for many package managers - see [the installation section of the docs](http://www.openmathlib.org/OpenBLAS/docs/install/) for details.

## Installation from Source

Obtain the source code from https://github.com/OpenMathLib/OpenBLAS/. Note that the default branch
is `develop` (a `master` branch is still present, but far out of date).

Build-time parameters can be chosen in `Makefile.rule`, see there for a short description of each option.
Most options can also be given directly on the command line as parameters to your `make` or `cmake` invocation.

### Dependencies

Building OpenBLAS requires the following to be installed:

* GNU Make or CMake
* A C compiler, e.g. GCC or Clang
* A Fortran compiler (optional, for LAPACK)

In general, using a recent version of the compiler is strongly recommended.
If a Fortran compiler is not available, it is possible to compile an older version of the included LAPACK
that has been machine-translated to C.

### Normal compile

Simply invoking `make` (or `gmake` on BSD) will detect the CPU automatically.
To set a specific target CPU, use `make TARGET=xxx`, e.g. `make TARGET=NEHALEM`.
The full target list is in the file `TargetList.txt`, other build optionss are documented in Makefile.rule and
can either be set there (typically by removing the comment character from the respective line), or used on the
`make` command line.
Note that when you run `make install` after building, you need to repeat all command line options you provided to `make`
in the build step, as some settings like the supported maximum number of threads are automatically derived from the
build host by default, which might not be what you want.
For building with `cmake`, the usual conventions apply, i.e. create a build directory either underneath the toplevel
OpenBLAS source directory or separate from it, and invoke `cmake` there with the path to the source tree and any
build options you plan to set.

For more details, see the [Building from source](http://www.openmathlib.org/OpenBLAS/docs/install/#building-from-source)
section in the docs.

### Cross compile

Set `CC` and `FC` to point to the cross toolchains, and if you use `make`, also set `HOSTCC` to your host C compiler.
The target must be specified explicitly when cross compiling.

Examples:

* On a Linux system, cross-compiling to an older MIPS64 router board:
```sh
make BINARY=64 CC=mipsisa64r6el-linux-gnuabi64-gcc FC=mipsisa64r6el-linux-gnuabi64-gfortran HOSTCC=gcc TARGET=P6600
```
* or to a Windows x64 host:
```sh
make CC="i686-w64-mingw32-gcc -Bstatic" FC="i686-w64-mingw32-gfortran -static-libgfortran" TARGET=HASWELL BINARY=32 CROSS=1 NUM_THREADS=20 CONSISTENT_FPCSR=1 HOSTCC=gcc
```

You can find instructions for other cases both in the "Supported Systems" section below and in
the [Building from source docs](http://www.openmathlib.org/OpenBLAS/docs/install).
The `.yml` scripts included with the sources (which contain the
build scripts for the "continuous integration" (CI) build tests automatically run on every proposed change to the sources) may also provide additional hints.

When compiling for a more modern CPU target of the same architecture, e.g. `TARGET=SKYLAKEX` on a `HASWELL` host, option `CROSS=1` can be used to suppress the automatic invocation of the tests at the end of the build.

### Debug version

A debug version can be built using `make DEBUG=1`.

### Compile with MASS support on Power CPU (optional)

The [IBM MASS](https://www.ibm.com/support/home/product/W511326D80541V01/other_software/mathematical_acceleration_subsystem) library consists of a set of mathematical functions for C, C++, and Fortran applications that are tuned for optimum performance on POWER architectures.
OpenBLAS with MASS requires a 64-bit, little-endian OS on POWER.
The library can be installed as shown:

* On Ubuntu:
```sh
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add -
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list
sudo apt-get update
sudo apt-get install libxlmass-devel.8.1.5
```

* On RHEL/CentOS:
```sh
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key
sudo rpm --import repomd.xml.key
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/
sudo yum install libxlmass-devel.8.1.5
```

After installing the MASS library, compile OpenBLAS with `USE_MASS=1`.
For example, to compile on Power8 with MASS support: `make USE_MASS=1 TARGET=POWER8`.

### Install to a specific directory (optional)

Use `PREFIX=` when invoking `make`, for example

```sh
make install PREFIX=your_installation_directory
```
(along with all options you added on the `make` command line in the preceding build step)
The default installation directory is `/opt/OpenBLAS`.

## Supported CPUs and Operating Systems

Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by the 2010 GotoBLAS.

### Additional supported CPUs

#### x86/x86-64

- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
- **Intel Sandy Bridge**: Optimized Level-3 and Level-2 BLAS with AVX on x86-64.
- **Intel Haswell**: Optimized Level-3 and Level-2 BLAS with AVX2 and FMA on x86-64.
- **Intel Skylake-X**: Optimized Level-3 and Level-2 BLAS with AVX512 and FMA on x86-64.
- **Intel Cooper Lake**: as Skylake-X with improved BFLOAT16 support.
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
- **AMD Bulldozer**: x86-64 ?GEMM FMA4 kernels. (Thanks to Werner Saar)
- **AMD PILEDRIVER**: Uses Bulldozer codes with some optimizations.
- **AMD STEAMROLLER**: Uses Bulldozer codes with some optimizations.
- **AMD ZEN**: Uses Haswell codes with some optimizations for Zen 2/3 (use SkylakeX for Zen4)

#### MIPS32

- **MIPS 1004K**: uses P5600 codes
- **MIPS 24K**: uses P5600 codes

#### MIPS64

- **ICT Loongson 3A**: Optimized Level-3 BLAS and the part of Level-1,2.
- **ICT Loongson 3B**: Experimental

#### ARM

- **ARMv6**: Optimized BLAS for vfpv2 and vfpv3-d16 (e.g. BCM2835, Cortex M0+)
- **ARMv7**: Optimized BLAS for vfpv3-d32 (e.g. Cortex A8, A9 and A15)

#### ARM64

- **ARMv8**: Basic ARMV8 with small caches, optimized Level-3 and Level-2 BLAS
- **Cortex-A53**: same as ARMV8 (different cpu specifications)
- **Cortex-A55**: same as ARMV8 (different cpu specifications)
- **Cortex A57**: Optimized Level-3 and Level-2 functions
- **Cortex A72**: same as A57 ( different cpu specifications)
- **Cortex A73**: same as A57 (different cpu specifications)
- **Cortex A76**: same as A57 (different cpu specifications)
- **Falkor**: same as A57 (different cpu specifications)
- **ThunderX**: Optimized some Level-1 functions
- **ThunderX2T99**: Optimized Level-3 BLAS and parts of Levels 1 and 2
- **ThunderX3T110**
- **TSV110**: Optimized some Level-3 helper functions
- **EMAG 8180**: preliminary support based on A57
- **Neoverse N1**: (AWS Graviton2) preliminary support
- **Neoverse V1**: (AWS Graviton3) optimized Level-3 BLAS
- **Apple Vortex**: preliminary support based on ThunderX2/3
- **A64FX**: preliminary support, optimized Level-3 BLAS
- **ARMV8SVE**: any ARMV8 cpu with SVE extensions

#### PPC/PPC64

- **POWER8**: Optimized BLAS, only for PPC64LE (Little Endian), only with `USE_OPENMP=1`
- **POWER9**: Optimized Level-3 BLAS (real) and some Level-1,2. PPC64LE with OpenMP only.
- **POWER10**: Optimized Level-3 BLAS including SBGEMM and some Level-1,2.

- **AIX**: Dynamic architecture with OpenXL and OpenMP.
```sh
make CC=ibm-clang_r FC=xlf_r TARGET=POWER7 BINARY=64 USE_OPENMP=1 INTERFACE64=1 DYNAMIC_ARCH=1 USE_THREAD=1
```

#### IBM zEnterprise System

- **Z13**: Optimized Level-3 BLAS and Level-1,2
- **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2

#### RISC-V

- **C910V**: Optimized Level-3 BLAS (real) and Level-1,2 by RISC-V Vector extension 0.7.1.
```sh
make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran
```
(also known to work on C906 as long as you use only single-precision functions - its instruction set support appears to be incomplete in double precision)

- **x280**: Level-3 BLAS and Level-1,2 are optimized by RISC-V Vector extension 1.0.
```sh
make HOSTCC=gcc TARGET=x280 NUM_THREADS=8 CC=riscv64-unknown-linux-gnu-clang FC=riscv64-unknown-linux-gnu-gfortran
```

- **ZVL???B**: Level-3 BLAS and Level-1,2 including vectorised kernels targeting generic RISCV cores with vector support with registers of at least the corresponding width; ZVL128B and ZVL256B are available.
e.g.:
```sh
make TARGET=RISCV64_ZVL256B CFLAGS="-DTARGET=RISCV64_ZVL256B" \
BINARY=64 ARCH=riscv64 CC='clang -target riscv64-unknown-linux-gnu' \
AR=riscv64-unknown-linux-gnu-ar AS=riscv64-unknown-linux-gnu-gcc \
LD=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran \
HOSTCC=gcc HOSTFC=gfortran -j
```

#### LOONGARCH64

- **LA64_GENERIC**: Optimized Level-3, Level-2 and Level-1 BLAS with scalar instruction
```sh
make HOSTCC=gcc TARGET=LA64_GENERIC CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1
```
The old-style TARGET=LOONGSONGENERIC is still supported

- **LA264**: Optimized Level-3, Level-2 and Level-1 BLAS with LSX instruction
```sh
make HOSTCC=gcc TARGET=LA264 CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1
```
The old-style TARGET=LOONGSON2K1000 is still supported

- **LA464**: Optimized Level-3, Level-2 and Level-1 BLAS with LASX instruction
```sh
make HOSTCC=gcc TARGET=LA464 CC=loongarch64-unknown-linux-gnu-gcc FC=loongarch64-unknown-linux-gnu-gfortran USE_SIMPLE_THREADED_LEVEL3=1
```
The old-style TARGET=LOONGSON3R5 is still supported

### Support for multiple targets in a single library

OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.

For **x86_64**, the list of targets this activates contains Prescott, Core2, Nehalem, Barcelona, Sandybridge, Bulldozer, Piledriver, Steamroller, Excavator, Haswell, Zen, SkylakeX, Cooper Lake, Sapphire Rapids. For cpu generations not included in this list, the corresponding older model is used. If you also specify `DYNAMIC_OLDER=1`, specific support for Penryn, Dunnington, Opteron, Opteron/SSE3, Bobcat, Atom and Nano is added. Finally there is an option `DYNAMIC_LIST` that allows to specify an individual list of targets to include instead of the default.

`DYNAMIC_ARCH` is also supported on **x86**, where it translates to Katmai, Coppermine, Northwood, Prescott, Banias,
Core2, Penryn, Dunnington, Nehalem, Athlon, Opteron, Opteron_SSE3, Barcelona, Bobcat, Atom and Nano.

On **ARMV8**, it enables support for CortexA53, CortexA57, CortexA72, CortexA73, Falkor, ThunderX, ThunderX2T99, TSV110 as well as generic ARMV8 cpus. If compiler support for SVE is available at build time, support for NeoverseN2, NeoverseV1 as well as generic ArmV8SVE targets is also enabled.

For **POWER**, the list encompasses POWER6, POWER8 and POWER9. POWER10 is additionally available if a sufficiently recent compiler is used for the build.

on **ZARCH** it comprises Z13 and Z14 as well as generic zarch support.

On **riscv64**, DYNAMIC_ARCH enables support for riscv64_zvl128b and riscv64_zvl256b in addition to generic riscv64 support. A compiler that supports RVV 1.0 is required to build OpenBLAS for riscv64 when DYNAMIC_ARCH is enabled.

On **LoongArch64**, it comprises LA264 and LA464 as well as generic LoongArch64 support.

The `TARGET` option can - and usually **should** - be used in conjunction with `DYNAMIC_ARCH=1` to specify which cpu model should be assumed for all the common code in the library, usually you will want to set this to the oldest model you expect to encounter.
Failure to specify this may lead to advanced instructions being used by the compiler, just because the build host happens to support them. This is most likely to happen when aggressive optimization options are in effect, and the resulting library may then crash with an
illegal instruction error on weaker hardware, before it even reaches the BLAS routines specifically included for that cpu.

Please note that it is not possible to combine support for different architectures, so no combined 32 and 64 bit or x86_64 and arm64 in the same library.

### Supported OS

- **GNU/Linux**
- **MinGW or Visual Studio (CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>.
- **Darwin/macOS/OSX/iOS**: Experimental. Although GotoBLAS2 already supports Darwin, we are not OSX/iOS experts.
- **FreeBSD**: Supported by the community. We don't actively test the library on this OS.
- **OpenBSD**: Supported by the community. We don't actively test the library on this OS.
- **NetBSD**: Supported by the community. We don't actively test the library on this OS.
- **DragonFly BSD**: Supported by the community. We don't actively test the library on this OS.
- **Android**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>.
- **AIX**: Supported on PPC up to POWER10
- **Haiku**: Supported by the community. We don't actively test the library on this OS.
- **SunOS**: Supported by the community. We don't actively test the library on this OS.
- **Cortex-M**: Supported by the community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-on-Cortex-M>.

## Usage

Statically link with `libopenblas.a` or dynamically link with `-lopenblas` if OpenBLAS was
compiled as a shared library.

### Setting the number of threads using environment variables

Environment variables are used to specify a maximum number of threads.
For example,

```sh
export OPENBLAS_NUM_THREADS=4
export GOTO_NUM_THREADS=4
export OMP_NUM_THREADS=4
```

The priorities are `OPENBLAS_NUM_THREADS` > `GOTO_NUM_THREADS` > `OMP_NUM_THREADS`.

If you compile this library with `USE_OPENMP=1`, you should set the `OMP_NUM_THREADS`
environment variable; OpenBLAS ignores `OPENBLAS_NUM_THREADS` and `GOTO_NUM_THREADS` when
compiled with `USE_OPENMP=1`.

### Setting the number of threads at runtime

We provide the following functions to control the number of threads at runtime:

```c
void goto_set_num_threads(int num_threads);
void openblas_set_num_threads(int num_threads);
```
Note that these are only used once at library initialization, and are not available for
fine-tuning thread numbers in individual BLAS calls.
If you compile this library with `USE_OPENMP=1`, you should use the above functions too.

## Reporting bugs

Please submit an issue in https://github.com/OpenMathLib/OpenBLAS/issues.

## Contact

+ Use github discussions: https://github.com/OpenMathLib/OpenBLAS/discussions
* OpenBLAS users mailing list: https://groups.google.com/forum/#!forum/openblas-users
* OpenBLAS developers mailing list: https://groups.google.com/forum/#!forum/openblas-dev

## Change log

Please see Changelog.txt.

## Troubleshooting

* Please read the [FAQ](http://www.openmathlib.org/OpenBLAS/docs/faq) section of the docs first.
* Please use GCC version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MinGW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture.
Clang 3.0 will generate the wrong AVX binary code.
* Please use GCC version 6 or LLVM version 6 and above to compile Skylake/CooperLake AVX512 kernels
* Please use LLVM version 18 and above (version 19 and above on Windows) if you plan to use
its new flang compiler for Fortran
* Please use GCC version 11 and above to compile OpenBLAS on the POWER architecture
* The number of CPUs/cores should be less than or equal to 256. On Linux `x86_64` (`amd64`),
there is experimental support for up to 1024 CPUs/cores and 128 numa nodes if you build
the library with `BIGNUMA=1`.
* OpenBLAS does not set processor affinity by default.
On Linux, you can enable processor affinity by commenting out the line `NO_AFFINITY=1` in
Makefile.rule. However, note that this may cause
[a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
* On Loongson 3A, `make test` may fail with a `pthread_create` error (`EAGAIN`).
However, it will be okay when you run the same test case on the shell.

## Contributing

1. [Check for open issues](https://github.com/OpenMathLib/OpenBLAS/issues) or open a fresh issue
to start a discussion around a feature idea or a bug.
2. Fork the [OpenBLAS](https://github.com/OpenMathLib/OpenBLAS) repository to start making your changes.
3. Write a test which shows that the bug was fixed or that the feature works as expected.
4. Send a pull request. Make sure to add yourself to `CONTRIBUTORS.md`.

## Donation

Please see [the donations section](http://www.openmathlib.org/OpenBLAS/docs/about/#donations) in the docs.

+ 0
- 20
SECURITY.md View File

@@ -1,20 +0,0 @@
# Security Policy

## Supported Versions

It is generally recommended to use the latest release as this project
does not maintain multiple stable branches and providing packages e.g.
for Linux distributions is outside our scope. In particular, versions
before 0.3.18 can be assumed to carry the out-of-bounds-read error in
the LAPACK ?LARRV family of functions that was the subject of
CVE-2021-4048

## Reporting a Vulnerability

If you suspect that you have found a vulnerability - a defect that could
be abused to compromise the security of a user's code or systems - please
do not use the normal github issue tracker (except perhaps to post a general
warning if you deem that necessary). Instead, please contact the project
maintainers through the email addresses given in their github user profiles.
Defects found in the "lapack-netlib" subtree should ideally be reported to
the maintainers of the reference implementation of LAPACK, lapack@icl.itk.edu

+ 0
- 154
TargetList.txt View File

@@ -1,154 +0,0 @@
Force Target Examples:

make TARGET=NEHALEM
make TARGET=LOONGSON3A BINARY=64
make TARGET=ISTANBUL

Supported List:
1.X86/X86_64
a)Intel CPU:
P2
KATMAI
COPPERMINE
NORTHWOOD
PRESCOTT
BANIAS
YONAH
CORE2
PENRYN
DUNNINGTON
NEHALEM
SANDYBRIDGE
HASWELL
SKYLAKEX
ATOM
COOPERLAKE
SAPPHIRERAPIDS

b)AMD CPU:
ATHLON
OPTERON
OPTERON_SSE3
BARCELONA
SHANGHAI
ISTANBUL
BOBCAT
BULLDOZER
PILEDRIVER
STEAMROLLER
EXCAVATOR
ZEN

c)VIA CPU:
SSE_GENERIC
VIAC3
NANO

2.Power CPU:
POWER4
POWER5
POWER6
POWER7
POWER8
POWER9
POWER10
POWER11
PPCG4
PPC970
PPC970MP
PPC440
PPC440FP2
CELL

3.MIPS CPU:
P5600
MIPS1004K
MIPS24K

4.MIPS64 CPU:
MIPS64_GENERIC
SICORTEX
LOONGSON3A
LOONGSON3B
I6400
P6600
I6500

5.IA64 CPU:
ITANIUM2

6.SPARC CPU:
SPARC
SPARCV7

7.ARM CPU:
CORTEXA15
CORTEXA9
ARMV7
ARMV6
ARMV5

8.ARM 64-bit CPU:
ARMV8
CORTEXA53
CORTEXA57
CORTEXA72
CORTEXA73
CORTEXA76
CORTEXA510
CORTEXA710
CORTEXX1
CORTEXX2
NEOVERSEN1
NEOVERSEV1
NEOVERSEN2
NEOVERSEV2
CORTEXA55
EMAG8180
FALKOR
THUNDERX
THUNDERX2T99
TSV110
THUNDERX3T110
VORTEX
A64FX
ARMV8SVE
ARMV9SME
FT2000

9.System Z:
ZARCH_GENERIC
Z13
Z14

10.RISC-V 64:
RISCV64_GENERIC (e.g. PolarFire Soc/SiFive U54)
RISCV64_ZVL128B
C910V
x280
RISCV64_ZVL256B

11.LOONGARCH64:
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 are legacy names,
// and it is recommended to use the more standardized naming conventions
// LA64_GENERIC/LA264/LA464. You can still specify TARGET as
// LOONGSONGENERIC/LOONGSON2K1000/LOONGSON3R5 during compilation or runtime,
// and they will be internally relocated to LA64_GENERIC/LA264/LA464.
LOONGSONGENERIC
LOONGSON2K1000
LOONGSON3R5
LA64_GENERIC
LA264
LA464

12. Elbrus E2000:
E2K

13. Alpha
EV4
EV5
EV6

14.CSKY
CSKY
CK860FV

+ 0
- 213
USAGE.md View File

@@ -1,213 +0,0 @@
# Notes on OpenBLAS usage
## Usage

#### Program is Terminated. Because you tried to allocate too many memory regions

In OpenBLAS, we mange a pool of memory buffers and allocate the number of
buffers as the following.
```
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2)
```
This error indicates that the program exceeded the number of buffers.

Please build OpenBLAS with larger `NUM_THREADS`. For example, `make
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set
`MAX_CPU_NUMBER=NUM_THREADS`.

Despite its name, and due to the use of memory buffers in functions like SGEMM,
the setting of NUM_THREADS can be relevant even for a single-threaded build
of OpenBLAS, if such functions get called by multiple threads of a program
that uses OpenBLAS. In some cases, the affected code may simply crash or throw
a segmentation fault without displaying the above warning first.

Note that the number of threads used at runtime can be altered to differ from the
value NUM_THREADS was set to at build time. At runtime, the actual number of
threads can be set anywhere from 1 to the build's NUM_THREADS (note however,
that this does not change the number of memory buffers that will be allocated,
which is set at build time). The number of threads for a process can be set by
using the mechanisms described below.


#### How can I use OpenBLAS in multi-threaded applications?

If your application is already multi-threaded, it will conflict with OpenBLAS
multi-threading. Thus, you must set OpenBLAS to use single thread in any of the
following ways:

* `export OPENBLAS_NUM_THREADS=1` in the environment variables.
* Call `openblas_set_num_threads(1)` in the application on runtime.
* Build OpenBLAS single thread version, e.g. `make USE_THREAD=0`

If the application is parallelized by OpenMP, please use OpenBLAS built with
`USE_OPENMP=1`

#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH

The environment variable which control the kernel selection is
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`) e.g. `export
OPENBLAS_CORETYPE=Haswell` and the function `char* openblas_get_corename()`
returns the used target.

#### How could I disable OpenBLAS threading affinity on runtime?

You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment
variable to disable threading affinity on runtime. For example, before the
running,
```
export OPENBLAS_MAIN_FREE=1
```

Alternatively, you can disable affinity feature with enabling `NO_AFFINITY=1`
in `Makefile.rule`.

## Linking with the library

* Link with shared library

`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas`

If the library is multithreaded, please add `-lpthread`. If the library
contains LAPACK functions, please add `-lgfortran` or other Fortran libs.

* Link with static library

`gcc -o test test.c /your/path/libopenblas.a`

You can download `test.c` from https://gist.github.com/xianyi/5780018

On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by
default), custom programs statically linked against `libopenblas.a` should also
link with the pthread library e.g.:

```
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread
```

Failing to add the `-lpthread` flag will cause errors such as:

```
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory':
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock'
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock'
...
```

## Code examples

#### Call CBLAS interface
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656
```
#include <cblas.h>
#include <stdio.h>

void main()
{
int i=0;
double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};
cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);

for(i=0; i<9; i++)
printf("%lf ", C[i]);
printf("\n");
}
```
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran`

#### Call BLAS Fortran interface

This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018

```
#include "stdio.h"
#include "stdlib.h"
#include "sys/time.h"
#include "time.h"

extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);

int main(int argc, char* argv[])
{
int i;
printf("test!\n");
if(argc<4){
printf("Input Error\n");
return 1;
}

int m = atoi(argv[1]);
int n = atoi(argv[2]);
int k = atoi(argv[3]);
int sizeofa = m * k;
int sizeofb = k * n;
int sizeofc = m * n;
char ta = 'N';
char tb = 'N';
double alpha = 1.2;
double beta = 0.001;

struct timeval start,finish;
double duration;

double* A = (double*)malloc(sizeof(double) * sizeofa);
double* B = (double*)malloc(sizeof(double) * sizeofb);
double* C = (double*)malloc(sizeof(double) * sizeofc);

srand((unsigned)time(NULL));

for (i=0; i<sizeofa; i++)
A[i] = i%3+1;//(rand()%100)/10.0;

for (i=0; i<sizeofb; i++)
B[i] = i%3+1;//(rand()%100)/10.0;

for (i=0; i<sizeofc; i++)
C[i] = i%3+1;//(rand()%100)/10.0;
//#if 0
printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);
gettimeofday(&start, NULL);
dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
gettimeofday(&finish, NULL);

duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;
double gflops = 2.0 * m *n*k;
gflops = gflops/duration*1.0e-6;

FILE *fp;
fp = fopen("timeDGEMM.txt", "a");
fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, gflops);
fclose(fp);

free(A);
free(B);
free(C);
return 0;
}
```

` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a`

` ./time_dgemm <m> <n> <k> `

## Troubleshooting
* Please read [Faq](https://github.com/xianyi/OpenBLAS/wiki/Faq) at first.
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD.
* Please use Clang version 3.1 and above to compile the library on Sandy Bridge microarchitecture. The Clang 3.0 will generate the wrong AVX binary code.
* The number of CPUs/Cores should less than or equal to 256. On Linux x86_64(amd64), there is experimental support for up to 1024 CPUs/Cores and 128 numa nodes if you build the library with BIGNUMA=1.
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting the line NO_AFFINITY=1 in Makefile.rule. But this may cause [the conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
* On Loongson 3A. make test would be failed because of pthread_create error. The error code is EAGAIN. However, it will be OK when you run the same testcase on shell.

## BLAS reference manual
If you want to understand every BLAS function and definition, please read
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm)
or [netlib.org](http://netlib.org/blas/)

Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions)

## How to reference OpenBLAS.

You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications).

Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly.


+ 0
- 78
appveyor.yml View File

@@ -1,78 +0,0 @@
version: 0.2.19.{build}

#environment:

platform:
- x64

os: Visual Studio 2017

configuration: Release

clone_folder: c:\projects\OpenBLAS

init:
- git config --global core.autocrlf input

clone_depth: 5

skip_tags: true

matrix:
fast_finish: false

skip_commits:
# Add [av skip] to commit messages
message: /\[av skip\]/

environment:
global:
CONDA_INSTALL_LOCN: C:\\Miniconda36-x64
matrix:
# - COMPILER: clang-cl
# WITH_FORTRAN: ON
# - COMPILER: clang-cl
# DYNAMIC_ARCH: ON
# WITH_FORTRAN: OFF
# - COMPILER: cl
# - COMPILER: MinGW64-gcc-7.2.0-mingw
# DYNAMIC_ARCH: OFF
# WITH_FORTRAN: ignore
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
COMPILER: MinGW-gcc-6.3.0-32
- APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2015
COMPILER: MinGW-gcc-5.3.0
WITH_FORTRAN: ignore
install:
- if [%COMPILER%]==[clang-cl] call %CONDA_INSTALL_LOCN%\Scripts\activate.bat
- if [%COMPILER%]==[clang-cl] conda update --yes -n base conda
- if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force
- if [%COMPILER%]==[clang-cl] conda config --set auto_update_conda false
- if [%COMPILER%]==[clang-cl] conda install --yes --quiet clangdev cmake ninja flang=11.0.1
- if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio\2017\Community\VC\Auxiliary\Build\vcvarsall.bat" x64
- if [%COMPILER%]==[clang-cl] set "LIB=%CONDA_INSTALL_LOCN%\Library\lib;%LIB%"
- if [%COMPILER%]==[clang-cl] set "CPATH=%CONDA_INSTALL_LOCN%\Library\include;%CPATH%"

before_build:
- ps: if (-Not (Test-Path .\build)) { mkdir build }
- cd build
- set PATH=%PATH:C:\Program Files\Git\usr\bin;=%
- if [%COMPILER%]==[MinGW-gcc-5.3.0] set PATH=C:\MinGW\bin;C:\msys64\usr\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] set PATH=C:\MinGW\bin;C:\mingw-w64\x86_64-7.2.0-posix-seh-rt_v5-rev1\mingw64\bin;%PATH%
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] set PATH=C:\msys64\usr\bin;C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw64\bin;%PATH%
- if [%COMPILER%]==[cl] cmake -G "Visual Studio 15 2017 Win64" ..
- if [%COMPILER%]==[MinGW64-gcc-7.2.0-mingw] cmake -G "MinGW Makefiles" -DNOFORTRAN=1 ..
- if [%COMPILER%]==[MinGW-gcc-6.3.0-32] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
- if [%COMPILER%]==[MinGW-gcc-5.3.0] cmake -G "MSYS Makefiles" -DNOFORTRAN=1 ..
- if [%WITH_FORTRAN%]==[OFF] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_MT=mt -DMSVC_STATIC_CRT=ON ..
- if [%WITH_FORTRAN%]==[ON] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER=flang -DCMAKE_MT=mt -DBUILD_WITHOUT_LAPACK=no -DNOFORTRAN=0 ..
- if [%USE_OPENMP%]==[ON] cmake -DUSE_OPENMP=ON ..
- if [%DYNAMIC_ARCH%]==[ON] cmake -DDYNAMIC_ARCH=ON -DDYNAMIC_LIST='CORE2;NEHALEM;SANDYBRIDGE;BULLDOZER;HASWELL' ..

build_script:
- cmake --build .

test_script:
- ctest -j2

+ 0
- 330
azure-pipelines.yml View File

@@ -1,330 +0,0 @@
trigger:
# start a new build for every push
batch: False
branches:
include:
- develop
resources:
containers:
- container: oneapi-hpckit
image: intel/oneapi-hpckit:latest
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
- container: oneapi-basekit
image: intel/oneapi-basekit:latest
options: '-v /usr/bin/sudo:/usr/bin/sudo -v /usr/lib/sudo/libsudo_util.so.0:/usr/lib/sudo/libsudo_util.so.0 -v /usr/lib/sudo/sudoers.so:/usr/lib/sudo/sudoers.so'
jobs:
# manylinux1 is useful to test because the
# standard Docker container uses an old version
# of gcc / glibc
- job: manylinux1_gcc
pool:
vmImage: 'ubuntu-latest'
steps:
- script: |
echo "FROM quay.io/pypa/manylinux1_x86_64
COPY . /tmp/openblas
RUN cd /tmp/openblas && \
CC=gcc && \
make QUIET_MAKE=1 BINARY=64 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 && \
make -C test BINARY=64 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 && \
make -C ctest BINARY=64 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 && \
make -C utest BINARY=64 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" > Dockerfile
docker build .
displayName: Run manylinux1 docker build
- job: manylinux_32bit
pool:
vmImage: 'ubuntu-latest'
steps:
- script: |
echo "FROM quay.io/pypa/manylinux2014_i686
COPY . /tmp/openblas
RUN cd /tmp/openblas && \
CC=gcc && \
make QUIET_MAKE=1 BINARY=32 TARGET=NEHALEM NUM_THREADS=32 && \
make -C test BINARY=32 TARGET=NEHALEM NUM_THREADS=32 && \
make -C ctest BINARY=32 TARGET=NEHALEM NUM_THREADS=32 && \
make -C utest BINARY=32 TARGET=NEHALEM NUM_THREADS=32" > Dockerfile
docker build .
displayName: Run manylinux 32bit docker build
- job: Intel_SDE_skx
pool:
vmImage: 'ubuntu-latest'
steps:
- script: |
# at the time of writing the available Azure Ubuntu vm image
# does not support AVX512VL, so use more recent LTS version
echo "FROM ubuntu:bionic
COPY . /tmp/openblas
RUN apt-get -y update && apt-get -y install \\
cmake \\
gfortran \\
make \\
wget
RUN mkdir /tmp/SDE && cd /tmp/SDE && \\
mkdir sde-external-8.35.0-2019-03-11-lin && \\
wget --quiet -O sde-external-8.35.0-2019-03-11-lin.tar.bz2 https://www.dropbox.com/s/fopsnzj67572sj5/sde-external-8.35.0-2019-03-11-lin.tar.bz2?dl=0 && \\
tar -xjvf sde-external-8.35.0-2019-03-11-lin.tar.bz2 -C /tmp/SDE/sde-external-8.35.0-2019-03-11-lin --strip-components=1
RUN cd /tmp/openblas && CC=gcc make QUIET_MAKE=1 DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64
CMD cd /tmp/openblas && echo 0 > /proc/sys/kernel/yama/ptrace_scope && CC=gcc OPENBLAS_VERBOSE=2 /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/sde64 -cpuid_in /tmp/SDE/sde-external-8.35.0-2019-03-11-lin/misc/cpuid/skx/cpuid.def -- make -C utest DYNAMIC_ARCH=1 NUM_THREADS=32 BINARY=64" > Dockerfile
docker build -t intel_sde .
# we need a privileged docker run for sde process attachment
docker run --privileged intel_sde
displayName: 'Run AVX512 SkylakeX docker build / test'

- job: Windows_cl
pool:
vmImage: 'windows-latest'
steps:
- task: CMake@1
inputs:
workingDirectory: 'build' # Optional
cmakeArgs: '-G "Visual Studio 17 2022" ..'
- task: CMake@1
inputs:
cmakeArgs: '--build . --config Release'
workingDirectory: 'build'
- script: |
cd build
cd utest
dir
openblas_utest.exe

- job: Windows_mingw_gmake
pool:
vmImage: 'windows-latest'
steps:
- script: |
mingw32-make CC=gcc FC=gfortran DYNAMIC_ARCH=1 DYNAMIC_LIST="SANDYBRIDGE"

- job: Windows_clang_cmake
pool:
vmImage: 'windows-latest'
steps:
- script: |
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
set "LIB=C:\Miniconda\Library\lib;%LIB%"
set "CPATH=C:\Miniconda\Library\include;%CPATH%
conda config --add channels conda-forge --force
conda config --set auto_update_conda false
conda install --yes ninja
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
mkdir build
cd build
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DNOFORTRAN=1 -DMSVC_STATIC_CRT=ON ..
cmake --build . --config Release
ctest

- job: Windows_flang_clang
pool:
vmImage: 'windows-2022'
steps:
- script: |
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
set "LIB=C:\Miniconda\Library\lib;%LIB%"
set "CPATH=C:\Miniconda\Library\include;%CPATH%"
conda config --add channels conda-forge --force
conda config --set auto_update_conda false
conda install --yes --quiet ninja flang
mkdir build
cd build
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
cmake -G "Ninja" -DCMAKE_C_COMPILER=clang-cl -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_Fortran_COMPILER="flang -I C:\Miniconda\Library\include\flang" -DBUILD_TESTING=OFF -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
cmake --build . --config Release
ctest

- job: Windows_cl_flang
pool:
vmImage: 'windows-2022'
steps:
- script: |
set "PATH=C:\Miniconda\Scripts;C:\Miniconda\Library\bin;C:\Miniconda\Library\usr\bin;C:\Miniconda\condabin;%PATH%"
set "LIB=C:\Miniconda\Library\lib;%LIB%"
set "CPATH=C:\Miniconda\Library\include;%CPATH%"
conda config --add channels conda-forge --force
conda config --set auto_update_conda false
conda install --yes --quiet ninja flang
mkdir build
cd build
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
cmake -G "Ninja" -DCMAKE_C_COMPILER=cl -DCMAKE_Fortran_COMPILER=flang-new -DC_LAPACK=1 -DCMAKE_MT=mt -DCMAKE_BUILD_TYPE=Release -DMSVC_STATIC_CRT=ON ..
cmake --build . --config Release
ctest
ctest --rerun-failed --output-on-failure


- job: OSX_OpenMP
pool:
vmImage: 'macOS-13'
steps:
- script: |
brew update
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-13 FC=gfortran-13
make TARGET=CORE2 DYNAMIC_ARCH=1 USE_OPENMP=1 INTERFACE64=1 CC=gcc-13 FC=gfortran-13 PREFIX=../blasinst install
ls -lR ../blasinst
- job: OSX_GCC_Nothreads
pool:
vmImage: 'macOS-13'
steps:
- script: |
brew update
make USE_THREADS=0 CC=gcc-13 FC=gfortran-13
- job: OSX_GCC12
pool:
vmImage: 'macOS-latest'
steps:
- script: |
brew update
make CC=gcc-12 FC=gfortran-12

- job: OSX_LLVM_flangnew
pool:
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
steps:
- script: |
brew update
brew install llvm flang
make TARGET=NEHALEM CC=/usr/local/opt/llvm/bin/clang FC=/usr/local/opt/flang/bin/flang NO_SHARED=1

- job: OSX_OpenMP_Clang
pool:
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
steps:
- script: |
brew update
brew install llvm libomp
make TARGET=CORE2 USE_OPENMP=1 DYNAMIC_ARCH=1 CC=/usr/local/opt/llvm/bin/clang NOFORTRAN=1

- job: OSX_OpenMP_Clang_cmake
pool:
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
steps:
- script: |
brew update
brew install llvm libomp
mkdir build
cd build
cmake -DTARGET=CORE2 -DUSE_OPENMP=1 -DINTERFACE64=1 -DDYNAMIC_ARCH=1 -DCMAKE_C_COMPILER=/usr/local/opt/llvm/bin/clang -DNOFORTRAN=1 -DNO_AVX512=1 ..
make
ctest
- job: OSX_dynarch_cmake
pool:
vmImage: 'macOS-13'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
LIBRARY_PATH: /usr/local/opt/llvm/lib
steps:
- script: |
mkdir build
cd build
cmake -DTARGET=CORE2 -DDYNAMIC_ARCH=1 -DDYNAMIC_LIST='NEHALEM HASWELL SKYLAKEX' -DCMAKE_C_COMPILER=gcc-13 -DCMAKE_Fortran_COMPILER=gfortran-13 -DBUILD_SHARED_LIBS=ON ..
cmake --build .
ctest

- job: OSX_Ifort_Clang
pool:
vmImage: 'macOS-latest'
variables:
LD_LIBRARY_PATH: /usr/local/opt/llvm/lib
MACOS_HPCKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/edb4dc2f-266f-47f2-8d56-21bc7764e119/m_HPCKit_p_2023.2.0.49443.dmg
LIBRARY_PATH: /usr/local/opt/llvm/lib
MACOS_FORTRAN_COMPONENTS: intel.oneapi.mac.ifort-compiler
steps:
- script: |
brew update
brew install llvm libomp
sudo mkdir -p /opt/intel
sudo chown $USER /opt/intel
displayName: prepare for cache restore
- task: Cache@2
inputs:
path: /opt/intel/oneapi
key: '"install" | "$(MACOS_HPCKIT_URL)" | "$(MACOS_FORTRAN_COMPONENTS)"'
cacheHitVar: CACHE_RESTORED
- script: |
curl --output webimage.dmg --url $(MACOS_HPCKIT_URL) --retry 5 --retry-delay 5
hdiutil attach webimage.dmg
sudo /Volumes/"$(basename "$(MACOS_HPCKIT_URL)" .dmg)"/bootstrapper.app/Contents/MacOS/bootstrapper -s --action install --components="$(MACOS_FORTRAN_COMPONENTS)" --eula=accept --continue-with-optional-error=yes --log-dir=.
installer_exit_code=$?
hdiutil detach /Volumes/"$(basename "$URL" .dmg)" -quiet
exit $installer_exit_code
displayName: install
condition: ne(variables.CACHE_RESTORED, 'true')
- script: |
source /opt/intel/oneapi/setvars.sh
make CC=/usr/local/opt/llvm/bin/clang FC=ifort
- job: OSX_NDK_ARMV7
pool:
vmImage: 'macOS-13'
steps:
- script: |
brew update
brew install --cask android-ndk
export ANDROID_NDK_HOME=/usr/local/share/android-ndk
make TARGET=ARMV7 ONLY_CBLAS=1 CC=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/armv7a-linux-androideabi21-clang AR=$ANDROID_NDK_HOME/toolchains/llvm/prebuilt/darwin-x86_64/bin/llvm-ar HOSTCC=gcc ARM_SOFTFP_ABI=1 -j4

- job: OSX_IOS_ARMV8
pool:
vmImage: 'macOS-13'
variables:
CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.2.sdk -arch arm64 -miphoneos-version-min=10.0
steps:
- script: |
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1

- job: OSX_IOS_ARMV7
pool:
vmImage: 'macOS-13'
variables:
CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
CFLAGS: -O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS16.2.sdk -arch armv7 -miphoneos-version-min=5.1
steps:
- script: |
make TARGET=ARMV7 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1

- job: OSX_xbuild_DYNAMIC_ARM64
pool:
vmImage: 'macOS-13'
variables:
CC: /Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang
CFLAGS: -O2 -Wno-macro-redefined -isysroot /Applications/Xcode_14.2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX13.1.sdk -arch arm64
steps:
- script: |
ls /Applications/Xcode_14.2.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs
/Applications/Xcode_12.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang -arch arm64 --print-supported-cpus
/Applications/Xcode_14.2.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang --version
make TARGET=ARMV8 DYNAMIC_ARCH=1 NUM_THREADS=32 HOSTCC=clang NOFORTRAN=1

- job: ALPINE_MUSL
pool:
vmImage: 'ubuntu-latest'
steps:
- script: |
wget https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.14.0/alpine-chroot-install \
&& echo 'ccbf65f85cdc351851f8ad025bb3e65bae4d5b06 alpine-chroot-install' | sha1sum -c \
|| exit 1
alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers sudo'
alpine make DYNAMIC_ARCH=1 BINARY=64
alpine make DYNAMIC_ARCH=1 BINARY=64 PREFIX=mytestdir install
alpine ls -l mytestdir/include
alpine echo "// tests that inclusion of openblas_config.h works with musl" >test_install.c
alpine echo "#include <openblas_config.h>" >>test_install.c
alpine echo "int main(){" >> test_install.c
alpine echo "cpu_set_t* cpu_set = NULL;}" >>test_install.c
alpine gcc -Imytestdir/include test_install.c -Lmytestdir/lib -lopenblas -lpthread -lgfortran -o test_install

+ 0
- 9
benchmark/Make_exe.sh View File

@@ -1,9 +0,0 @@
#!/bin/bash

for f in *.goto *.acml *.mkl *.atlas
do
if [ -f "$f" ]; then
mv $f `echo $f|tr '.' '_'`.exe
fi
done


+ 0
- 3541
benchmark/Makefile
File diff suppressed because it is too large
View File


+ 0
- 133
benchmark/amax.c View File

@@ -1,133 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef AMAX

#ifdef COMPLEX
#ifdef DOUBLE
#define AMAX BLASFUNC(dzamax)
#else
#define AMAX BLASFUNC(scamax)
#endif
#else
#ifdef DOUBLE
#define AMAX BLASFUNC(damax)
#else
#define AMAX BLASFUNC(samax)
#endif
#endif

int main(int argc, char *argv[])
{

FLOAT *x;
blasint m, i;
blasint inc_x = 1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1, timeg;

argc--;
argv++;

if (argc > 0)
{
from = atol(*argv);
argc--;
argv++;
}
if (argc > 0)
{
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
}

if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);

if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{
fprintf(stderr, "Out of Memory!!\n");
exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for (m = from; m <= to; m += step)
{

timeg = 0;
fprintf(stderr, " %6d : ", (int)m);

for (l = 0; l < loops; l++)
{

for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}

begin();
AMAX(&m, x, &inc_x);
end();
timeg += getsec();
}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 137
benchmark/amin.c View File

@@ -1,137 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef AMIN

#ifdef COMPLEX
#ifdef DOUBLE
#define AMIN BLASFUNC(dzamin)
#else
#define AMIN BLASFUNC(scamin)
#endif
#else
#ifdef DOUBLE
#define AMIN BLASFUNC(damin)
#else
#define AMIN BLASFUNC(samin)
#endif
#endif

int main(int argc, char *argv[])
{

FLOAT *x;
blasint m, i;
blasint inc_x = 1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1, timeg;

argc--;
argv++;

if (argc > 0)
{
from = atol(*argv);
argc--;
argv++;
}
if (argc > 0)
{
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
}

if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);

if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{
fprintf(stderr, "Out of Memory!!\n");
exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for (m = from; m <= to; m += step)
{

timeg = 0;

fprintf(stderr, " %6d : ", (int)m);

for (l = 0; l < loops; l++)
{

for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}

begin();

AMIN(&m, x, &inc_x);

end();

timeg += getsec();
}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 135
benchmark/asum.c View File

@@ -1,135 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef ASUM

#ifdef COMPLEX
#ifdef DOUBLE
#define ASUM BLASFUNC(dzasum)
#else
#define ASUM BLASFUNC(scasum)
#endif
#else
#ifdef DOUBLE
#define ASUM BLASFUNC(dasum)
#else
#define ASUM BLASFUNC(sasum)
#endif
#endif

int main(int argc, char *argv[])
{

FLOAT *x;
FLOAT result;
blasint m, i;
blasint inc_x = 1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;
double time1, timeg;

argc--;
argv++;

if (argc > 0)
{
from = atol(*argv);
argc--;
argv++;
}
if (argc > 0)
{
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
}

if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);

if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{
fprintf(stderr, "Out of Memory!!\n");
exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for (m = from; m <= to; m += step)
{

timeg = 0;

fprintf(stderr, " %6d : ", (int)m);

for (l = 0; l < loops; l++)
{

for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
begin();
result = ASUM(&m, x, &inc_x);
end();
timeg += getsec();
}
if (loops > 1)
timeg /= loops;

#ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
#else
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
#endif
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 124
benchmark/axpby.c View File

@@ -1,124 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef AXPBY

#ifdef COMPLEX
#ifdef DOUBLE
#define AXPBY BLASFUNC(zaxpby)
#else
#define AXPBY BLASFUNC(caxpby)
#endif
#else
#ifdef DOUBLE
#define AXPBY BLASFUNC(daxpby)
#else
#define AXPBY BLASFUNC(saxpby)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
FLOAT beta[2] = {2.0, 2.0};
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for (l=0; l<loops; l++)
{
begin();
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );
end();
timeg += getsec();
}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
(COMPSIZE * COMPSIZE * 4. - COMPSIZE) * (double)m / timeg * 1.e-6, timeg);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 128
benchmark/axpy.c View File

@@ -1,128 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef AXPY

#ifdef COMPLEX
#ifdef DOUBLE
#define AXPY BLASFUNC(zaxpy)
#else
#define AXPY BLASFUNC(caxpy)
#endif
#else
#ifdef DOUBLE
#define AXPY BLASFUNC(daxpy)
#else
#define AXPY BLASFUNC(saxpy)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;
double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();

AXPY (&m, alpha, x, &inc_x, y, &inc_y );

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.9f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 134
benchmark/bench.h View File

@@ -1,134 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#elif defined(__APPLE__)
#include <mach/mach_time.h>
#endif
#include "common.h"

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}


#define malloc huge_malloc

#endif

/* Benchmarks should allocate with cacheline (often 64 bytes) alignment
to avoid unreliable results. This technique, storing the allocated
pointer value just before the aligned memory, doesn't require
C11's aligned_alloc for compatibility with older compilers. */
static void *aligned_alloc_cacheline(size_t n)
{
void *p = malloc((size_t)(void *) + n + L1_DATA_LINESIZE - 1);
if (p) {
void **newp = (void **)
(((uintptr_t)p + L1_DATA_LINESIZE) & (uintptr_t)-L1_DATA_LINESIZE);
newp[-1] = p;
p = newp;
}
return p;
}
#define malloc aligned_alloc_cacheline
#define free(p) free((p) ? ((void **)(p))[-1] : (p))

#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop;
#elif defined(__APPLE__)
mach_timebase_info_data_t info;
uint64_t start = 0, stop = 0;
#else
struct timespec start = { 0, 0 }, stop = { 0, 0 };
#endif

double getsec()
{
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
#elif defined(__APPLE__)
mach_timebase_info(&info);
return (double)(((stop - start) * info.numer)/info.denom) * 1.e-9;
#else
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
#endif
}

void begin() {
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &start, (struct timezone *)0);
#elif defined(__APPLE__)
start = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
#else
clock_gettime(CLOCK_REALTIME, &start);
#endif
}

void end() {
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &stop, (struct timezone *)0);
#elif defined(__APPLE__)
stop = clock_gettime_nsec_np(CLOCK_UPTIME_RAW);
#else
clock_gettime(CLOCK_REALTIME, &stop);
#endif
}

+ 0
- 247
benchmark/cholesky.c View File

@@ -1,247 +0,0 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#include "bench.h"

double fabs(double);

#undef POTRF

#ifndef COMPLEX
#ifdef XDOUBLE
#define POTRF BLASFUNC(qpotrf)
#define SYRK BLASFUNC(qsyrk)
#elif defined(DOUBLE)
#define POTRF BLASFUNC(dpotrf)
#define SYRK BLASFUNC(dsyrk)
#else
#define POTRF BLASFUNC(spotrf)
#define SYRK BLASFUNC(ssyrk)
#endif
#else
#ifdef XDOUBLE
#define POTRF BLASFUNC(xpotrf)
#define SYRK BLASFUNC(xherk)
#elif defined(DOUBLE)
#define POTRF BLASFUNC(zpotrf)
#define SYRK BLASFUNC(zherk)
#else
#define POTRF BLASFUNC(cpotrf)
#define SYRK BLASFUNC(cherk)
#endif
#endif

static __inline double getmflops(int ratio, int m, double secs){

double mm = (double)m;
double mulflops, addflops;

if (secs==0.) return 0.;

mulflops = mm * (1./3. + mm * (1./2. + mm * 1./6.));
addflops = 1./6. * mm * (mm * mm - 1);

if (ratio == 1) {
return (mulflops + addflops) / secs * 1.e-6;
} else {
return (2. * mulflops + 6. * addflops) / secs * 1.e-6;
}
}


int main(int argc, char *argv[]){

#ifndef COMPLEX
char *trans[] = {"T", "N"};
#else
char *trans[] = {"C", "N"};
#endif
char *uplo[] = {"U", "L"};
FLOAT alpha[] = {1.0, 0.0};
FLOAT beta [] = {0.0, 0.0};

FLOAT *a, *b;

blasint m, i, j, info, uplos;

int from = 1;
int to = 200;
int step = 1;

FLOAT maxerr;

double time1;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

for(m = from; m <= to; m += step){

fprintf(stderr, "M = %6d : ", (int)m);

for (uplos = 0; uplos < 2; uplos ++) {

#ifndef COMPLEX
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = 0.;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) a[(long)i + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[(long)j + (long)j * (long)m] = ((double) rand() / (double) RAND_MAX) + 8.;
for(i = j + 1; i < m; i++) a[(long)i + (long)j * (long)m] = 0.;
}
}
#else
if (uplos & 1) {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
}

a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;

for(i = j + 1; i < m; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}
}
} else {
for (j = 0; j < m; j++) {
for(i = 0; i < j; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) - 0.5;
a[((long)i + (long)j * (long)m) * 2 + 1] = ((double) rand() / (double) RAND_MAX) - 0.5;
}

a[((long)j + (long)j * (long)m) * 2 + 0] = ((double) rand() / (double) RAND_MAX) + 8.;
a[((long)j + (long)j * (long)m) * 2 + 1] = 0.;

for(i = j + 1; i < m; i++) {
a[((long)i + (long)j * (long)m) * 2 + 0] = 0.;
a[((long)i + (long)j * (long)m) * 2 + 1] = 0.;
}
}
}
#endif

SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);

begin();

POTRF(uplo[uplos], &m, b, &m, &info);

end();

if (info != 0) {
fprintf(stderr, "Info = %d\n", info);
exit(1);
}

time1 = getsec();


if (!(uplos & 1)) {
for (j = 0; j < m; j++) {
for(i = 0; i <= j; i++) {
#ifndef COMPLEX
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
#else
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
#endif
}
}
} else {
for (j = 0; j < m; j++) {
for(i = j; i < m; i++) {
#ifndef COMPLEX
if (maxerr < fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]))
maxerr = fabs(a[(long)i + (long)j * (long)m] - b[(long)i + (long)j * (long)m]);
#else
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 0] - b[((long)i + (long)j * (long)m) * 2 + 0]);
if (maxerr < fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]))
maxerr = fabs(a[((long)i + (long)j * (long)m) * 2 + 1] - b[((long)i + (long)j * (long)m) * 2 + 1]);
#endif
}
}
}

fprintf(stderr,
#ifdef XDOUBLE
" %Le %10.3f MFlops", maxerr,
#else
" %e %10.3f MFlops", maxerr,
#endif
getmflops(COMPSIZE * COMPSIZE, m, time1));

if (maxerr > 1.e-3) {
fprintf(stderr, "Hmm, probably it has bug.\n");
exit(1);
}

}
fprintf(stderr, "\n");

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 123
benchmark/copy.c View File

@@ -1,123 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef COPY

#ifdef COMPLEX
#ifdef DOUBLE
#define COPY BLASFUNC(zcopy)
#else
#define COPY BLASFUNC(ccopy)
#endif
#else
#ifdef DOUBLE
#define COPY BLASFUNC(dcopy)
#else
#define COPY BLASFUNC(scopy)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x, *y;
FLOAT alpha[2] = { 2.0, 2.0 };
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1 = 0.0, timeg = 0.0;
long nanos = 0;
time_t seconds = 0;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for (l=0; l<loops; l++)
{
begin();
COPY (&m, x, &inc_x, y, &inc_y );
end();
timeg += getsec();
}

timeg /= loops;

fprintf(stderr,
" %10.2f MBytes %12.9f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg / 1.e6, timeg);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 28
benchmark/cula_wrapper.c View File

@@ -1,28 +0,0 @@
#include <stdio.h>
#include "culapack.h"

static int initialized = 0;

int sgetrf_(int *m, int *n, float *a, int *lda, int *ipiv, int *info) {

if (!initialized) {
culaInitialize();
initialized = 1;
}

*info = culaSgetrf(*m, *m, a, *lda, ipiv);

return 0;
}

int cgetrf_(int *m, int *n, float *a, int *lda, int *ipiv, int *info) {

if (!initialized) {
culaInitialize();
initialized = 1;
}

*info = culaCgetrf(*m, *m, (culaFloatComplex *)a, *lda, ipiv);

return 0;
}

+ 0
- 118
benchmark/dot.c View File

@@ -1,118 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef DOT

#ifdef DOUBLE
#define DOT BLASFUNC(ddot)
#else
#define DOT BLASFUNC(sdot)
#endif

int main(int argc, char *argv[]){

FLOAT *x, *y;
FLOAT result;
blasint m, i;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();

result = DOT (&m, x, &inc_x, y, &inc_y );

end();
timeg += getsec();

}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 189
benchmark/geev.c View File

@@ -1,189 +0,0 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#include "bench.h"

#undef GEEV

#ifndef COMPLEX
#ifdef XDOUBLE
#define GEEV BLASFUNC(qgeev)
#elif defined(DOUBLE)
#define GEEV BLASFUNC(dgeev)
#else
#define GEEV BLASFUNC(sgeev)
#endif
#else
#ifdef XDOUBLE
#define GEEV BLASFUNC(xgeev)
#elif defined(DOUBLE)
#define GEEV BLASFUNC(zgeev)
#else
#define GEEV BLASFUNC(cgeev)
#endif
#endif

#ifndef COMPLEX
extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
blasint* lda, FLOAT* wr, FLOAT* wi, FLOAT* vl, blasint* ldvl,
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, blasint* info );
#else
extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
blasint* lda, FLOAT* wr, FLOAT* vl, blasint* ldvl,
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
#endif

int main(int argc, char *argv[]){

FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
FLOAT wkopt[4];
char job='V';
char jobr='N';
char *p;

blasint m, i, j, info,lwork;
double factor = 26.33;

int from = 1;
int to = 200;
int step = 1;

double time1;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_JOB"))) job=*p;

if ( job == 'N' ) factor = 10.0;

fprintf(stderr, "From : %3d To : %3d Step = %3d Job=%c\n", from, to, step,job);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( vl = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( vr = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( wr = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( wi = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( rwork = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

for(j = 0; j < to; j++){
for(i = 0; i < to * COMPSIZE; i++){
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}


lwork = -1;
m=to;
#ifndef COMPLEX
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
#else
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
#endif

lwork = (blasint)wkopt[0];
if (( work = (FLOAT *)malloc(sizeof(FLOAT) * lwork * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}


#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE FLops Time Lwork\n");

for(m = from; m <= to; m += step){

fprintf(stderr, " %6d : ", (int)m);
begin();

lwork = -1;
#ifndef COMPLEX
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info);
#else
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info);
#endif

lwork = (blasint)wkopt[0];
#ifndef COMPLEX
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info);
#else
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
#endif

end();

if (info) {
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
exit(1);
}

time1 = getsec();

fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n",
COMPSIZE * COMPSIZE * factor * (double)m * (double)m * (double)m / time1 * 1.e-6,time1,lwork);


}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 197
benchmark/gemm.c View File

@@ -1,197 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef GEMM

#ifndef COMPLEX

#ifdef DOUBLE
#define GEMM BLASFUNC(dgemm)
#elif defined(BFLOAT16) && defined(BGEMM)
#define GEMM BLASFUNC(bgemm)
#elif defined(BFLOAT16)
#define GEMM BLASFUNC(sbgemm)
#undef IFLOAT
#define IFLOAT bfloat16
#elif defined(HFLOAT16)
#define GEMM BLASFUNC(shgemm)
#undef IFLOAT
#define IFLOAT hfloat16
#else
#define GEMM BLASFUNC(sgemm)
#define IFLOAT float
#endif

#else

#ifdef DOUBLE
#define GEMM BLASFUNC(zgemm)
#else
#define GEMM BLASFUNC(cgemm)
#endif

#endif

int main(int argc, char *argv[]){

IFLOAT *a, *b;
FLOAT *c;
#ifdef BGEMM
blasint one=1;
blasint two=2;
float alpha_in[] = {1.0, 0.0};
float beta_in[] = {0.0, 0.0};
FLOAT alpha[2], beta[2];
sbstobf16_(&two, alpha_in, &one, alpha, &one);
sbstobf16_(&two, beta_in, &one, beta, &one);
#else
FLOAT alpha[] = {1.0, 0.0};
FLOAT beta [] = {0.0, 0.0};
#endif
char transa = 'N';
char transb = 'N';
blasint m, n, k, i, j, lda, ldb, ldc;
int loops = 1;
int has_param_m = 0;
int has_param_n = 0;
int has_param_k = 0;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1, timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++; }
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
if (argc > 0) { step = atol(*argv); argc--; argv++; }

if ((p = getenv("OPENBLAS_TRANS"))) {
transa=*p;
transb=*p;
}
if ((p = getenv("OPENBLAS_TRANSA"))) {
transa=*p;
}
if ((p = getenv("OPENBLAS_TRANSB"))) {
transb=*p;
}
TOUPPER(transa);
TOUPPER(transb);

fprintf(stderr, "From : %3d To : %3d Step=%d : Transa=%c : Transb=%c\n", from, to, step, transa, transb);

p = getenv("OPENBLAS_LOOPS");
if ( p != NULL ) {
loops = atoi(p);
}

if ((p = getenv("OPENBLAS_PARAM_M"))) {
m = atoi(p);
has_param_m=1;
} else {
m = to;
}
if ((p = getenv("OPENBLAS_PARAM_N"))) {
n = atoi(p);
has_param_n=1;
} else {
n = to;
}
if ((p = getenv("OPENBLAS_PARAM_K"))) {
k = atoi(p);
has_param_k=1;
} else {
k = to;
}

if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * m * k * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( b = (IFLOAT *)malloc(sizeof(IFLOAT) * k * n * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( c = (FLOAT *)malloc(sizeof(FLOAT) * m * n * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

for (i = 0; i < m * k * COMPSIZE; i++) {
a[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < k * n * COMPSIZE; i++) {
b[i] = ((IFLOAT) rand() / (IFLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < m * n * COMPSIZE; i++) {
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

fprintf(stderr, " SIZE Flops Time\n");

for (i = from; i <= to; i += step) {
timeg=0;

if (!has_param_m) { m = i; }
if (!has_param_n) { n = i; }
if (!has_param_k) { k = i; }

if (transa == 'N') { lda = m; }
else { lda = k; }
if (transb == 'N') { ldb = k; }
else { ldb = n; }
ldc = m;

fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
begin();

for (j=0; j<loops; j++) {
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
}

end();
time1 = getsec();

timeg = time1/loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)k * (double)m * (double)n / timeg * 1.e-6, time1);
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 136
benchmark/gemm3m.c View File

@@ -1,136 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef GEMM

#ifndef COMPLEX

#ifdef DOUBLE
#define GEMM BLASFUNC(dgemm)
#else
#define GEMM BLASFUNC(sgemm)
#endif

#else

#ifdef DOUBLE
#define GEMM BLASFUNC(zgemm3m)
#else
#define GEMM BLASFUNC(cgemm3m)
#endif

#endif

int main(int argc, char *argv[]){

FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0};
char trans='N';
blasint m, i, j;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c\n", from, to, step, trans);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

p = getenv("OPENBLAS_LOOPS");
if ( p != NULL )
loops = atoi(p);


#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);

for (l=0; l<loops; l++)
{
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

begin();

GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );

end();
timeg += getsec();
}

timeg /= loops;
fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / timeg * 1.e-6);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 214
benchmark/gemv.c View File

@@ -1,214 +0,0 @@
/***************************************************************************
Copyright (c) 2014, 2025 The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"


#undef GEMV

#ifndef COMPLEX

#ifdef DOUBLE
#define GEMV BLASFUNC(dgemv)
#elif defined(BFLOAT16) && defined(BGEMM)
#define GEMV BLASFUNC(bgemv)
#elif defined(BFLOAT16)
#define GEMV BLASFUNC(sbgemv)
#undef IFLOAT
#define IFLOAT bfloat16
#else
#define GEMV BLASFUNC(sgemv)
#endif

#else

#ifdef DOUBLE
#define GEMV BLASFUNC(zgemv)
#else
#define GEMV BLASFUNC(cgemv)
#endif

#endif
int main(int argc, char *argv[]){

IFLOAT *a, *x;
FLOAT *y;
#ifdef BGEMM
blasint one=1;
blasint two=2;
float alpha_in[] = {1.0, 0.0};
float beta_in[] = {0.0, 0.0};
FLOAT alpha[2], beta[2];
sbstobf16_(&two, alpha_in, &one, alpha, &one);
sbstobf16_(&two, beta_in, &one, beta, &one);
#else
FLOAT alpha[] = {1.0, 0.0};
FLOAT beta [] = {0.0, 0.0};
#endif
char trans='N';
blasint m, i, j;
blasint inc_x=1,inc_y=1;
blasint n=0;
int has_param_n = 0;
int has_param_m = 0;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}


int tomax = to;

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;
if ((p = getenv("OPENBLAS_PARAM_N"))) {
n = atoi(p);
if ((n>0)) has_param_n = 1;
if ( n > tomax ) tomax = n;
}
if ( has_param_n == 0 )
if ((p = getenv("OPENBLAS_PARAM_M"))) {
m = atoi(p);
if ((m>0)) has_param_m = 1;
if ( m > tomax ) tomax = m;
}



fprintf(stderr, "From : %3d To : %3d Step = %3d Trans = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,trans,inc_x,inc_y,loops);

if (( a = (IFLOAT *)malloc(sizeof(IFLOAT) * tomax * tomax * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( x = (IFLOAT *)malloc(sizeof(IFLOAT) * tomax * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * tomax * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

if (has_param_m == 0)
{

for(m = from; m <= to; m += step)
{
timeg=0;
if ( has_param_n == 0 ) n = m;
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
for(j = 0; j < m; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((IFLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

for (l=0; l<loops; l++)
{

for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
x[i] = ((IFLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
end();
time1 = getsec();
timeg += time1;

}

timeg /= loops;

fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);

}
}
else
{

for(n = from; n <= to; n += step)
{
timeg=0;
fprintf(stderr, " %6dx%d : ", (int)m,(int)n);
for(j = 0; j < m; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

for (l=0; l<loops; l++)
{

for(i = 0; i < n * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
end();
time1 = getsec();
timeg += time1;

}

timeg /= loops;

fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg);

}
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 149
benchmark/ger.c View File

@@ -1,149 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef GER

#ifdef COMPLEX
#ifdef DOUBLE
#define GER BLASFUNC(zgeru)
#else
#define GER BLASFUNC(cgeru)
#endif
#else
#ifdef DOUBLE
#define GER BLASFUNC(dger)
#else
#define GER BLASFUNC(sger)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
blasint m, i, j;
blasint inc_x=1,inc_y=1;
blasint n=0;
int has_param_n = 0;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_PARAM_N"))) {
n = atoi(p);
if ((n>0) && (n<=to)) has_param_n = 1;
}

if ( has_param_n == 1 )
fprintf(stderr, "From : %3d To : %3d Step = %3d N = %d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,n,inc_x,inc_y,loops);
else
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

if ( has_param_n == 0 ) n = m;

fprintf(stderr, " %6dx%d : ", (int)m,(int)n);

for(j = 0; j < m; j++){
for(i = 0; i < n * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}


for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for(i = 0; i < n * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for (l=0; l<loops; l++)
{

begin();

GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);

end();
timeg += getsec();
}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));


+ 0
- 143
benchmark/gesv.c View File

@@ -1,143 +0,0 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#include "bench.h"

double fabs(double);

#undef GESV
#undef GETRS

#ifndef COMPLEX
#ifdef XDOUBLE
#define GESV BLASFUNC(qgesv)
#elif defined(DOUBLE)
#define GESV BLASFUNC(dgesv)
#else
#define GESV BLASFUNC(sgesv)
#endif
#else
#ifdef XDOUBLE
#define GESV BLASFUNC(xgesv)
#elif defined(DOUBLE)
#define GESV BLASFUNC(zgesv)
#else
#define GESV BLASFUNC(cgesv)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *a, *b;
blasint *ipiv;

blasint m, i, j, info;

int from = 1;
int to = 200;
int step = 1;

double time1;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops Time\n");

for(m = from; m <= to; m += step){

fprintf(stderr, " %dx%d : ", (int)m, (int)m);

for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
b[(long)i + (long)j * (long)m * COMPSIZE] = 0.0;
}
}


for (j = 0; j < m; ++j) {
for (i = 0; i < m * COMPSIZE; ++i) {
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
}
}

begin();

GESV (&m, &m, a, &m, ipiv, b, &m, &info);

end();

time1 = getsec();

fprintf(stderr,
"%10.2f MFlops %10.6f s\n",
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 179
benchmark/getri.c View File

@@ -1,179 +0,0 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#include "bench.h"

#undef GETRF
#undef GETRI

#ifndef COMPLEX
#ifdef XDOUBLE
#define GETRF BLASFUNC(qgetrf)
#define GETRI BLASFUNC(qgetri)
#elif defined(DOUBLE)
#define GETRF BLASFUNC(dgetrf)
#define GETRI BLASFUNC(dgetri)
#else
#define GETRF BLASFUNC(sgetrf)
#define GETRI BLASFUNC(sgetri)
#endif
#else
#ifdef XDOUBLE
#define GETRF BLASFUNC(xgetrf)
#define GETRI BLASFUNC(xgetri)
#elif defined(DOUBLE)
#define GETRF BLASFUNC(zgetrf)
#define GETRI BLASFUNC(zgetri)
#else
#define GETRF BLASFUNC(cgetrf)
#define GETRI BLASFUNC(cgetri)
#endif
#endif

extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);

int main(int argc, char *argv[]){

FLOAT *a,*work;
FLOAT wkopt[4];
blasint *ipiv;
blasint m, i, j, l, info,lwork;

int from = 1;
int to = 200;
int step = 1;
int loops = 1;

double time1,timeg;
char *p;
char btest = 'I';

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_TEST"))) btest=*p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}



for(j = 0; j < to; j++){
for(i = 0; i < to * COMPSIZE; i++){
a[(long)i + (long)j * (long)to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}


lwork = -1;
m=to;

GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);

lwork = (blasint)wkopt[0];
if (( work = (FLOAT *)malloc(sizeof(FLOAT) * lwork * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}


#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE FLops Time Lwork\n");

for(m = from; m <= to; m += step){
timeg = 0.;
fprintf(stderr, " %6d : ", (int)m);

for (l = 0; l < loops; l++) {

if (btest == 'F') begin();
GETRF (&m, &m, a, &m, ipiv, &info);
if (btest == 'F') {
end();
timeg += getsec();
}
if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1);
}

if (btest == 'I') begin();

lwork = -1;
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);

lwork = (blasint)wkopt[0];
GETRI(&m, a, &m, ipiv, work, &lwork, &info);
if (btest == 'I') end();

if (info) {
fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
exit(1);
}

if (btest == 'I')
timeg += getsec();
} // loops
time1 = timeg/(double)loops;
fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n",
COMPSIZE * COMPSIZE * (4.0/3.0 * (double)m * (double)m *(double)m - (double)m *(double)m + 5.0/3.0* (double)m) / time1 * 1.e-6,time1,lwork);


}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 134
benchmark/hbmv.c View File

@@ -1,134 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef HBMV

#ifdef DOUBLE
#define HBMV BLASFUNC(zhbmv)
#else
#define HBMV BLASFUNC(chbmv)
#endif

int main(int argc, char *argv[]){

FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {0.0, 0.0};
blasint k = 1;
char uplo='L';
blasint m, i, j;
blasint inc_x=1, inc_y=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_K"))) k = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' k = %d Inc_x = %d Inc_y = %d Loops = %d\n",
from, to, step, uplo, k, inc_x, inc_y, loops);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step) {

timeg=0;

fprintf(stderr, " %6dx%d : ", (int)m, (int)m);

for(j = 0; j < m; j++) {
for(i = 0; i < m * COMPSIZE; i++) {
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

for (l = 0; l < loops; l++) {

for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );

end();

timeg += getsec();

}

timeg /= loops;

fprintf(stderr, " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)(2 * k + 1) * (double)m / timeg * 1.e-6);
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 117
benchmark/hemm.c View File

@@ -1,117 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef HEMM

#ifdef DOUBLE
#define HEMM BLASFUNC(zhemm)
#else
#define HEMM BLASFUNC(chemm)
#endif

int main(int argc, char *argv[]){

FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0};
char *p;

char side='L';
char uplo='U';

if ((p = getenv("OPENBLAS_SIDE"))) side=*p;
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;

blasint m, i, j;

int from = 1;
int to = 200;
int step = 1;

double time1;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c\n", from, to, step,side,uplo);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}



#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

fprintf(stderr, " %6d : ", (int)m);

for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

begin();

HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );

end();

time1 = getsec();

fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 134
benchmark/hemv.c View File

@@ -1,134 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef HEMV

#ifdef DOUBLE
#define HEMV BLASFUNC(zhemv)
#else
#define HEMV BLASFUNC(chemv)
#endif

int main(int argc, char *argv[]){

FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0};
char uplo='L';
blasint m, i, j;
blasint inc_x=1,inc_y=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;

fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6dx%d : ", (int)m,(int)m);

for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
begin();

HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 109
benchmark/her.c View File

@@ -1,109 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef HER

#ifdef DOUBLE
#define HER BLASFUNC(zher)
#else
#define HER BLASFUNC(cher)
#endif

int main(int argc, char *argv[]){

FLOAT *a, *x;
FLOAT alpha[] = {1.0, 1.0};
blasint incx = 1;
char *p;

char uplo='U';
char trans='N';

if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

blasint m, i, j;

int from = 1;
int to = 200;
int step = 1;
double time1;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);


if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}



#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{
fprintf(stderr, " %6d : ", (int)m);

for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

HER (&uplo, &m, alpha, x, &incx, a, &m );

end();

time1 = getsec();

fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m / time1 * 1.e-6);

}

return 0;
}

+ 0
- 113
benchmark/her2.c View File

@@ -1,113 +0,0 @@
/***************************************************************************
Copyright (c) 2020, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef HER2

#ifdef DOUBLE
#define HER2 BLASFUNC(zher2)
#else
#define HER2 BLASFUNC(cher2)
#endif

int main(int argc, char *argv[]){

FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
blasint inc = 1;
char *p;

char uplo='U';
char trans='N';

if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

blasint m, i, j;

int from = 1;
int to = 200;
int step = 1;

double time1;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);


if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{
fprintf(stderr, " %6d : ", (int)m);

for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );

end();

time1 = getsec();

fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time1 * 1.e-6);

}

return 0;
}

+ 0
- 116
benchmark/her2k.c View File

@@ -1,116 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef HER2K
#ifdef DOUBLE
#define HER2K BLASFUNC(zher2k)
#else
#define HER2K BLASFUNC(cher2k)
#endif

int main(int argc, char *argv[]){

FLOAT *a, *b, *c;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0};
char *p;

char uplo='U';
char trans='N';

if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

blasint m, i, j;

int from = 1;
int to = 200;
int step = 1;

double time1;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}



#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

fprintf(stderr, " %6d : ", (int)m);

for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
b[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

begin();

HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );

end();

time1 = getsec();

fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)m / time1 * 1.e-6);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 112
benchmark/herk.c View File

@@ -1,112 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef HERK

#ifdef DOUBLE
#define HERK BLASFUNC(zherk)
#else
#define HERK BLASFUNC(cherk)
#endif

int main(int argc, char *argv[]){

FLOAT *a, *c;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0};
char *p;

char uplo='U';
char trans='N';

if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p;

blasint m, i, j;

int from = 1;
int to = 200;
int step = 1;

double time1;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = %c Trans = %c\n", from, to, step,uplo,trans);


if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( c = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}



#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

fprintf(stderr, " %6d : ", (int)m);

for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
c[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

begin();

HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );

end();

time1 = getsec();

fprintf(stderr,
" %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 133
benchmark/hpmv.c View File

@@ -1,133 +0,0 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef HPMV

#ifdef DOUBLE
#define HPMV BLASFUNC(zhpmv)
#else
#define HPMV BLASFUNC(chpmv)
#endif

int main(int argc, char *argv[]){

FLOAT *a, *x, *y;
FLOAT alpha[] = {1.0, 1.0};
FLOAT beta [] = {1.0, 1.0};
char uplo='L';
blasint m, i, j;
blasint inc_x=1, inc_y=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p);
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p;

fprintf(stderr, "From : %3d To : %3d Step = %3d Uplo = '%c' Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,uplo,inc_x,inc_y,loops);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}

if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");
exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step) {

timeg=0;

fprintf(stderr, " %6dx%d : ", (int)m, (int)m);

for(j = 0; j < m; j++) {
for(i = 0; i < m * COMPSIZE; i++) {
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

for (l = 0; l < loops; l++) {

for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) {
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

for (i = 0; i < m * COMPSIZE * abs(inc_y); i++) {
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr, " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / timeg * 1.e-6);
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 120
benchmark/iamax.c View File

@@ -1,120 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef IAMAX

#ifdef COMPLEX
#ifdef DOUBLE
#define IAMAX BLASFUNC(izamax)
#else
#define IAMAX BLASFUNC(icamax)
#endif
#else
#ifdef DOUBLE
#define IAMAX BLASFUNC(idamax)
#else
#define IAMAX BLASFUNC(isamax)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

IAMAX (&m, x, &inc_x);

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MBytes %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 120
benchmark/iamin.c View File

@@ -1,120 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef IAMIN

#ifdef COMPLEX
#ifdef DOUBLE
#define IAMIN BLASFUNC(izamin)
#else
#define IAMIN BLASFUNC(icamin)
#endif
#else
#ifdef DOUBLE
#define IAMIN BLASFUNC(idamin)
#else
#define IAMIN BLASFUNC(isamin)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

IAMIN (&m, x, &inc_x);

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);

}
return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 114
benchmark/imax.c View File

@@ -1,114 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef IMAX

#ifndef COMPLEX
#ifdef DOUBLE
#define IMAX BLASFUNC(idmax)
#else
#define IMAX BLASFUNC(ismax)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

IMAX (&m, x, &inc_x);

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 114
benchmark/imin.c View File

@@ -1,114 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef IMIN

#ifndef COMPLEX
#ifdef DOUBLE
#define IMIN BLASFUNC(idmin)
#else
#define IMIN BLASFUNC(ismin)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

IMIN (&m, x, &inc_x);

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 208
benchmark/linpack.c View File

@@ -1,208 +0,0 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#include "bench.h"

double fabs(double);

#undef GETRF
#undef GETRS

#ifndef COMPLEX
#ifdef XDOUBLE
#define GETRF BLASFUNC(qgetrf)
#define GETRS BLASFUNC(qgetrs)
#elif defined(DOUBLE)
#define GETRF BLASFUNC(dgetrf)
#define GETRS BLASFUNC(dgetrs)
#else
#define GETRF BLASFUNC(sgetrf)
#define GETRS BLASFUNC(sgetrs)
#endif
#else
#ifdef XDOUBLE
#define GETRF BLASFUNC(xgetrf)
#define GETRS BLASFUNC(xgetrs)
#elif defined(DOUBLE)
#define GETRF BLASFUNC(zgetrf)
#define GETRS BLASFUNC(zgetrs)
#else
#define GETRF BLASFUNC(cgetrf)
#define GETRS BLASFUNC(cgetrs)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *a, *b;
blasint *ipiv;

blasint m, i, j, l, info;
blasint unit = 1;

int from = 1;
int to = 200;
int step = 1;
int loops = 1;

FLOAT maxerr;

double time1, time2, timeg1,timeg2;

char *p;
if ((p = getenv("OPENBLAS_LOOPS"))) loops=atoi(p);
argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step);

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Residual Decompose Solve Total\n");

for(m = from; m <= to; m += step){
timeg1 = timeg2 = 0.;
fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++) {
for(j = 0; j < m; j++){
for(i = 0; i < m * COMPSIZE; i++){
a[(long)i + (long)j * (long)m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
}

for (i = 0; i < m * COMPSIZE; ++i) b[i] = 0.;

for (j = 0; j < m; ++j) {
for (i = 0; i < m * COMPSIZE; ++i) {
b[i] += a[(long)i + (long)j * (long)m * COMPSIZE];
}
}

begin();

GETRF (&m, &m, a, &m, ipiv, &info);

end();

if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1);
}

timeg1 += getsec();

begin();

GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);

end();

if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1);
}

timeg2 += getsec();
} //loops
time1=timeg1/(double)loops;
time2=timeg2/(double)loops;
maxerr = 0.;

for(i = 0; i < m; i++){
#ifndef XDOUBLE
if (maxerr < fabs(b[i * COMPSIZE] - 1.0)) maxerr = fabs(b[i * COMPSIZE] - 1.0);
#ifdef COMPLEX
if (maxerr < fabs(b[i * COMPSIZE] + 1)) maxerr = fabs(b[i * COMPSIZE + 1]);
#endif
#else
if (maxerr < fabsl(b[i * COMPSIZE] - 1.0L)) maxerr = fabsl(b[i * COMPSIZE] - 1.0L);
#ifdef COMPLEX
if (maxerr < fabsl(b[i * COMPSIZE] + 1)) maxerr = fabsl(b[i * COMPSIZE + 1]);
#endif
#endif
}

#ifdef XDOUBLE
fprintf(stderr," %Le ", maxerr);
#else
fprintf(stderr," %e ", maxerr);
#endif

fprintf(stderr,
" %10.2f MFlops %10.2f MFlops %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 2. / 3. * (double)m * (double)m * (double)m / time1 * 1.e-6,
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m / time2 * 1.e-6,
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m) / (time1 + time2) * 1.e-6);

#if 0
if (
#ifdef DOUBLE
maxerr > 1.e-8
#else
maxerr > 1.e-1
#endif
) {
fprintf(stderr, "Error is too large.\n");
exit(1);
}
#endif

}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 113
benchmark/max.c View File

@@ -1,113 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef NAMAX

#ifndef COMPLEX
#ifdef DOUBLE
#define NAMAX BLASFUNC(dmax)
#else
#define NAMAX BLASFUNC(smax)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

NAMAX (&m, x, &inc_x);

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 113
benchmark/min.c View File

@@ -1,113 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef NAMIN

#ifndef COMPLEX
#ifdef DOUBLE
#define NAMIN BLASFUNC(dmin)
#else
#define NAMIN BLASFUNC(smin)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

NAMIN (&m, x, &inc_x);

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 121
benchmark/nrm2.c View File

@@ -1,121 +0,0 @@
/***************************************************************************
Copyright (c) 2016, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef NRM2

#ifdef COMPLEX
#ifdef DOUBLE
#define NRM2 BLASFUNC(dznrm2)
#else
#define NRM2 BLASFUNC(scnrm2)
#endif
#else
#ifdef DOUBLE
#define NRM2 BLASFUNC(dnrm2)
#else
#define NRM2 BLASFUNC(snrm2)
#endif
#endif

int main(int argc, char *argv[]){

FLOAT *x;
blasint m, i;
blasint inc_x=1;
int loops = 1;
int l;
char *p;

int from = 1;
int to = 200;
int step = 1;

double time1,timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}

if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);

fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);

if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

fprintf(stderr, " SIZE Flops\n");

for(m = from; m <= to; m += step)
{

timeg=0;

fprintf(stderr, " %6d : ", (int)m);


for (l=0; l<loops; l++)
{

for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

begin();

NRM2 (&m, x, &inc_x);

end();

time1 = getsec();

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg);


}

return 0;
}

// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));

+ 0
- 122
benchmark/omatcopy.c View File

@@ -1,122 +0,0 @@
/***************************************************************************
Copyright (c) 2024, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

#include "bench.h"

#undef OMATCOPY

#ifndef COMPLEX
#ifdef DOUBLE
#define OMATCOPY BLASFUNC(domatcopy)
#else
#define OMATCOPY BLASFUNC(somatcopy)
#endif
#else
#ifdef DOUBLE
#define OMATCOPY BLASFUNC(zomatcopy)
#else
#define OMATCOPY BLASFUNC(comatcopy)
#endif
#endif
int main(int argc, char *argv[]){
FLOAT *a, *b;
FLOAT alpha[] = {1.0, 0.0};
char trans = 'N';
char order = 'C';
blasint crows, ccols, clda, cldb;
int loops = 1;
char *p;

int from = 1;
int to = 200;
int step = 1;
int i, j;

double time1, timeg;

argc--;argv++;

if (argc > 0) { from = atol(*argv); argc--; argv++; }
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++; }
if (argc > 0) { step = atol(*argv); argc--; argv++; }

if ((p = getenv("OPENBLAS_TRANS"))) {
trans=*p;
}
if ((p = getenv("OPENBLAS_ORDER"))) {
order=*p;
}
TOUPPER(trans);
TOUPPER(order);
fprintf(stderr, "From : %3d To : %3d Step=%d : Trans=%c : Order=%c\n", from, to, step, trans, order);
p = getenv("OPENBLAS_LOOPS");
if ( p != NULL ) {
loops = atoi(p);
}

if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL) {
fprintf(stderr,"Out of Memory!!\n");exit(1);
}

#ifdef __linux
srandom(getpid());
#endif

for (i = 0; i < to * to * COMPSIZE; i++) {
a[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
for (i = 0; i < to * to * COMPSIZE; i++) {
b[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}

fprintf(stderr, " SIZE Flops Time\n");
for (i = from; i <= to; i += step) {
cldb = clda = crows = ccols = i;
fprintf(stderr, " ROWS=%4d, COLS=%4d : ", (int)crows, (int)ccols);
begin();

for (j=0; j<loops; j++) {
OMATCOPY (&order, &trans, &crows, &ccols, alpha, a, &clda, b, &cldb);
}

end();
time1 = getsec();

timeg = time1/loops;
fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * COMPSIZE * (double)ccols * (double)crows / timeg * 1.e-6, time1);
}

free(a);
free(b);

return 0;
}

+ 0
- 65
benchmark/plot-filter.sh View File

@@ -1,65 +0,0 @@
#!/bin/sh
# **********************************************************************************
# Copyright (c) 2014, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# **********************************************************************************

# ************************************************************************
# sample filter for data output from benchmark programs
#
# usage example:
# ./dgemm.goto 2>&1|./plotfilter.sh >OpenBLAS
# ************************************************************************

if [ $# -eq 1 ]
then
arg1=$1
else
arg1=0
fi

case $arg1 in

L)
# Linpack Benchmark
awk '/MFlops/ { print $1,int($8) }'|tail --lines=+2
;;

C)
# Cholesky Benchmark
awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2
;;

B)
# Copy Benchmark
awk '/MBytes/ { print $1,int($3) }'|tail --lines=+2
;;


*)
awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2
;;
esac


+ 0
- 42
benchmark/plot-header View File

@@ -1,42 +0,0 @@
# **********************************************************************************
# Copyright (c) 2014, The OpenBLAS Project
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# 3. Neither the name of the OpenBLAS project nor the names of
# its contributors may be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
# USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# **********************************************************************************

set term x11 font sans;
set ylabel "MFlops";
set xlabel "Size";
set grid xtics;
set grid ytics;
set key left;
set timestamp "generated on %Y-%m-%d by `whoami`"
set title "Dtrsm\nUPLO=U TRANS=N SIDE=L\nBulldozer 1 Thread"
plot 'OpenBLAS' smooth bezier, 'ACML' smooth bezier, 'MKL' smooth bezier;
set output "print.png";
show title;
show plot;
show output;



Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save