Browse Source

Merge branch 'OpenMathLib:develop' into issue4728

tags/v0.3.28^2
Martin Kroeker GitHub 1 year ago
parent
commit
a2ee4b1966
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
16 changed files with 166 additions and 104 deletions
  1. +54
    -41
      .github/workflows/loongarch64.yml
  2. +56
    -43
      .github/workflows/loongarch64_clang.yml
  3. +1
    -0
      cmake/cc.cmake
  4. +2
    -2
      cmake/fc.cmake
  5. +1
    -0
      cpuid_x86.c
  6. +12
    -0
      driver/level3/level3_thread.c
  7. +1
    -0
      driver/others/dynamic.c
  8. +1
    -1
      interface/CMakeLists.txt
  9. +6
    -1
      kernel/arm64/dot_kernel_sve.c
  10. +4
    -1
      kernel/arm64/zdot_thunderx2t99.c
  11. +4
    -3
      kernel/x86_64/tobf16.c
  12. +5
    -3
      lapack-netlib/TESTING/EIG/ddrvst.f
  13. +2
    -2
      lapack-netlib/TESTING/EIG/dlahd2.f
  14. +5
    -3
      lapack-netlib/TESTING/EIG/sdrvst.f
  15. +5
    -2
      utest/test_fork.c
  16. +7
    -2
      utest/test_post_fork.c

+ 54
- 41
.github/workflows/loongarch64.yml View File

@@ -33,10 +33,8 @@ jobs:

- name: Install APT deps
run: |
sudo add-apt-repository ppa:savoury1/virtualisation
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
qemu-user-static
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache

- name: Download and install loongarch64-toolchain
run: |
@@ -44,6 +42,20 @@ jobs:
#wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz
tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt

- name: Checkout qemu
uses: actions/checkout@v3
with:
repository: qemu/qemu
path: qemu
ref: master

- name: Install qemu
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static
make -j$(nproc)
make install

- name: Set env
run: |
echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV
@@ -76,45 +88,46 @@ jobs:

- name: Test
run: |
qemu-loongarch64-static ./utest/openblas_utest
qemu-loongarch64-static ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
qemu-loongarch64 ./utest/openblas_utest
qemu-loongarch64 ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat

+ 56
- 43
.github/workflows/loongarch64_clang.yml View File

@@ -34,18 +34,30 @@ jobs:

- name: Install APT deps
run: |
sudo add-apt-repository ppa:savoury1/virtualisation
sudo apt-get update
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \
qemu-user-static
sudo apt-get install autoconf automake autotools-dev ninja-build make ccache

- name: Download and install loongarch64-toolchain
run: |
wget http://ftp.loongnix.cn/toolchain/llvm/llvm8/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz
wget http://ftp.loongnix.cn/toolchain/gcc/release/loongarch/gcc8/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz
wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz
wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz
tar -xf clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz -C /opt
tar -xf loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz -C /opt

- name: Checkout qemu
uses: actions/checkout@v3
with:
repository: qemu/qemu
path: qemu
ref: master

- name: Install qemu
run: |
cd qemu
./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static
make -j$(nproc)
make install

- name: Set env
run: |
echo "PATH=$GITHUB_WORKSPACE:/opt/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10/bin:/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/bin:$PATH" >> $GITHUB_ENV
@@ -77,46 +89,47 @@ jobs:

- name: Test
run: |
qemu-loongarch64-static ./utest/openblas_utest
qemu-loongarch64-static ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1
export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH
qemu-loongarch64 ./utest/openblas_utest
qemu-loongarch64 ./utest/openblas_utest_ext
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT2.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat
rm -f ./test/?BLAT3.SUMM
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat
OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat


+ 1
- 0
cmake/cc.cmake View File

@@ -6,6 +6,7 @@ include(CheckCCompilerFlag)

if (${CMAKE_C_COMPILER_ID} MATCHES "IntelLLVM")
set(CCOMMON_OPT "${CCOMMON_OPT} -fp-model=consistent")
set(GCC_VERSION 100)
endif ()

if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB" OR ${CMAKE_C_COMPILER_ID} MATCHES "Clang")


+ 2
- 2
cmake/fc.cmake View File

@@ -117,12 +117,12 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F
endif ()
endif ()

if (${F_COMPILER} STREQUAL "INTEL")
if (${F_COMPILER} STREQUAL "INTEL" OR CMAKE_Fortran_COMPILER_ID MATCHES "Intel")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")
if (INTERFACE64)
set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
endif ()
set(FCOMMON_OPT "${FCOMMON_OPT} -recursive")
set(FCOMMON_OPT "${FCOMMON_OPT} -recursive -fp-model=consistent")
if (USE_OPENMP)
set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
endif ()


+ 1
- 0
cpuid_x86.c View File

@@ -2525,6 +2525,7 @@ int get_coretype(void){
case 0x7:
switch (exmodel) {
case 5:
case 6:
if (support_avx2())
return CORE_ZEN;
else


+ 12
- 0
driver/level3/level3_thread.c View File

@@ -570,6 +570,8 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t level3_wakeup = PTHREAD_COND_INITIALIZER;
volatile static BLASLONG CPU_AVAILABLE = MAX_CPU_NUMBER;
#endif

blas_arg_t newarg;
@@ -639,6 +641,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
EnterCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
pthread_mutex_lock(&level3_lock);
while(CPU_AVAILABLE < nthreads) {
pthread_cond_wait(&level3_wakeup, &level3_lock);
}
CPU_AVAILABLE -= nthreads;
WMB;
pthread_mutex_unlock(&level3_lock);
#endif

#ifdef USE_ALLOC_HEAP
@@ -783,6 +791,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG
#elif defined(OS_WINDOWS)
LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock);
#else
pthread_mutex_lock(&level3_lock);
CPU_AVAILABLE += nthreads;
WMB;
pthread_cond_signal(&level3_wakeup);
pthread_mutex_unlock(&level3_lock);
#endif



+ 1
- 0
driver/others/dynamic.c View File

@@ -927,6 +927,7 @@ static gotoblas_t *get_coretype(void){
case 0x7:
switch (exmodel) {
case 5:
case 6:
if (support_avx2())
return &gotoblas_ZEN;
else


+ 1
- 1
interface/CMakeLists.txt View File

@@ -137,7 +137,7 @@ endif ()
foreach (float_type ${FLOAT_TYPES})

if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX")
GenerateNamedObjects("zaxpy.c" "" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type})
GenerateNamedObjects("zaxpy.c" "CONJ" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type})

GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type})
GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type})


+ 6
- 1
kernel/arm64/dot_kernel_sve.c View File

@@ -108,7 +108,12 @@ dot_kernel_sve(BLASLONG n, FLOAT* x, FLOAT* y)
[N_] "r" (n),
[X_] "r" (x),
[Y_] "r" (y)
:);
: "cc",
"memory",
"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
"x8", "x9", "x10", "x11", "x12", "x13", "d1",
"z0", "z1"
);

return ret;
}

+ 4
- 1
kernel/arm64/zdot_thunderx2t99.c View File

@@ -292,7 +292,10 @@ static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON
: "cc",
"memory",
"x0", "x1", "x2", "x3", "x4", "x5",
"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"
"d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
"v16", "v17", "v18", "v19", "v20", "v21", "v22",
"v23", "v24", "v25", "v26", "v27", "v28", "v29",
"v30", "v31"
);

cf=OPENBLAS_MAKE_COMPLEX_FLOAT(dotr, doti);


+ 4
- 3
kernel/x86_64/tobf16.c View File

@@ -144,10 +144,11 @@ void CNAME(BLASLONG n, FLOAT_TYPE * in, BLASLONG inc_in, bfloat16 * out, BLASLON
if (inc_in == 0 || inc_out == 0 || n <= 100000) {
nthreads = 1;
} else {
nthreads = num_cpu_avail(1);
if (n/100000 < 100) {
nthreads = 4;
} else {
nthreads = 16;
nthreads = MAX(nthreads,4);
// } else {
// nthreads = MAX(nthreads,16);
}
}



+ 5
- 3
lapack-netlib/TESTING/EIG/ddrvst.f View File

@@ -2772,7 +2772,7 @@ c LIWEDC = 12
RESULT( NTEST ) = ULPINV
RESULT( NTEST+1 ) = ULPINV
RESULT( NTEST+2 ) = ULPINV
GO TO 700
GO TO 1750
END IF
END IF
*
@@ -2797,13 +2797,13 @@ c LIWEDC = 12
RETURN
ELSE
RESULT( NTEST ) = ULPINV
GO TO 700
GO TO 1750
END IF
END IF
*
IF( M3.EQ.0 .AND. N.GT.0 ) THEN
RESULT( NTEST ) = ULPINV
GO TO 700
GO TO 1750
END IF
*
* Do test 78 (or +54)
@@ -2819,6 +2819,8 @@ c LIWEDC = 12
$ MAX( UNFL, TEMP3*ULP )
*
CALL DLACPY( ' ', N, N, V, LDU, A, LDA )
*
1750 CONTINUE
*
1720 CONTINUE
*


+ 2
- 2
lapack-netlib/TESTING/EIG/dlahd2.f View File

@@ -534,8 +534,8 @@
$ / ' 2: norm( I - Q'' Q ) / ( m ulp )',
$ / ' 3: norm( I - PT PT'' ) / ( n ulp )',
$ / ' 4: norm( Y - Q'' C ) / ( norm(Y) max(m,nrhs) ulp )' )
9968 FORMAT( / ' Tests performed: See sdrvst.f' )
9967 FORMAT( / ' Tests performed: See cdrvst.f' )
9968 FORMAT( / ' Tests performed: See ddrvst.f' )
9967 FORMAT( / ' Tests performed: See zdrvst.f' )
*
* End of DLAHD2
*


+ 5
- 3
lapack-netlib/TESTING/EIG/sdrvst.f View File

@@ -2772,7 +2772,7 @@ c LIWEDC = 12
RESULT( NTEST ) = ULPINV
RESULT( NTEST+1 ) = ULPINV
RESULT( NTEST+2 ) = ULPINV
GO TO 700
GO TO 1750
END IF
END IF
*
@@ -2797,13 +2797,13 @@ c LIWEDC = 12
RETURN
ELSE
RESULT( NTEST ) = ULPINV
GO TO 700
GO TO 1750
END IF
END IF
*
IF( M3.EQ.0 .AND. N.GT.0 ) THEN
RESULT( NTEST ) = ULPINV
GO TO 700
GO TO 1750
END IF
*
* Do test 78 (or +54)
@@ -2819,6 +2819,8 @@ c LIWEDC = 12
$ MAX( UNFL, TEMP3*ULP )
*
CALL SLACPY( ' ', N, N, V, LDU, A, LDA )
*
1750 CONTINUE
*
1720 CONTINUE
*


+ 5
- 2
utest/test_fork.c View File

@@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
#include <cblas.h>
#include "openblas_utest.h"

@@ -41,7 +42,7 @@ static void* xmalloc(size_t n)
void* tmp;
tmp = malloc(n);
if (tmp == NULL) {
fprintf(stderr, "You are about to die\n");
fprintf(stderr, "Failed to allocate memory for the testcase.\n");
exit(1);
} else {
return tmp;
@@ -103,6 +104,7 @@ exit(0);

fork_pid = fork();
if (fork_pid == -1) {
perror("fork");
CTEST_ERR("Failed to fork process.");
} else if (fork_pid == 0) {
// Compute a DGEMM product in the child process to check that the
@@ -113,7 +115,8 @@ exit(0);
// recursively
fork_pid_nested = fork();
if (fork_pid_nested == -1) {
CTEST_ERR("Failed to fork process.");
perror("fork");
CTEST_ERR("Failed to fork nested process.");
exit(1);
} else if (fork_pid_nested == 0) {
check_dgemm(a, b, d, c, n);


+ 7
- 2
utest/test_post_fork.c View File

@@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <sys/types.h>
#include <sys/wait.h>
#include <errno.h>
#include <cblas.h>
#ifdef USE_OPENMP
#include <omp.h>
@@ -44,7 +45,7 @@ static void* xmalloc(size_t n)
void* tmp;
tmp = malloc(n);
if (tmp == NULL) {
fprintf(stderr, "You are about to die\n");
fprintf(stderr, "Failed to allocate memory for the test payload.\n");
exit(1);
} else {
return tmp;
@@ -114,7 +115,11 @@ exit(0);

fork_pid = fork();
if (fork_pid == -1) {
CTEST_ERR("Failed to fork process.");
perror("fork");
CTEST_ERR("Failed to fork subprocesses in a loop.");
#ifdef USE_OPENMP
CTEST_ERR("Number of OpenMP threads was %d in this attempt.",i);
#endif
} else if (fork_pid == 0) {
// Just pretend to do something, e.g. call `uname`, then exit
exit(0);


Loading…
Cancel
Save