| @@ -33,10 +33,8 @@ jobs: | |||
| - name: Install APT deps | |||
| run: | | |||
| sudo add-apt-repository ppa:savoury1/virtualisation | |||
| sudo apt-get update | |||
| sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \ | |||
| qemu-user-static | |||
| sudo apt-get install autoconf automake autotools-dev ninja-build make ccache | |||
| - name: Download and install loongarch64-toolchain | |||
| run: | | |||
| @@ -44,6 +42,20 @@ jobs: | |||
| #wget https://github.com/loongson/build-tools/releases/download/2023.08.08/CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz | |||
| tar -xf CLFS-loongarch64-8.1-x86_64-cross-tools-gcc-glibc.tar.xz -C /opt | |||
| - name: Checkout qemu | |||
| uses: actions/checkout@v3 | |||
| with: | |||
| repository: qemu/qemu | |||
| path: qemu | |||
| ref: master | |||
| - name: Install qemu | |||
| run: | | |||
| cd qemu | |||
| ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static | |||
| make -j$(nproc) | |||
| make install | |||
| - name: Set env | |||
| run: | | |||
| echo "LD_LIBRARY_PATH=/opt/cross-tools/target/usr/lib64:/opt/cross-tools/loongarch64-unknown-linux-gnu/lib64:$LD_LIBRARY_PATH" >> $GITHUB_ENV | |||
| @@ -76,45 +88,46 @@ jobs: | |||
| - name: Test | |||
| run: | | |||
| qemu-loongarch64-static ./utest/openblas_utest | |||
| qemu-loongarch64-static ./utest/openblas_utest_ext | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1 | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH | |||
| qemu-loongarch64 ./utest/openblas_utest | |||
| qemu-loongarch64 ./utest/openblas_utest_ext | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1 | |||
| rm -f ./test/?BLAT2.SUMM | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat | |||
| rm -f ./test/?BLAT2.SUMM | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat | |||
| rm -f ./test/?BLAT3.SUMM | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat | |||
| rm -f ./test/?BLAT3.SUMM | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat | |||
| @@ -34,18 +34,30 @@ jobs: | |||
| - name: Install APT deps | |||
| run: | | |||
| sudo add-apt-repository ppa:savoury1/virtualisation | |||
| sudo apt-get update | |||
| sudo apt-get install autoconf automake autotools-dev ninja-build make ccache \ | |||
| qemu-user-static | |||
| sudo apt-get install autoconf automake autotools-dev ninja-build make ccache | |||
| - name: Download and install loongarch64-toolchain | |||
| run: | | |||
| wget http://ftp.loongnix.cn/toolchain/llvm/llvm8/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz | |||
| wget http://ftp.loongnix.cn/toolchain/gcc/release/loongarch/gcc8/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz | |||
| wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz | |||
| wget https://github.com/XiWeiGu/loongarch64_toolchain/releases/download/V0.1/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz | |||
| tar -xf clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10.tar.gz -C /opt | |||
| tar -xf loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3.tar.xz -C /opt | |||
| - name: Checkout qemu | |||
| uses: actions/checkout@v3 | |||
| with: | |||
| repository: qemu/qemu | |||
| path: qemu | |||
| ref: master | |||
| - name: Install qemu | |||
| run: | | |||
| cd qemu | |||
| ./configure --prefix=$GITHUB_WORKSPACE/qemu-install --target-list=loongarch64-linux-user --disable-system --static | |||
| make -j$(nproc) | |||
| make install | |||
| - name: Set env | |||
| run: | | |||
| echo "PATH=$GITHUB_WORKSPACE:/opt/clang+llvm_8.0.1-6_amd64-linux-gnu_debian-10/bin:/opt/loongson-gnu-toolchain-8.3-x86_64-loongarch64-linux-gnu-rc1.3/bin:$PATH" >> $GITHUB_ENV | |||
| @@ -77,46 +89,47 @@ jobs: | |||
| - name: Test | |||
| run: | | |||
| qemu-loongarch64-static ./utest/openblas_utest | |||
| qemu-loongarch64-static ./utest/openblas_utest_ext | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat2 < ./ctest/sin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat2 < ./ctest/din2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat2 < ./ctest/cin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat2 < ./ctest/zin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xscblat3 < ./ctest/sin3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xdcblat3 < ./ctest/din3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xccblat3 < ./ctest/cin3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./ctest/xzcblat3 < ./ctest/zin3 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat1 | |||
| export PATH=$GITHUB_WORKSPACE/qemu-install/bin/:$PATH | |||
| qemu-loongarch64 ./utest/openblas_utest | |||
| qemu-loongarch64 ./utest/openblas_utest_ext | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat2 < ./ctest/sin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat2 < ./ctest/din2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat2 < ./ctest/cin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat2 < ./ctest/zin2 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xscblat3 < ./ctest/sin3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xdcblat3 < ./ctest/din3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xccblat3 < ./ctest/cin3 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./ctest/xzcblat3 < ./ctest/zin3 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat1 | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat1 | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat1 | |||
| rm -f ./test/?BLAT2.SUMM | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat | |||
| rm -f ./test/?BLAT2.SUMM | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat2 < ./test/sblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat2 < ./test/dblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat2 < ./test/cblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat2 < ./test/zblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat2 < ./test/sblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat2 < ./test/dblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat2 < ./test/cblat2.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat2 < ./test/zblat2.dat | |||
| rm -f ./test/?BLAT3.SUMM | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat | |||
| OPENBLAS_NUM_THREADS=1 OMP_NUM_THREADS=1 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat | |||
| rm -f ./test/?BLAT3.SUMM | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/sblat3 < ./test/sblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/dblat3 < ./test/dblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/cblat3 < ./test/cblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64-static ./test/zblat3 < ./test/zblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/sblat3 < ./test/sblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/dblat3 < ./test/dblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/cblat3 < ./test/cblat3.dat | |||
| OPENBLAS_NUM_THREADS=2 qemu-loongarch64 ./test/zblat3 < ./test/zblat3.dat | |||
| @@ -6,6 +6,7 @@ include(CheckCCompilerFlag) | |||
| if (${CMAKE_C_COMPILER_ID} MATCHES "IntelLLVM") | |||
| set(CCOMMON_OPT "${CCOMMON_OPT} -fp-model=consistent") | |||
| set(GCC_VERSION 100) | |||
| endif () | |||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB" OR ${CMAKE_C_COMPILER_ID} MATCHES "Clang") | |||
| @@ -117,12 +117,12 @@ if (${F_COMPILER} STREQUAL "GFORTRAN" OR ${F_COMPILER} STREQUAL "F95" OR CMAKE_F | |||
| endif () | |||
| endif () | |||
| if (${F_COMPILER} STREQUAL "INTEL") | |||
| if (${F_COMPILER} STREQUAL "INTEL" OR CMAKE_Fortran_COMPILER_ID MATCHES "Intel") | |||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL") | |||
| if (INTERFACE64) | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} -i8") | |||
| endif () | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} -recursive") | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} -recursive -fp-model=consistent") | |||
| if (USE_OPENMP) | |||
| set(FCOMMON_OPT "${FCOMMON_OPT} -openmp") | |||
| endif () | |||
| @@ -2525,6 +2525,7 @@ int get_coretype(void){ | |||
| case 0x7: | |||
| switch (exmodel) { | |||
| case 5: | |||
| case 6: | |||
| if (support_avx2()) | |||
| return CORE_ZEN; | |||
| else | |||
| @@ -570,6 +570,8 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | |||
| InitializeCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||
| #else | |||
| static pthread_mutex_t level3_lock = PTHREAD_MUTEX_INITIALIZER; | |||
| static pthread_cond_t level3_wakeup = PTHREAD_COND_INITIALIZER; | |||
| volatile static BLASLONG CPU_AVAILABLE = MAX_CPU_NUMBER; | |||
| #endif | |||
| blas_arg_t newarg; | |||
| @@ -639,6 +641,12 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | |||
| EnterCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||
| #else | |||
| pthread_mutex_lock(&level3_lock); | |||
| while(CPU_AVAILABLE < nthreads) { | |||
| pthread_cond_wait(&level3_wakeup, &level3_lock); | |||
| } | |||
| CPU_AVAILABLE -= nthreads; | |||
| WMB; | |||
| pthread_mutex_unlock(&level3_lock); | |||
| #endif | |||
| #ifdef USE_ALLOC_HEAP | |||
| @@ -783,6 +791,10 @@ static int gemm_driver(blas_arg_t *args, BLASLONG *range_m, BLASLONG | |||
| #elif defined(OS_WINDOWS) | |||
| LeaveCriticalSection((PCRITICAL_SECTION)&level3_lock); | |||
| #else | |||
| pthread_mutex_lock(&level3_lock); | |||
| CPU_AVAILABLE += nthreads; | |||
| WMB; | |||
| pthread_cond_signal(&level3_wakeup); | |||
| pthread_mutex_unlock(&level3_lock); | |||
| #endif | |||
| @@ -927,6 +927,7 @@ static gotoblas_t *get_coretype(void){ | |||
| case 0x7: | |||
| switch (exmodel) { | |||
| case 5: | |||
| case 6: | |||
| if (support_avx2()) | |||
| return &gotoblas_ZEN; | |||
| else | |||
| @@ -137,7 +137,7 @@ endif () | |||
| foreach (float_type ${FLOAT_TYPES}) | |||
| if (${float_type} STREQUAL "COMPLEX" OR ${float_type} STREQUAL "ZCOMPLEX") | |||
| GenerateNamedObjects("zaxpy.c" "" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type}) | |||
| GenerateNamedObjects("zaxpy.c" "CONJ" "axpyc" ${CBLAS_FLAG} "" "" false ${float_type}) | |||
| GenerateNamedObjects("zger.c" "" "geru" ${CBLAS_FLAG} "" "" false ${float_type}) | |||
| GenerateNamedObjects("zger.c" "CONJ" "gerc" ${CBLAS_FLAG} "" "" false ${float_type}) | |||
| @@ -108,7 +108,12 @@ dot_kernel_sve(BLASLONG n, FLOAT* x, FLOAT* y) | |||
| [N_] "r" (n), | |||
| [X_] "r" (x), | |||
| [Y_] "r" (y) | |||
| :); | |||
| : "cc", | |||
| "memory", | |||
| "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", | |||
| "x8", "x9", "x10", "x11", "x12", "x13", "d1", | |||
| "z0", "z1" | |||
| ); | |||
| return ret; | |||
| } | |||
| @@ -292,7 +292,10 @@ static void zdot_compute(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLON | |||
| : "cc", | |||
| "memory", | |||
| "x0", "x1", "x2", "x3", "x4", "x5", | |||
| "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" | |||
| "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", | |||
| "v16", "v17", "v18", "v19", "v20", "v21", "v22", | |||
| "v23", "v24", "v25", "v26", "v27", "v28", "v29", | |||
| "v30", "v31" | |||
| ); | |||
| cf=OPENBLAS_MAKE_COMPLEX_FLOAT(dotr, doti); | |||
| @@ -144,10 +144,11 @@ void CNAME(BLASLONG n, FLOAT_TYPE * in, BLASLONG inc_in, bfloat16 * out, BLASLON | |||
| if (inc_in == 0 || inc_out == 0 || n <= 100000) { | |||
| nthreads = 1; | |||
| } else { | |||
| nthreads = num_cpu_avail(1); | |||
| if (n/100000 < 100) { | |||
| nthreads = 4; | |||
| } else { | |||
| nthreads = 16; | |||
| nthreads = MAX(nthreads,4); | |||
| // } else { | |||
| // nthreads = MAX(nthreads,16); | |||
| } | |||
| } | |||
| @@ -2772,7 +2772,7 @@ c LIWEDC = 12 | |||
| RESULT( NTEST ) = ULPINV | |||
| RESULT( NTEST+1 ) = ULPINV | |||
| RESULT( NTEST+2 ) = ULPINV | |||
| GO TO 700 | |||
| GO TO 1750 | |||
| END IF | |||
| END IF | |||
| * | |||
| @@ -2797,13 +2797,13 @@ c LIWEDC = 12 | |||
| RETURN | |||
| ELSE | |||
| RESULT( NTEST ) = ULPINV | |||
| GO TO 700 | |||
| GO TO 1750 | |||
| END IF | |||
| END IF | |||
| * | |||
| IF( M3.EQ.0 .AND. N.GT.0 ) THEN | |||
| RESULT( NTEST ) = ULPINV | |||
| GO TO 700 | |||
| GO TO 1750 | |||
| END IF | |||
| * | |||
| * Do test 78 (or +54) | |||
| @@ -2819,6 +2819,8 @@ c LIWEDC = 12 | |||
| $ MAX( UNFL, TEMP3*ULP ) | |||
| * | |||
| CALL DLACPY( ' ', N, N, V, LDU, A, LDA ) | |||
| * | |||
| 1750 CONTINUE | |||
| * | |||
| 1720 CONTINUE | |||
| * | |||
| @@ -534,8 +534,8 @@ | |||
| $ / ' 2: norm( I - Q'' Q ) / ( m ulp )', | |||
| $ / ' 3: norm( I - PT PT'' ) / ( n ulp )', | |||
| $ / ' 4: norm( Y - Q'' C ) / ( norm(Y) max(m,nrhs) ulp )' ) | |||
| 9968 FORMAT( / ' Tests performed: See sdrvst.f' ) | |||
| 9967 FORMAT( / ' Tests performed: See cdrvst.f' ) | |||
| 9968 FORMAT( / ' Tests performed: See ddrvst.f' ) | |||
| 9967 FORMAT( / ' Tests performed: See zdrvst.f' ) | |||
| * | |||
| * End of DLAHD2 | |||
| * | |||
| @@ -2772,7 +2772,7 @@ c LIWEDC = 12 | |||
| RESULT( NTEST ) = ULPINV | |||
| RESULT( NTEST+1 ) = ULPINV | |||
| RESULT( NTEST+2 ) = ULPINV | |||
| GO TO 700 | |||
| GO TO 1750 | |||
| END IF | |||
| END IF | |||
| * | |||
| @@ -2797,13 +2797,13 @@ c LIWEDC = 12 | |||
| RETURN | |||
| ELSE | |||
| RESULT( NTEST ) = ULPINV | |||
| GO TO 700 | |||
| GO TO 1750 | |||
| END IF | |||
| END IF | |||
| * | |||
| IF( M3.EQ.0 .AND. N.GT.0 ) THEN | |||
| RESULT( NTEST ) = ULPINV | |||
| GO TO 700 | |||
| GO TO 1750 | |||
| END IF | |||
| * | |||
| * Do test 78 (or +54) | |||
| @@ -2819,6 +2819,8 @@ c LIWEDC = 12 | |||
| $ MAX( UNFL, TEMP3*ULP ) | |||
| * | |||
| CALL SLACPY( ' ', N, N, V, LDU, A, LDA ) | |||
| * | |||
| 1750 CONTINUE | |||
| * | |||
| 1720 CONTINUE | |||
| * | |||
| @@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <sys/types.h> | |||
| #include <sys/wait.h> | |||
| #include <errno.h> | |||
| #include <cblas.h> | |||
| #include "openblas_utest.h" | |||
| @@ -41,7 +42,7 @@ static void* xmalloc(size_t n) | |||
| void* tmp; | |||
| tmp = malloc(n); | |||
| if (tmp == NULL) { | |||
| fprintf(stderr, "You are about to die\n"); | |||
| fprintf(stderr, "Failed to allocate memory for the testcase.\n"); | |||
| exit(1); | |||
| } else { | |||
| return tmp; | |||
| @@ -103,6 +104,7 @@ exit(0); | |||
| fork_pid = fork(); | |||
| if (fork_pid == -1) { | |||
| perror("fork"); | |||
| CTEST_ERR("Failed to fork process."); | |||
| } else if (fork_pid == 0) { | |||
| // Compute a DGEMM product in the child process to check that the | |||
| @@ -113,7 +115,8 @@ exit(0); | |||
| // recursively | |||
| fork_pid_nested = fork(); | |||
| if (fork_pid_nested == -1) { | |||
| CTEST_ERR("Failed to fork process."); | |||
| perror("fork"); | |||
| CTEST_ERR("Failed to fork nested process."); | |||
| exit(1); | |||
| } else if (fork_pid_nested == 0) { | |||
| check_dgemm(a, b, d, c, n); | |||
| @@ -33,6 +33,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #include <sys/types.h> | |||
| #include <sys/wait.h> | |||
| #include <errno.h> | |||
| #include <cblas.h> | |||
| #ifdef USE_OPENMP | |||
| #include <omp.h> | |||
| @@ -44,7 +45,7 @@ static void* xmalloc(size_t n) | |||
| void* tmp; | |||
| tmp = malloc(n); | |||
| if (tmp == NULL) { | |||
| fprintf(stderr, "You are about to die\n"); | |||
| fprintf(stderr, "Failed to allocate memory for the test payload.\n"); | |||
| exit(1); | |||
| } else { | |||
| return tmp; | |||
| @@ -114,7 +115,11 @@ exit(0); | |||
| fork_pid = fork(); | |||
| if (fork_pid == -1) { | |||
| CTEST_ERR("Failed to fork process."); | |||
| perror("fork"); | |||
| CTEST_ERR("Failed to fork subprocesses in a loop."); | |||
| #ifdef USE_OPENMP | |||
| CTEST_ERR("Number of OpenMP threads was %d in this attempt.",i); | |||
| #endif | |||
| } else if (fork_pid == 0) { | |||
| // Just pretend to do something, e.g. call `uname`, then exit | |||
| exit(0); | |||