diff --git a/.travis.yml b/.travis.yml index 63b469716..b1a13acd9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,4 +1,119 @@ +# XXX: Precise is already deprecated, new default is Trusty. +# https://blog.travis-ci.com/2017-07-11-trusty-as-default-linux-is-coming +dist: precise +sudo: false language: c +compiler: gcc + +jobs: + include: + - &test-ubuntu + stage: test + addons: + apt: + packages: + - gfortran + before_script: &common-before + - COMMON_FLAGS="DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32" + script: + - set -e + - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE + - make -C test $COMMON_FLAGS $BTYPE + - make -C ctest $COMMON_FLAGS $BTYPE + - make -C utest $COMMON_FLAGS $BTYPE + env: + - TARGET_BOX=LINUX64 + - BTYPE="BINARY=64" + + - <<: *test-ubuntu + env: + - TARGET_BOX=LINUX64 + - BTYPE="BINARY=64 USE_OPENMP=1" + + - <<: *test-ubuntu + env: + - TARGET_BOX=LINUX64 + - BTYPE="BINARY=64 INTERFACE64=1" + + - <<: *test-ubuntu + addons: + apt: + packages: + - gcc-multilib + - gfortran-multilib + env: + - TARGET_BOX=LINUX32 + - BTYPE="BINARY=32" + + - stage: test + addons: + apt: + packages: + - binutils-mingw-w64-x86-64 + - gcc-mingw-w64-x86-64 + - gfortran-mingw-w64-x86-64 + before_script: *common-before + script: + - make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE + env: + - TARGET_BOX=WIN64 + - BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran" + + # Build & test on Alpine Linux inside chroot, i.e. on system with musl libc. + # These jobs needs sudo, so Travis runs them on VM-based infrastructure + # which is slower than container-based infrastructure used for jobs + # that don't require sudo. + - &test-alpine + stage: test + dist: trusty + sudo: true + language: minimal + before_install: + - "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.6.0/alpine-chroot-install' \ + && echo 'a827a4ba3d0817e7c88bae17fe34e50204983d1e alpine-chroot-install' | sha1sum -c || exit 1" + - alpine() { /alpine/enter-chroot -u "$USER" "$@"; } + install: + - sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers' + before_script: *common-before + script: + - set -e + # XXX: Disable some warnings for now to avoid exceeding Travis limit for log size. + - alpine make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE + CFLAGS="-Wno-misleading-indentation -Wno-sign-conversion -Wno-incompatible-pointer-types" + - alpine make -C test $COMMON_FLAGS $BTYPE + - alpine make -C ctest $COMMON_FLAGS $BTYPE + - alpine make -C utest $COMMON_FLAGS $BTYPE + env: + - TARGET_BOX=LINUX64_MUSL + - BTYPE="BINARY=64" + + # XXX: This job segfaults in TESTS OF THE COMPLEX LEVEL 3 BLAS, + # so it's "allowed to fail" for now (see allow_failures). + - &test-alpine-openmp + <<: *test-alpine + env: + - TARGET_BOX=LINUX64_MUSL + - BTYPE="BINARY=64 USE_OPENMP=1" + + - <<: *test-alpine + env: + - TARGET_BOX=LINUX64_MUSL + - BTYPE="BINARY=64 INTERFACE64=1" + + # Build with the same flags as Alpine do in OpenBLAS package. + - <<: *test-alpine + env: + - TARGET_BOX=LINUX64_MUSL + - BTYPE="BINARY=64 NO_AFFINITY=1 USE_OPENMP=0 NO_LAPACK=0 TARGET=core2" + + allow_failures: + - <<: *test-alpine-openmp + +# whitelist +branches: + only: + - master + - develop notifications: webhooks: @@ -7,32 +122,3 @@ notifications: on_success: change # options: [always|never|change] default: always on_failure: always # options: [always|never|change] default: always on_start: never # options: [always|never|change] default: always - -compiler: - - gcc - -env: - - TARGET_BOX=LINUX64 BTYPE="BINARY=64" - - TARGET_BOX=LINUX64 BTYPE="BINARY=64 USE_OPENMP=1" - - TARGET_BOX=LINUX64 BTYPE="BINARY=64 INTERFACE64=1" - - TARGET_BOX=LINUX32 BTYPE="BINARY=32" - - TARGET_BOX=WIN64 BTYPE="BINARY=64 HOSTCC=gcc CC=x86_64-w64-mingw32-gcc FC=x86_64-w64-mingw32-gfortran" - -before_install: - - sudo apt-get update -qq - - sudo apt-get install -qq gfortran - - if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi - - if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi - -script: - - set -e - - make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE - - if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi - - if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi - - if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi - -# whitelist -branches: - only: - - master - - develop \ No newline at end of file diff --git a/appveyor.yml b/appveyor.yml index c9d8e47ac..087b22665 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -12,31 +12,36 @@ clone_folder: c:\projects\OpenBLAS init: - git config --global core.autocrlf input -build: - project: OpenBLAS.sln - clone_depth: 5 -#branches to build -branches: - only: - - master - - develop - - cmake - skip_tags: true matrix: - fast_finish: true + fast_finish: false skip_commits: # Add [av skip] to commit messages message: /\[av skip\]/ +environment: + matrix: + - COMPILER: clang-cl + - COMPILER: cl + +install: + - if [%COMPILER%]==[clang-cl] call C:\Miniconda36-x64\Scripts\activate.bat + - if [%COMPILER%]==[clang-cl] conda config --add channels conda-forge --force + - if [%COMPILER%]==[clang-cl] conda install --yes clangdev ninja cmake + - if [%COMPILER%]==[clang-cl] call "C:\Program Files (x86)\Microsoft Visual Studio 14.0\VC\vcvarsall.bat" amd64 + before_build: - echo Running cmake... - cd c:\projects\OpenBLAS - - cmake -G "Visual Studio 12 Win64" . + - if [%COMPILER%]==[cl] cmake -G "Visual Studio 12 Win64" . + - if [%COMPILER%]==[clang-cl] cmake -G "Ninja" -DCMAKE_CXX_COMPILER=clang-cl -DCMAKE_C_COMPILER=clang-cl . + +build_script: + - cmake --build . test_script: - echo Running Test diff --git a/cmake/c_check.cmake b/cmake/c_check.cmake index fc376c659..2249a873f 100644 --- a/cmake/c_check.cmake +++ b/cmake/c_check.cmake @@ -28,6 +28,8 @@ set(FU "") if(APPLE) set(FU "_") +elseif(MSVC AND ${CMAKE_C_COMPILER_ID} MATCHES "Clang") +set(FU "") elseif(MSVC) set(FU "_") elseif(UNIX) @@ -59,7 +61,8 @@ endif () # CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on. # # TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check -set(ARCH ${CMAKE_SYSTEM_PROCESSOR}) +set(ARCH ${CMAKE_SYSTEM_PROCESSOR} CACHE STRING "Target Architecture") + if (${ARCH} STREQUAL "AMD64") set(ARCH "x86_64") endif () diff --git a/cmake/export.cmake b/cmake/export.cmake index 629f8fbc2..a9d1fc458 100644 --- a/cmake/export.cmake +++ b/cmake/export.cmake @@ -51,7 +51,8 @@ else() endif() add_custom_command( - TARGET ${OpenBLAS_LIBNAME} PRE_LINK + OUTPUT ${PROJECT_BINARY_DIR}/openblas.def + #TARGET ${OpenBLAS_LIBNAME} PRE_LINK COMMAND perl ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def" COMMENT "Create openblas.def file" diff --git a/cmake/prebuild.cmake b/cmake/prebuild.cmake index 8fbd0419d..2c262b0b6 100644 --- a/cmake/prebuild.cmake +++ b/cmake/prebuild.cmake @@ -66,15 +66,14 @@ set(GETARCH_SRC ${CPUIDEMO} ) -if (NOT MSVC) +if ("${CMAKE_C_COMPILER_ID}" STREQUAL "MSVC") + #Use generic for MSVC now + message("MSVC") + set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) +else() list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S) endif () -if (MSVC) -#Use generic for MSVC now -set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) -endif() - if ("${CMAKE_SYSTEM_NAME}" STREQUAL "WindowsStore") # disable WindowsStore strict CRT checks set(GETARCH_FLAGS ${GETARCH_FLAGS} -D_CRT_SECURE_NO_WARNINGS) diff --git a/common.h b/common.h index 4463141c8..ae98279ef 100644 --- a/common.h +++ b/common.h @@ -495,6 +495,33 @@ static void __inline blas_lock(volatile BLASULONG *address){ #define MMAP_POLICY (MAP_PRIVATE | MAP_ANONYMOUS) #endif +#ifndef ASSEMBLER +/* C99 supports complex floating numbers natively, which GCC also offers as an + extension since version 3.0. If neither are available, use a compatible + structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ +#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ + (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER) + #define OPENBLAS_COMPLEX_C99 + #ifndef __cplusplus + #include + #endif + typedef float _Complex openblas_complex_float; + typedef double _Complex openblas_complex_double; + typedef xdouble _Complex openblas_complex_xdouble; + #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) + #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) + #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) +#else + #define OPENBLAS_COMPLEX_STRUCT + typedef struct { float real, imag; } openblas_complex_float; + typedef struct { double real, imag; } openblas_complex_double; + typedef struct { xdouble real, imag; } openblas_complex_xdouble; + #define openblas_make_complex_float(real, imag) {(real), (imag)} + #define openblas_make_complex_double(real, imag) {(real), (imag)} + #define openblas_make_complex_xdouble(real, imag) {(real), (imag)} +#endif +#endif + #include "param.h" #include "common_param.h" @@ -524,31 +551,6 @@ static void __inline blas_lock(volatile BLASULONG *address){ #include #endif // NOINCLUDE -/* C99 supports complex floating numbers natively, which GCC also offers as an - extension since version 3.0. If neither are available, use a compatible - structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ -#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) - #define OPENBLAS_COMPLEX_C99 - #ifndef __cplusplus - #include - #endif - typedef float _Complex openblas_complex_float; - typedef double _Complex openblas_complex_double; - typedef xdouble _Complex openblas_complex_xdouble; - #define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) - #define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) - #define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) -#else - #define OPENBLAS_COMPLEX_STRUCT - typedef struct { float real, imag; } openblas_complex_float; - typedef struct { double real, imag; } openblas_complex_double; - typedef struct { xdouble real, imag; } openblas_complex_xdouble; - #define openblas_make_complex_float(real, imag) {(real), (imag)} - #define openblas_make_complex_double(real, imag) {(real), (imag)} - #define openblas_make_complex_xdouble(real, imag) {(real), (imag)} -#endif - #ifdef XDOUBLE #define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble #define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i) diff --git a/common_param.h b/common_param.h index 36d6149ea..0513ace9f 100644 --- a/common_param.h +++ b/common_param.h @@ -333,8 +333,8 @@ BLASLONG (*icamin_k)(BLASLONG, float *, BLASLONG); float (*cnrm2_k) (BLASLONG, float *, BLASLONG); float (*casum_k) (BLASLONG, float *, BLASLONG); int (*ccopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); - float _Complex (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); - float _Complex (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); + openblas_complex_float (*cdotu_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); + openblas_complex_float (*cdotc_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*csrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); int (*caxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); @@ -496,8 +496,8 @@ BLASLONG (*izamin_k)(BLASLONG, double *, BLASLONG); double (*znrm2_k) (BLASLONG, double *, BLASLONG); double (*zasum_k) (BLASLONG, double *, BLASLONG); int (*zcopy_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); - double _Complex (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); - double _Complex (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); + openblas_complex_double (*zdotu_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); + openblas_complex_double (*zdotc_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG); int (*zdrot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); int (*zaxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); @@ -661,8 +661,8 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); xdouble (*xnrm2_k) (BLASLONG, xdouble *, BLASLONG); xdouble (*xasum_k) (BLASLONG, xdouble *, BLASLONG); int (*xcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); - xdouble _Complex (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); - xdouble _Complex (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); + openblas_complex_xdouble (*xdotu_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); + openblas_complex_xdouble (*xdotc_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); int (*xqrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble); int (*xaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c index e86b565f8..9d374676e 100644 --- a/driver/level2/gbmv_thread.c +++ b/driver/level2/gbmv_thread.c @@ -230,8 +230,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT #ifndef TRANSA range_m[num_cpu] = num_cpu * ((m + 15) & ~15); + if (range_m[num_cpu] > m) range_m[num_cpu] = m; #else range_m[num_cpu] = num_cpu * ((n + 15) & ~15); + if (range_m[num_cpu] > n) range_m[num_cpu] = n; #endif queue[num_cpu].mode = mode; diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c index 5718c0ec9..ce841ee0e 100644 --- a/driver/level2/sbmv_thread.c +++ b/driver/level2/sbmv_thread.c @@ -246,6 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; @@ -285,6 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; @@ -316,6 +318,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * ((n + 15) & ~15); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = sbmv_kernel; diff --git a/driver/level2/spmv_thread.c b/driver/level2/spmv_thread.c index 035300841..0b4087430 100644 --- a/driver/level2/spmv_thread.c +++ b/driver/level2/spmv_thread.c @@ -246,6 +246,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); + if (range_n[num_cpu] > m) range_n[num_cpu] = m; queue[num_cpu].mode = mode; queue[num_cpu].routine = spmv_kernel; @@ -285,6 +286,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); + if (range_n[num_cpu] > m) range_n[num_cpu] = m; queue[num_cpu].mode = mode; queue[num_cpu].routine = spmv_kernel; diff --git a/driver/level2/symv_thread.c b/driver/level2/symv_thread.c index 6580178f1..8d4cd249c 100644 --- a/driver/level2/symv_thread.c +++ b/driver/level2/symv_thread.c @@ -177,7 +177,8 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - + if (range_n[num_cpu] > m) range_n[num_cpu] = m; + queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode; queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel; queue[MAX_CPU_NUMBER - num_cpu - 1].args = &args; @@ -225,6 +226,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); + if (range_n[num_cpu] > m) range_n[num_cpu] = m; queue[num_cpu].mode = mode; queue[num_cpu].routine = symv_kernel; diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c index 226a922e9..aaf4958e2 100644 --- a/driver/level2/tbmv_thread.c +++ b/driver/level2/tbmv_thread.c @@ -288,6 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; @@ -327,6 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; @@ -356,6 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); + if (range_n[num_cpu] > n) range_n[num_cpu] = n; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; diff --git a/driver/level2/tpmv_thread.c b/driver/level2/tpmv_thread.c index c91b52775..79438ba29 100644 --- a/driver/level2/tpmv_thread.c +++ b/driver/level2/tpmv_thread.c @@ -307,7 +307,8 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); - + if (range_n[num_cpu] > m) range_n[num_cpu] = m; + queue[num_cpu].mode = mode; queue[num_cpu].routine = tpmv_kernel; queue[num_cpu].args = &args; @@ -346,6 +347,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); + if (range_n[num_cpu] > m) range_n[num_cpu] = m; queue[num_cpu].mode = mode; queue[num_cpu].routine = tpmv_kernel; diff --git a/driver/level2/trmv_thread.c b/driver/level2/trmv_thread.c index 0a155366c..24b881a93 100644 --- a/driver/level2/trmv_thread.c +++ b/driver/level2/trmv_thread.c @@ -346,6 +346,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); + if (range_n[num_cpu] > m) range_n[num_cpu] = m; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; @@ -385,6 +386,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu range_m[num_cpu + 1] = range_m[num_cpu] + width; range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); + if (range_n[num_cpu] > m) range_n[num_cpu] = m; queue[num_cpu].mode = mode; queue[num_cpu].routine = trmv_kernel; diff --git a/driver/others/memory.c b/driver/others/memory.c index b5b58b6fd..661f7c4eb 100644 --- a/driver/others/memory.c +++ b/driver/others/memory.c @@ -155,7 +155,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifdef DYNAMIC_ARCH gotoblas_t *gotoblas = NULL; #endif - extern void openblas_warning(int verbose, const char * msg); #ifndef SMP @@ -187,25 +186,24 @@ int i,n; #if !defined(__GLIBC_PREREQ) return nums; -#endif -#if !__GLIBC_PREREQ(2, 3) +#else + #if !__GLIBC_PREREQ(2, 3) return nums; -#endif + #endif -#if !__GLIBC_PREREQ(2, 7) + #if !__GLIBC_PREREQ(2, 7) ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); if (ret!=0) return nums; n=0; -#if !__GLIBC_PREREQ(2, 6) + #if !__GLIBC_PREREQ(2, 6) for (i=0;i= 199901L || \ - (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) + (__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) && !defined(_MSC_VER) #define OPENBLAS_COMPLEX_C99 #ifndef __cplusplus #include diff --git a/utest/CMakeLists.txt b/utest/CMakeLists.txt index e52fb2c90..a7f3871c3 100644 --- a/utest/CMakeLists.txt +++ b/utest/CMakeLists.txt @@ -1,10 +1,14 @@ include_directories(${PROJECT_SOURCE_DIR}) include_directories(${PROJECT_BINARY_DIR}) -set(OpenBLAS_utest_src - utest_main.c - test_amax.c +if (MSVC AND "${CMAKE_C_COMPILER_ID}" MATCHES Clang) + set(OpenBLAS_utest_src utest_main2.c) +else () + set(OpenBLAS_utest_src + utest_main.c + test_amax.c ) +endif () if (NOT NO_LAPACK) set(OpenBLAS_utest_src @@ -36,7 +40,7 @@ endforeach() if (MSVC) add_custom_command(TARGET ${OpenBLAS_utest_bin} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/lib/$/${OpenBLAS_LIBNAME}.dll ${CMAKE_CURRENT_BINARY_DIR}/. + COMMAND ${CMAKE_COMMAND} -E copy ${PROJECT_BINARY_DIR}/lib/${CMAKE_CFG_INTDIR}/${OpenBLAS_LIBNAME}.dll ${CMAKE_CURRENT_BINARY_DIR}/. ) endif() diff --git a/utest/utest_main2.c b/utest/utest_main2.c new file mode 100644 index 000000000..565872b16 --- /dev/null +++ b/utest/utest_main2.c @@ -0,0 +1,61 @@ +/***************************************************************************** +Copyright (c) 2011-2016, The OpenBLAS Project +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + 3. Neither the name of the OpenBLAS project nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE +USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +**********************************************************************************/ + +#include + +#define CTEST_MAIN +#define CTEST_SEGFAULT +#define CTEST_ADD_TESTS_MANUALLY + +#include "openblas_utest.h" + +CTEST(amax, samax){ + blasint N=3, inc=1; + float te_max=0.0, tr_max=0.0; + float x[]={-1.1, 2.2, -3.3}; + te_max=BLASFUNC(samax)(&N, x, &inc); + tr_max=3.3; + + ASSERT_DBL_NEAR_TOL((double)(tr_max), (double)(te_max), SINGLE_EPS); +} + +int main(int argc, const char ** argv){ + + CTEST_ADD(amax, samax); + int num_fail=0; + + num_fail=ctest_main(argc, argv); + + return num_fail; +} +