| @@ -29,10 +29,8 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc | |||||
| else() | else() | ||||
| set(NO_AFFINITY 1) | set(NO_AFFINITY 1) | ||||
| endif() | endif() | ||||
| option(BUILD_SINGLE "Single precision" OFF) | |||||
| option(BUILD_DOUBLE "Double precision" OFF) | |||||
| option(BUILD_COMPLEX "Single precision" OFF) | |||||
| option(BUILD_COMPLEX16 "Single precision" OFF) | |||||
| option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF) | |||||
| option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF) | |||||
| # Add a prefix or suffix to all exported symbol names in the shared library. | # Add a prefix or suffix to all exported symbol names in the shared library. | ||||
| # Avoids conflicts with other BLAS libraries, especially when using | # Avoids conflicts with other BLAS libraries, especially when using | ||||
| @@ -91,13 +89,13 @@ if (NOT NO_LAPACK) | |||||
| list(APPEND SUBDIRS lapack) | list(APPEND SUBDIRS lapack) | ||||
| endif () | endif () | ||||
| if (NOT DEFINED BUILD_HALF) | |||||
| set (BUILD_HALF false) | |||||
| if (NOT DEFINED BUILD_BFLOAT16) | |||||
| set (BUILD_BFLOAT16 false) | |||||
| endif () | endif () | ||||
| # set which float types we want to build for | # set which float types we want to build for | ||||
| if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) | if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) | ||||
| # if none are defined, build for all | # if none are defined, build for all | ||||
| # set(BUILD_HALF true) | |||||
| # set(BUILD_BFLOAT16 true) | |||||
| set(BUILD_SINGLE true) | set(BUILD_SINGLE true) | ||||
| set(BUILD_DOUBLE true) | set(BUILD_DOUBLE true) | ||||
| set(BUILD_COMPLEX true) | set(BUILD_COMPLEX true) | ||||
| @@ -110,33 +108,28 @@ endif() | |||||
| set(FLOAT_TYPES "") | set(FLOAT_TYPES "") | ||||
| if (BUILD_SINGLE) | if (BUILD_SINGLE) | ||||
| message(STATUS "Building Songle Precision") | |||||
| list(APPEND FLOAT_TYPES "SINGLE") | |||||
| # set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1") | |||||
| message(STATUS "Building Single Precision") | |||||
| list(APPEND FLOAT_TYPES "SINGLE") # defines nothing | |||||
| endif () | endif () | ||||
| if (BUILD_DOUBLE) | if (BUILD_DOUBLE) | ||||
| message(STATUS "Building Double Precision") | message(STATUS "Building Double Precision") | ||||
| list(APPEND FLOAT_TYPES "DOUBLE") | |||||
| #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1") | |||||
| list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE | |||||
| endif () | endif () | ||||
| if (BUILD_COMPLEX) | if (BUILD_COMPLEX) | ||||
| message(STATUS "Building Complex Precision") | message(STATUS "Building Complex Precision") | ||||
| list(APPEND FLOAT_TYPES "COMPLEX") | |||||
| #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1") | |||||
| endif () | |||||
| list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX | |||||
| endif () | |||||
| if (BUILD_COMPLEX16) | if (BUILD_COMPLEX16) | ||||
| message(STATUS "Building Double Complex Precision") | message(STATUS "Building Double Complex Precision") | ||||
| list(APPEND FLOAT_TYPES "ZCOMPLEX") | |||||
| #set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1") | |||||
| list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE | |||||
| endif () | endif () | ||||
| if (BUILD_HALF) | |||||
| if (BUILD_BFLOAT16) | |||||
| message(STATUS "Building Half Precision") | message(STATUS "Building Half Precision") | ||||
| list(APPEND FLOAT_TYPES "HALF") | |||||
| set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_HALF") | |||||
| list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing | |||||
| endif () | endif () | ||||
| if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") | if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") | ||||
| @@ -243,6 +236,9 @@ if (NOT MSVC AND NOT NOFORTRAN) | |||||
| add_subdirectory(ctest) | add_subdirectory(ctest) | ||||
| endif() | endif() | ||||
| add_subdirectory(lapack-netlib/TESTING) | add_subdirectory(lapack-netlib/TESTING) | ||||
| if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV) | |||||
| add_subdirectory(cpp_thread_test) | |||||
| endif() | |||||
| endif() | endif() | ||||
| set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES | set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES | ||||
| @@ -272,17 +272,33 @@ COMMON_PROF = -pg | |||||
| # work at all. | # work at all. | ||||
| # | # | ||||
| # CPP_THREAD_SAFETY_TEST = 1 | # CPP_THREAD_SAFETY_TEST = 1 | ||||
| # | |||||
| # use this to run only the less memory-hungry GEMV test | |||||
| # CPP_THREAD_SAFETY_GEMV = 1 | |||||
| # If you want to enable the experimental BFLOAT16 support | # If you want to enable the experimental BFLOAT16 support | ||||
| # BUILD_HALF = 1 | |||||
| # | |||||
| # Select if you need to build only select types | |||||
| # BUILD_SINGLE = 1 | |||||
| # BUILD_DOUBLE = 1 | |||||
| # BUILD_COMPLEX = 1 | |||||
| # BUILD_COMPLEX16 = 1 | |||||
| # | |||||
| # | |||||
| # BUILD_BFLOAT16 = 1 | |||||
| # Set the thread number threshold beyond which the job array for the threaded level3 BLAS | |||||
| # will be allocated on the heap rather than the stack. (This array alone requires | |||||
| # NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu | |||||
| # counts, but obviously it is not the only item that ends up on the stack.) | |||||
| # The default value of 32 ensures that the overall requirement is compatible | |||||
| # with the default 1MB stacksize imposed by having the Java VM loaded without use | |||||
| # of its -Xss parameter. | |||||
| # The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible | |||||
| # with the common Linux stacksize of 8MB but will cause crashes with unwary use of the Java | |||||
| # VM, e.g. in Octave or with the Java-based libhdfs in numpy or scipy code. | |||||
| # BLAS3_MEM_ALLOC_THRESHOLD = 160 | |||||
| # The settings below are not yet configurable; use cmake if you need to build only selected types. | |||||
| BUILD_SINGLE = 1 | |||||
| BUILD_DOUBLE = 1 | |||||
| BUILD_COMPLEX = 1 | |||||
| BUILD_COMPLEX16 = 1 | |||||
| # End of user configuration | # End of user configuration | ||||
| # | # | ||||
| @@ -1232,8 +1232,8 @@ ifeq ($(USE_TLS), 1) | |||||
| CCOMMON_OPT += -DUSE_TLS | CCOMMON_OPT += -DUSE_TLS | ||||
| endif | endif | ||||
| ifeq ($(BUILD_HALF), 1) | |||||
| CCOMMON_OPT += -DBUILD_HALF | |||||
| ifeq ($(BUILD_BFLOAT16), 1) | |||||
| CCOMMON_OPT += -DBUILD_BFLOAT16 | |||||
| endif | endif | ||||
| ifeq ($(BUILD_SINGLE), 1) | ifeq ($(BUILD_SINGLE), 1) | ||||
| CCOMMON_OPT += -DBUILD_SINGLE=1 | CCOMMON_OPT += -DBUILD_SINGLE=1 | ||||
| @@ -1521,10 +1521,10 @@ export KERNELDIR | |||||
| export FUNCTION_PROFILE | export FUNCTION_PROFILE | ||||
| export TARGET_CORE | export TARGET_CORE | ||||
| export NO_AVX512 | export NO_AVX512 | ||||
| export BUILD_HALF | |||||
| export BUILD_BFLOAT16 | |||||
| export SHGEMM_UNROLL_M | |||||
| export SHGEMM_UNROLL_N | |||||
| export SBGEMM_UNROLL_M | |||||
| export SBGEMM_UNROLL_N | |||||
| export SGEMM_UNROLL_M | export SGEMM_UNROLL_M | ||||
| export SGEMM_UNROLL_N | export SGEMM_UNROLL_N | ||||
| export DGEMM_UNROLL_M | export DGEMM_UNROLL_M | ||||
| @@ -24,14 +24,14 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||||
| BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | ||||
| endif | endif | ||||
| $(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX | |||||
| $(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||||
| $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | ||||
| $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | ||||
| $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | ||||
| $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | ||||
| $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | ||||
| $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | ||||
| $(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX | |||||
| $(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX | |||||
| $(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||
| $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | ||||