Browse Source

Change "HALF" and "sh" to "BFLOAT16" and "sb"

tags/v0.3.11^2
Martin Kroeker GitHub 5 years ago
parent
commit
2c552f1074
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 48 additions and 36 deletions
  1. +16
    -20
      CMakeLists.txt
  2. +25
    -9
      Makefile.rule
  3. +5
    -5
      Makefile.system
  4. +2
    -2
      Makefile.tail

+ 16
- 20
CMakeLists.txt View File

@@ -29,10 +29,8 @@ option(NO_AFFINITY "Disable support for CPU affinity masks to avoid binding proc
else() else()
set(NO_AFFINITY 1) set(NO_AFFINITY 1)
endif() endif()
option(BUILD_SINGLE "Single precision" OFF)
option(BUILD_DOUBLE "Double precision" OFF)
option(BUILD_COMPLEX "Single precision" OFF)
option(BUILD_COMPLEX16 "Single precision" OFF)
option(CPP_THREAD_SAFETY_TEST "Run a massively parallel DGEMM test to confirm thread safety of the library (requires OpenMP and about 1.3GB of RAM)" OFF)
option(CPP_THREAD_SAFETY_GEMV "Run a massively parallel DGEMV test to confirm thread safety of the library (requires OpenMP)" OFF)


# Add a prefix or suffix to all exported symbol names in the shared library. # Add a prefix or suffix to all exported symbol names in the shared library.
# Avoids conflicts with other BLAS libraries, especially when using # Avoids conflicts with other BLAS libraries, especially when using
@@ -91,13 +89,13 @@ if (NOT NO_LAPACK)
list(APPEND SUBDIRS lapack) list(APPEND SUBDIRS lapack)
endif () endif ()


if (NOT DEFINED BUILD_HALF)
set (BUILD_HALF false)
if (NOT DEFINED BUILD_BFLOAT16)
set (BUILD_BFLOAT16 false)
endif () endif ()
# set which float types we want to build for # set which float types we want to build for
if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16) if (NOT DEFINED BUILD_SINGLE AND NOT DEFINED BUILD_DOUBLE AND NOT DEFINED BUILD_COMPLEX AND NOT DEFINED BUILD_COMPLEX16)
# if none are defined, build for all # if none are defined, build for all
# set(BUILD_HALF true)
# set(BUILD_BFLOAT16 true)
set(BUILD_SINGLE true) set(BUILD_SINGLE true)
set(BUILD_DOUBLE true) set(BUILD_DOUBLE true)
set(BUILD_COMPLEX true) set(BUILD_COMPLEX true)
@@ -110,33 +108,28 @@ endif()


set(FLOAT_TYPES "") set(FLOAT_TYPES "")
if (BUILD_SINGLE) if (BUILD_SINGLE)
message(STATUS "Building Songle Precision")
list(APPEND FLOAT_TYPES "SINGLE")
# set(CCOMMON_OPT "${CCOMMON_OPT} -DBUILD_SINGLE=1")
message(STATUS "Building Single Precision")
list(APPEND FLOAT_TYPES "SINGLE") # defines nothing
endif () endif ()


if (BUILD_DOUBLE) if (BUILD_DOUBLE)
message(STATUS "Building Double Precision") message(STATUS "Building Double Precision")
list(APPEND FLOAT_TYPES "DOUBLE")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_DOUBLE=1")
list(APPEND FLOAT_TYPES "DOUBLE") # defines DOUBLE
endif () endif ()


if (BUILD_COMPLEX) if (BUILD_COMPLEX)
message(STATUS "Building Complex Precision") message(STATUS "Building Complex Precision")
list(APPEND FLOAT_TYPES "COMPLEX")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX=1")
endif ()
list(APPEND FLOAT_TYPES "COMPLEX") # defines COMPLEX
endif ()


if (BUILD_COMPLEX16) if (BUILD_COMPLEX16)
message(STATUS "Building Double Complex Precision") message(STATUS "Building Double Complex Precision")
list(APPEND FLOAT_TYPES "ZCOMPLEX")
#set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_COMPLEX16=1")
list(APPEND FLOAT_TYPES "ZCOMPLEX") # defines COMPLEX and DOUBLE
endif () endif ()


if (BUILD_HALF)
if (BUILD_BFLOAT16)
message(STATUS "Building Half Precision") message(STATUS "Building Half Precision")
list(APPEND FLOAT_TYPES "HALF")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DBUILD_HALF")
list(APPEND FLOAT_TYPES "BFLOAT16") # defines nothing
endif () endif ()


if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN") if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
@@ -243,6 +236,9 @@ if (NOT MSVC AND NOT NOFORTRAN)
add_subdirectory(ctest) add_subdirectory(ctest)
endif() endif()
add_subdirectory(lapack-netlib/TESTING) add_subdirectory(lapack-netlib/TESTING)
if (CPP_THREAD_SAFETY_TEST OR CPP_THREAD_SAFETY_GEMV)
add_subdirectory(cpp_thread_test)
endif()
endif() endif()


set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES


+ 25
- 9
Makefile.rule View File

@@ -272,17 +272,33 @@ COMMON_PROF = -pg
# work at all. # work at all.
# #
# CPP_THREAD_SAFETY_TEST = 1 # CPP_THREAD_SAFETY_TEST = 1
#
# use this to run only the less memory-hungry GEMV test
# CPP_THREAD_SAFETY_GEMV = 1




# If you want to enable the experimental BFLOAT16 support # If you want to enable the experimental BFLOAT16 support
# BUILD_HALF = 1
#
# Select if you need to build only select types
# BUILD_SINGLE = 1
# BUILD_DOUBLE = 1
# BUILD_COMPLEX = 1
# BUILD_COMPLEX16 = 1
#
#
# BUILD_BFLOAT16 = 1


# Set the thread number threshold beyond which the job array for the threaded level3 BLAS
# will be allocated on the heap rather than the stack. (This array alone requires
# NUM_THREADS*NUM_THREADS*128 bytes of memory so should not pose a problem at low cpu
# counts, but obviously it is not the only item that ends up on the stack.)
# The default value of 32 ensures that the overall requirement is compatible
# with the default 1MB stacksize imposed by having the Java VM loaded without use
# of its -Xss parameter.
# The value of 160 formerly used from about version 0.2.7 until 0.3.10 is easily compatible
# with the common Linux stacksize of 8MB but will cause crashes with unwary use of the Java
# VM, e.g. in Octave or with the Java-based libhdfs in numpy or scipy code.
# BLAS3_MEM_ALLOC_THRESHOLD = 160



# the below is not yet configurable, use cmake if you need to build only select types
BUILD_SINGLE = 1
BUILD_DOUBLE = 1
BUILD_COMPLEX = 1
BUILD_COMPLEX16 = 1
# End of user configuration # End of user configuration
# #

+ 5
- 5
Makefile.system View File

@@ -1232,8 +1232,8 @@ ifeq ($(USE_TLS), 1)
CCOMMON_OPT += -DUSE_TLS CCOMMON_OPT += -DUSE_TLS
endif endif


ifeq ($(BUILD_HALF), 1)
CCOMMON_OPT += -DBUILD_HALF
ifeq ($(BUILD_BFLOAT16), 1)
CCOMMON_OPT += -DBUILD_BFLOAT16
endif endif
ifeq ($(BUILD_SINGLE), 1) ifeq ($(BUILD_SINGLE), 1)
CCOMMON_OPT += -DBUILD_SINGLE=1 CCOMMON_OPT += -DBUILD_SINGLE=1
@@ -1521,10 +1521,10 @@ export KERNELDIR
export FUNCTION_PROFILE export FUNCTION_PROFILE
export TARGET_CORE export TARGET_CORE
export NO_AVX512 export NO_AVX512
export BUILD_HALF
export BUILD_BFLOAT16


export SHGEMM_UNROLL_M
export SHGEMM_UNROLL_N
export SBGEMM_UNROLL_M
export SBGEMM_UNROLL_N
export SGEMM_UNROLL_M export SGEMM_UNROLL_M
export SGEMM_UNROLL_N export SGEMM_UNROLL_N
export DGEMM_UNROLL_M export DGEMM_UNROLL_M


+ 2
- 2
Makefile.tail View File

@@ -24,14 +24,14 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS)
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P)
endif endif


$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
$(SHBLASOBJS) $(SHBLASOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX $(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX $(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX $(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX $(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX $(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX $(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX
$(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DHALF -UDOUBLE -UCOMPLEX
$(SHEXTOBJS) $(SHEXTOBJS_P) : override CFLAGS += -DBFLOAT16 -UDOUBLE -UCOMPLEX


$(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) $(SHBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) $(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF)


Loading…
Cancel
Save