@@ -15,6 +15,7 @@ lapack-netlib/make.inc | |||||
lapack-netlib/lapacke/include/lapacke_mangling.h | lapack-netlib/lapacke/include/lapacke_mangling.h | ||||
lapack-netlib/TESTING/testing_results.txt | lapack-netlib/TESTING/testing_results.txt | ||||
*.so | *.so | ||||
*.so.* | |||||
*.a | *.a | ||||
.svn | .svn | ||||
*~ | *~ | ||||
@@ -65,3 +66,5 @@ test/sblat3 | |||||
test/zblat1 | test/zblat1 | ||||
test/zblat2 | test/zblat2 | ||||
test/zblat3 | test/zblat3 | ||||
build | |||||
build.* |
@@ -1,4 +1,13 @@ | |||||
language: c | language: c | ||||
notifications: | |||||
webhooks: | |||||
urls: | |||||
- https://webhooks.gitter.im/e/8a6e4470a0cebd090344 | |||||
on_success: change # options: [always|never|change] default: always | |||||
on_failure: always # options: [always|never|change] default: always | |||||
on_start: never # options: [always|never|change] default: always | |||||
compiler: | compiler: | ||||
- gcc | - gcc | ||||
@@ -0,0 +1,190 @@ | |||||
## | |||||
## Author: Hank Anderson <hank@statease.com> | |||||
## | |||||
cmake_minimum_required(VERSION 2.8.4)
project(OpenBLAS)

# Library version. OpenBLAS_VERSION is the dotted "major.minor.patch" string.
# The patch level matches the 0.2.15 release that the Makefile build and the
# changelog declare.
set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 2)
set(OpenBLAS_PATCH_VERSION 15)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")

enable_language(ASM)
enable_language(C)

# Name of the library target: "libopenblas" under MSVC, "openblas" elsewhere.
if(MSVC)
  set(OpenBLAS_LIBNAME libopenblas)
else()
  set(OpenBLAS_LIBNAME openblas)
endif()
####### | |||||
# User-facing switches.
# BUILD_WITHOUT_LAPACK is only offered (and defaults to ON) when building with MSVC.
if(MSVC)
  option(BUILD_WITHOUT_LAPACK "Without LAPACK and LAPACKE (Only BLAS or CBLAS)" ON)
endif()
option(BUILD_WITHOUT_CBLAS "Without CBLAS" OFF)
option(BUILD_DEBUG "Build Debug Version" OFF)

#######
# Translate the switches into the NO_* variables used by the rest of the build.
if(BUILD_WITHOUT_LAPACK)
  set(NO_LAPACK 1)
  set(NO_LAPACKE 1)
endif()

if(BUILD_WITHOUT_CBLAS)
  set(NO_CBLAS 1)
endif()

# BUILD_DEBUG forces a Debug build. Otherwise Release is only a default:
# a build type already chosen by the user (e.g. -DCMAKE_BUILD_TYPE=RelWithDebInfo)
# is left untouched instead of being overwritten.
if(BUILD_DEBUG)
  set(CMAKE_BUILD_TYPE Debug)
elseif(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Release)
endif()
####### | |||||
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.")

# Helper scripts are located relative to this project's root
# (PROJECT_SOURCE_DIR) rather than the root of the whole build tree, so the
# paths stay valid when this directory is not the top-level project.
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake")
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake")

# Directories that make up the BLAS part of the library.
set(BLASDIRS interface driver/level2 driver/level3 driver/others)

# The kernel directory is only added directly when DYNAMIC_ARCH is off.
if (NOT DYNAMIC_ARCH)
  list(APPEND BLASDIRS kernel)
endif ()

# Defining UTEST_CHECK switches SANITY_CHECK on as well.
if (DEFINED UTEST_CHECK)
  set(SANITY_CHECK 1)
endif ()

# With SANITY_CHECK defined, the reference directory is built too.
if (DEFINED SANITY_CHECK)
  list(APPEND BLASDIRS reference)
endif ()

# SUBDIRS = BLAS directories, plus lapack unless NO_LAPACK is set.
set(SUBDIRS ${BLASDIRS})
if (NOT NO_LAPACK)
  list(APPEND SUBDIRS lapack)
endif ()
# Select the floating-point types to build.
# If none of the BUILD_* switches is defined, every type is enabled.
if (NOT (DEFINED BUILD_SINGLE OR DEFINED BUILD_DOUBLE OR DEFINED BUILD_COMPLEX OR DEFINED BUILD_COMPLEX16))
  foreach (float_switch BUILD_SINGLE BUILD_DOUBLE BUILD_COMPLEX BUILD_COMPLEX16)
    set(${float_switch} true)
  endforeach ()
endif ()

# Parallel tables: switch variable, FLOAT_TYPES entry, label used in the status message.
#   SINGLE   defines nothing
#   DOUBLE   defines DOUBLE
#   COMPLEX  defines COMPLEX
#   ZCOMPLEX defines COMPLEX and DOUBLE
set(float_switches BUILD_SINGLE BUILD_DOUBLE BUILD_COMPLEX BUILD_COMPLEX16)
set(float_names SINGLE DOUBLE COMPLEX ZCOMPLEX)
set(float_labels "Single" "Double" "Complex" "Double Complex")

set(FLOAT_TYPES "")
foreach (float_idx RANGE 3)
  list(GET float_switches ${float_idx} float_switch)
  list(GET float_names ${float_idx} float_name)
  list(GET float_labels ${float_idx} float_label)
  # ${float_switch} expands to the switch's name, which if() then evaluates as a variable.
  if (${float_switch})
    message(STATUS "Building ${float_label} Precision")
    list(APPEND FLOAT_TYPES "${float_name}")
  endif ()
endforeach ()

set(SUBDIRS_ALL ${SUBDIRS} test ctest utest exports benchmark ../laswp ../bench)
# all :: libs netlib tests shared | |||||
# libs : | |||||
# Stop when no CPU core is known. CORE is not set in this file; presumably it
# comes from the included cmake/system.cmake (verify there).
if (NOT DEFINED CORE OR "${CORE}" STREQUAL "UNKNOWN")
  message(FATAL_ERROR "Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for details.")
endif ()

# Refuse a configuration that disables both library flavours.
# The variables are named rather than expanded: with ${...}, an unset variable
# expands to nothing and leaves a malformed condition such as "if (1 AND )".
if (NO_STATIC AND NO_SHARED)
  message(FATAL_ERROR "Neither static nor shared are enabled.")
endif ()
# Collect the object libraries in the form add_library() accepts:
# $<TARGET_OBJECTS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html).
# The object library name used for each subdirectory is its path with "/" replaced by "_".
set(TARGET_OBJS "")
foreach (SUBDIR ${SUBDIRS})
  add_subdirectory(${SUBDIR})
  string(REPLACE "/" "_" subdir_obj "${SUBDIR}")
  list(APPEND TARGET_OBJS "$<TARGET_OBJECTS:${subdir_obj}>")
endforeach ()

# netlib:
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke.
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want.
# The scripts are located relative to this project's root (PROJECT_SOURCE_DIR)
# so the paths do not depend on this being the top-level project.
if (NOT NOFORTRAN AND NOT NO_LAPACK)
  include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake")
  if (NOT NO_LAPACKE)
    include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake")
  endif ()
endif ()
# A .def file is only generated for the DLL built with MSVC; elsewhere
# OpenBLAS_DEF_FILE stays unset and contributes nothing to the source list.
if (MSVC)
  set(OpenBLAS_DEF_FILE "${PROJECT_BINARY_DIR}/openblas.def")
endif ()

# Shared library: LAPACK/LAPACKE sources plus the collected object libraries.
add_library(${OpenBLAS_LIBNAME} SHARED
  ${LA_SOURCES}
  ${LAPACKE_SOURCES}
  ${TARGET_OBJS}
  ${OpenBLAS_DEF_FILE})

include("${CMAKE_SOURCE_DIR}/cmake/export.cmake")

# Outside MSVC a static library is built as well (MSVC builds the shared library only).
if (NOT MSVC)
  add_library(${OpenBLAS_LIBNAME}_static STATIC
    ${LA_SOURCES}
    ${LAPACKE_SOURCES}
    ${TARGET_OBJS})

  # Give the static library the same output name as the shared one.
  set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES
    OUTPUT_NAME ${OpenBLAS_LIBNAME}
    CLEAN_DIRECT_OUTPUT 1)

  # Both flavours link against pthread when SMP is set.
  if (SMP)
    foreach (lib_target ${OpenBLAS_LIBNAME} ${OpenBLAS_LIBNAME}_static)
      target_link_libraries(${lib_target} pthread)
    endforeach ()
  endif ()

  # Build test, and ctest unless CBLAS is disabled.
  enable_testing()
  add_subdirectory(test)
  if (NOT NO_CBLAS)
    add_subdirectory(ctest)
  endif ()
endif ()

# Version properties of the shared library: "major.minor" and the major number as SOVERSION.
set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES
  VERSION ${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}
  SOVERSION ${OpenBLAS_MAJOR_VERSION})
# TODO: Why is the config saved here? Is this necessary with CMake? | |||||
#Save the config files for installation | |||||
# @cp Makefile.conf Makefile.conf_last | |||||
# @cp config.h config_last.h | |||||
#ifdef QUAD_PRECISION | |||||
# @echo "#define QUAD_PRECISION">> config_last.h | |||||
#endif | |||||
#ifeq ($(EXPRECISION), 1) | |||||
# @echo "#define EXPRECISION">> config_last.h | |||||
#endif | |||||
### | |||||
#ifeq ($(DYNAMIC_ARCH), 1) | |||||
# @$(MAKE) -C kernel commonlibs || exit 1 | |||||
# @for d in $(DYNAMIC_CORE) ; \ | |||||
# do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ | |||||
# done | |||||
# @echo DYNAMIC_ARCH=1 >> Makefile.conf_last | |||||
#endif | |||||
#ifdef USE_THREAD | |||||
# @echo USE_THREAD=$(USE_THREAD) >> Makefile.conf_last | |||||
#endif | |||||
# @touch lib.grd | |||||
@@ -127,5 +127,8 @@ In chronological order: | |||||
* Ton van den Heuvel <https://github.com/ton> | * Ton van den Heuvel <https://github.com/ton> | ||||
* [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity(). | * [2015-03-18] Fix race condition during shutdown causing a crash in gotoblas_set_affinity(). | ||||
* Martin Koehler <https://github.com/grisuthedragon/> | |||||
* [2015-09-07] Improved imatcopy | |||||
* [Your name or handle] <[email or website]> | * [Your name or handle] <[email or website]> | ||||
* [Date] [Brief summary of your changes] | * [Date] [Brief summary of your changes] |
@@ -1,4 +1,57 @@ | |||||
OpenBLAS ChangeLog | OpenBLAS ChangeLog | ||||
==================================================================== | |||||
Version 0.2.15 | |||||
27-Oct-2015 | |||||
common: | |||||
* Support cmake on x86/x86-64. Natively compiling on MS Visual Studio. | |||||
(Experimental. Thanks to Hank Anderson for the initial cmake porting work.) | |||||
On Linux and Mac OSX, OpenBLAS cmake supports assembly kernels. | |||||
e.g. cmake . | |||||
make | |||||
make test (Optional) | |||||
On Windows MS Visual Studio, OpenBLAS cmake only supports C kernels. | |||||
(OpenBLAS uses AT&T style assembly, which is not supported by MSVC.) | |||||
e.g. cmake -G "Visual Studio 12 Win64" . | |||||
Open OpenBLAS.sln and build. | |||||
* Enable MAX_STACK_ALLOC flags by default. | |||||
Improve ger and gemv for small matrices. | |||||
* Improve gemv parallel with small m and large n case. | |||||
* Improve ?imatcopy when lda==ldb (#633. Thanks, Martin Koehler) | |||||
* Add vecLib benchmarks (#565. Thanks, Andreas Noack.) | |||||
* Fix LAPACK lantr for row major matrices (#634. Thanks, Dan Kortschak) | |||||
* Fix LAPACKE lansy (#640. Thanks, Dan Kortschak) | |||||
* Import bug fixes for LAPACKE s/dormlq, c/zunmlq | |||||
* Raise the signal when pthread_create fails (#668. Thanks, James K. Lowden) | |||||
* Remove g77 from compiler list. | |||||
* Enable AppVeyor Windows CI. | |||||
x86/x86-64: | |||||
* Support pure C generic kernels for x86/x86-64. | |||||
* Support Intel Broadwell and Skylake by Haswell kernels. | |||||
* Support AMD Excavator by Steamroller kernels. | |||||
* Optimize s/d/c/zdot for Intel SandyBridge and Haswell. | |||||
* Optimize s/d/c/zdot for AMD Piledriver and Steamroller. | |||||
* Optimize s/d/c/zaxpy for Intel SandyBridge and Haswell. | |||||
* Optimize s/d/c/zaxpy for AMD Piledriver and Steamroller. | |||||
* Optimize d/c/zscal for Intel Haswell, dscal for Intel SandyBridge. | |||||
* Optimize d/c/zscal for AMD Bulldozer, Piledriver and Steamroller. | |||||
* Optimize s/dger for Intel SandyBridge. | |||||
* Optimize s/dsymv for Intel SandyBridge. | |||||
* Optimize ssymv for Intel Haswell. | |||||
* Optimize dgemv for Intel Nehalem and Haswell. | |||||
* Optimize dtrmm for Intel Haswell. | |||||
ARM: | |||||
* Support Android NDK armeabi-v7a-hard ABI (-mfloat-abi=hard) | |||||
e.g. make HOSTCC=gcc CC=arm-linux-androideabi-gcc NO_LAPACK=1 TARGET=ARMV7 | |||||
* Fix lock, rpcc bugs (#616, #617. Thanks, Grazvydas Ignotas) | |||||
POWER: | |||||
* Support ppc64le platform (ELF ABI v2. #612. Thanks, Matthew Brandyberry.) | |||||
* Support POWER7/8 by POWER6 kernels. (#612. Thanks, Fábio Perez.) | |||||
==================================================================== | ==================================================================== | ||||
Version 0.2.14 | Version 0.2.14 | ||||
24-Mar-2015 | 24-Mar-2015 | ||||
@@ -20,6 +20,8 @@ ifneq ($(NO_LAPACK), 1) | |||||
SUBDIRS += lapack | SUBDIRS += lapack | ||||
endif | endif | ||||
LAPACK_NOOPT := $(filter-out -O0 -O1 -O2 -O3 -Ofast,$(LAPACK_FFLAGS)) | |||||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench | SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench | ||||
.PHONY : all libs netlib test ctest shared install | .PHONY : all libs netlib test ctest shared install | ||||
@@ -131,7 +133,7 @@ ifeq ($(CORE), UNKOWN) | |||||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) | $(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) | ||||
endif | endif | ||||
ifeq ($(NOFORTRAN), 1) | ifeq ($(NOFORTRAN), 1) | ||||
$(error OpenBLAS: Detecting fortran compiler failed. Please install fortran compiler, e.g. gfortran, ifort, openf90.) | |||||
$(info OpenBLAS: Detecting fortran compiler failed. Cannot compile LAPACK. Only compile BLAS.) | |||||
endif | endif | ||||
ifeq ($(NO_STATIC), 1) | ifeq ($(NO_STATIC), 1) | ||||
ifeq ($(NO_SHARED), 1) | ifeq ($(NO_SHARED), 1) | ||||
@@ -231,7 +233,7 @@ ifndef NOFORTRAN | |||||
-@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | -@echo "FORTRAN = $(FC)" > $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "OPTS = $(LAPACK_FFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "NOOPT = $(LAPACK_FFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
-@echo "NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LOADOPTS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
@@ -1,13 +1,23 @@ | |||||
# ifeq logical or | # ifeq logical or | ||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15)) | ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15)) | ||||
ifeq ($(OSNAME), Android) | |||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
else | |||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | ||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | ||||
endif | endif | ||||
endif | |||||
ifeq ($(CORE), ARMV7) | ifeq ($(CORE), ARMV7) | ||||
ifeq ($(OSNAME), Android) | |||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
else | |||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | ||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | ||||
endif | endif | ||||
endif | |||||
ifeq ($(CORE), ARMV6) | ifeq ($(CORE), ARMV6) | ||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | ||||
@@ -16,8 +26,8 @@ endif | |||||
ifeq ($(CORE), ARMV5) | ifeq ($(CORE), ARMV5) | ||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||||
CCOMMON_OPT += -marm -march=armv5 | |||||
FCOMMON_OPT += -marm -march=armv5 | |||||
endif | endif | ||||
@@ -11,6 +11,7 @@ OPENBLAS_BINARY_DIR := $(PREFIX)/bin | |||||
OPENBLAS_BUILD_DIR := $(CURDIR) | OPENBLAS_BUILD_DIR := $(CURDIR) | ||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas | OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas | ||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake | OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake | ||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake | |||||
.PHONY : install | .PHONY : install | ||||
.NOTPARALLEL : install | .NOTPARALLEL : install | ||||
@@ -86,8 +87,8 @@ ifeq ($(OSNAME), Darwin) | |||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib | ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib | ||||
endif | endif | ||||
ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR) | |||||
@-cp $(LIBDLLNAME).a $(OPENBLAS_LIBRARY_DIR) | |||||
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||||
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
endif | endif | ||||
ifeq ($(OSNAME), CYGWIN_NT) | ifeq ($(OSNAME), CYGWIN_NT) | ||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR) | @-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR) | ||||
@@ -97,6 +98,7 @@ endif | |||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | @echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | ||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | @echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | ||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | @echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | ||||
ifndef NO_SHARED | ifndef NO_SHARED | ||||
#ifeq logical or | #ifeq logical or | ||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD)) | ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD)) | ||||
@@ -112,5 +114,16 @@ else | |||||
#only static | #only static | ||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | @echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | ||||
endif | endif | ||||
#Generating OpenBLASConfigVersion.cmake | |||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | |||||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo Install OK! | @echo Install OK! | ||||
@@ -3,7 +3,7 @@ | |||||
# | # | ||||
# This library's version | # This library's version | ||||
VERSION = 0.2.14 | |||||
VERSION = 0.2.15 | |||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | ||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | ||||
@@ -162,13 +162,16 @@ COMMON_PROF = -pg | |||||
# Improve GEMV and GER for small matrices by stack allocation. | # Improve GEMV and GER for small matrices by stack allocation. | ||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482 | # For details, https://github.com/xianyi/OpenBLAS/pull/482 | ||||
# | # | ||||
# MAX_STACK_ALLOC=2048 | |||||
MAX_STACK_ALLOC=2048 | |||||
# Add a prefix or suffix to all exported symbol names in the shared library. | # Add a prefix or suffix to all exported symbol names in the shared library. | ||||
# Avoid conflicts with other BLAS libraries, especially when using | # Avoid conflicts with other BLAS libraries, especially when using | ||||
# 64 bit integer interfaces in OpenBLAS. | # 64 bit integer interfaces in OpenBLAS. | ||||
# For details, https://github.com/xianyi/OpenBLAS/pull/459 | # For details, https://github.com/xianyi/OpenBLAS/pull/459 | ||||
# | # | ||||
# The same prefix and suffix are also added to the library name, | |||||
# i.e. you get lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) rather than libopenblas | |||||
# | |||||
# SYMBOLPREFIX= | # SYMBOLPREFIX= | ||||
# SYMBOLSUFFIX= | # SYMBOLSUFFIX= | ||||
@@ -23,6 +23,7 @@ CC = gcc | |||||
UNAME_S := $(shell uname -s) | UNAME_S := $(shell uname -s) | ||||
ifeq ($(UNAME_S),Darwin) | ifeq ($(UNAME_S),Darwin) | ||||
CC = clang | CC = clang | ||||
# EXTRALIB += -Wl,-no_compact_unwind | |||||
endif | endif | ||||
endif | endif | ||||
@@ -64,6 +65,9 @@ endif | |||||
ifeq ($(TARGET), STEAMROLLER) | ifeq ($(TARGET), STEAMROLLER) | ||||
GETARCH_FLAGS := -DFORCE_BARCELONA | GETARCH_FLAGS := -DFORCE_BARCELONA | ||||
endif | endif | ||||
ifeq ($(TARGET), EXCAVATOR) | |||||
GETARCH_FLAGS := -DFORCE_BARCELONA | |||||
endif | |||||
endif | endif | ||||
@@ -91,6 +95,9 @@ endif | |||||
ifeq ($(TARGET_CORE), STEAMROLLER) | ifeq ($(TARGET_CORE), STEAMROLLER) | ||||
GETARCH_FLAGS := -DFORCE_BARCELONA | GETARCH_FLAGS := -DFORCE_BARCELONA | ||||
endif | endif | ||||
ifeq ($(TARGET_CORE), EXCAVATOR) | |||||
GETARCH_FLAGS := -DFORCE_BARCELONA | |||||
endif | |||||
endif | endif | ||||
@@ -195,12 +202,18 @@ DLLWRAP = $(CROSS_SUFFIX)dllwrap | |||||
OBJCOPY = $(CROSS_SUFFIX)objcopy | OBJCOPY = $(CROSS_SUFFIX)objcopy | ||||
OBJCONV = $(CROSS_SUFFIX)objconv | OBJCONV = $(CROSS_SUFFIX)objconv | ||||
# If Fortran compiler detection failed, build BLAS only. | |||||
ifeq ($(NOFORTRAN), 1) | |||||
NO_LAPACK = 1 | |||||
endif | |||||
# | # | ||||
# OS dependent settings | # OS dependent settings | ||||
# | # | ||||
ifeq ($(OSNAME), Darwin) | ifeq ($(OSNAME), Darwin) | ||||
export MACOSX_DEPLOYMENT_TARGET=10.2 | |||||
export MACOSX_DEPLOYMENT_TARGET=10.6 | |||||
MD5SUM = md5 -r | MD5SUM = md5 -r | ||||
endif | endif | ||||
@@ -323,6 +336,11 @@ ifeq ($(ARCH), x86) | |||||
ifndef BINARY | ifndef BINARY | ||||
NO_BINARY_MODE = 1 | NO_BINARY_MODE = 1 | ||||
endif | endif | ||||
ifeq ($(CORE), generic) | |||||
NO_EXPRECISION = 1 | |||||
endif | |||||
ifndef NO_EXPRECISION | ifndef NO_EXPRECISION | ||||
ifeq ($(F_COMPILER), GFORTRAN) | ifeq ($(F_COMPILER), GFORTRAN) | ||||
# ifeq logical or. GCC or LSB | # ifeq logical or. GCC or LSB | ||||
@@ -341,6 +359,11 @@ endif | |||||
endif | endif | ||||
ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
ifeq ($(CORE), generic) | |||||
NO_EXPRECISION = 1 | |||||
endif | |||||
ifndef NO_EXPRECISION | ifndef NO_EXPRECISION | ||||
ifeq ($(F_COMPILER), GFORTRAN) | ifeq ($(F_COMPILER), GFORTRAN) | ||||
# ifeq logical or. GCC or LSB | # ifeq logical or. GCC or LSB | ||||
@@ -408,7 +431,7 @@ endif | |||||
ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | ||||
ifneq ($(NO_AVX), 1) | ifneq ($(NO_AVX), 1) | ||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER | |||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER EXCAVATOR | |||||
endif | endif | ||||
ifneq ($(NO_AVX2), 1) | ifneq ($(NO_AVX2), 1) | ||||
DYNAMIC_CORE += HASWELL | DYNAMIC_CORE += HASWELL | ||||
@@ -578,7 +601,7 @@ else | |||||
FCOMMON_OPT += -m32 | FCOMMON_OPT += -m32 | ||||
endif | endif | ||||
endif | endif | ||||
ifdef USE_OPENMP | |||||
ifeq ($(USE_OPENMP), 1) | |||||
FCOMMON_OPT += -fopenmp | FCOMMON_OPT += -fopenmp | ||||
endif | endif | ||||
endif | endif | ||||
@@ -590,14 +613,14 @@ ifneq ($(INTERFACE64), 0) | |||||
FCOMMON_OPT += -i8 | FCOMMON_OPT += -i8 | ||||
endif | endif | ||||
endif | endif | ||||
ifdef USE_OPENMP | |||||
ifeq ($(USE_OPENMP), 1) | |||||
FCOMMON_OPT += -openmp | FCOMMON_OPT += -openmp | ||||
endif | endif | ||||
endif | endif | ||||
ifeq ($(F_COMPILER), FUJITSU) | ifeq ($(F_COMPILER), FUJITSU) | ||||
CCOMMON_OPT += -DF_INTERFACE_FUJITSU | CCOMMON_OPT += -DF_INTERFACE_FUJITSU | ||||
ifdef USE_OPENMP | |||||
ifeq ($(USE_OPENMP), 1) | |||||
FCOMMON_OPT += -openmp | FCOMMON_OPT += -openmp | ||||
endif | endif | ||||
endif | endif | ||||
@@ -615,7 +638,7 @@ endif | |||||
else | else | ||||
FCOMMON_OPT += -q32 | FCOMMON_OPT += -q32 | ||||
endif | endif | ||||
ifdef USE_OPENMP | |||||
ifeq ($(USE_OPENMP), 1) | |||||
FCOMMON_OPT += -openmp | FCOMMON_OPT += -openmp | ||||
endif | endif | ||||
endif | endif | ||||
@@ -633,7 +656,7 @@ FCOMMON_OPT += -tp p7-64 | |||||
else | else | ||||
FCOMMON_OPT += -tp p7 | FCOMMON_OPT += -tp p7 | ||||
endif | endif | ||||
ifdef USE_OPENMP | |||||
ifeq ($(USE_OPENMP), 1) | |||||
FCOMMON_OPT += -mp | FCOMMON_OPT += -mp | ||||
endif | endif | ||||
endif | endif | ||||
@@ -662,7 +685,7 @@ FCOMMON_OPT += -mabi=n32 | |||||
endif | endif | ||||
endif | endif | ||||
ifdef USE_OPENMP | |||||
ifeq ($(USE_OPENMP), 1) | |||||
FCOMMON_OPT += -mp | FCOMMON_OPT += -mp | ||||
endif | endif | ||||
endif | endif | ||||
@@ -699,7 +722,7 @@ FCOMMON_OPT += -m64 | |||||
endif | endif | ||||
endif | endif | ||||
ifdef USE_OPENMP | |||||
ifeq ($(USE_OPENMP), 1) | |||||
FEXTRALIB += -lstdc++ | FEXTRALIB += -lstdc++ | ||||
FCOMMON_OPT += -mp | FCOMMON_OPT += -mp | ||||
endif | endif | ||||
@@ -747,14 +770,14 @@ FCOMMON_OPT += -m32 | |||||
else | else | ||||
FCOMMON_OPT += -m64 | FCOMMON_OPT += -m64 | ||||
endif | endif | ||||
ifdef USE_OPENMP | |||||
ifeq ($(USE_OPENMP), 1) | |||||
FCOMMON_OPT += -xopenmp=parallel | FCOMMON_OPT += -xopenmp=parallel | ||||
endif | endif | ||||
endif | endif | ||||
ifeq ($(F_COMPILER), COMPAQ) | ifeq ($(F_COMPILER), COMPAQ) | ||||
CCOMMON_OPT += -DF_INTERFACE_COMPAQ | CCOMMON_OPT += -DF_INTERFACE_COMPAQ | ||||
ifdef USE_OPENMP | |||||
ifeq ($(USE_OPENMP), 1) | |||||
FCOMMON_OPT += -openmp | FCOMMON_OPT += -openmp | ||||
endif | endif | ||||
endif | endif | ||||
@@ -857,12 +880,6 @@ ifdef USE_SIMPLE_THREADED_LEVEL3 | |||||
CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3 | CCOMMON_OPT += -DUSE_SIMPLE_THREADED_LEVEL3 | ||||
endif | endif | ||||
ifndef LIBNAMESUFFIX | |||||
LIBPREFIX = libopenblas | |||||
else | |||||
LIBPREFIX = libopenblas_$(LIBNAMESUFFIX) | |||||
endif | |||||
ifndef SYMBOLPREFIX | ifndef SYMBOLPREFIX | ||||
SYMBOLPREFIX = | SYMBOLPREFIX = | ||||
endif | endif | ||||
@@ -871,6 +888,12 @@ ifndef SYMBOLSUFFIX | |||||
SYMBOLSUFFIX = | SYMBOLSUFFIX = | ||||
endif | endif | ||||
ifndef LIBNAMESUFFIX | |||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) | |||||
else | |||||
LIBPREFIX = lib$(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX) | |||||
endif | |||||
KERNELDIR = $(TOPDIR)/kernel/$(ARCH) | KERNELDIR = $(TOPDIR)/kernel/$(ARCH) | ||||
include $(TOPDIR)/Makefile.$(ARCH) | include $(TOPDIR)/Makefile.$(ARCH) | ||||
@@ -1,7 +1,10 @@ | |||||
# OpenBLAS | # OpenBLAS | ||||
[](https://travis-ci.org/xianyi/OpenBLAS) | |||||
[![Join the chat at https://gitter.im/xianyi/OpenBLAS](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/xianyi/OpenBLAS?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) | |||||
Travis CI: [](https://travis-ci.org/xianyi/OpenBLAS) | |||||
AppVeyor: [](https://ci.appveyor.com/project/xianyi/openblas/branch/develop) | |||||
## Introduction | ## Introduction | ||||
OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. | OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. | ||||
@@ -33,6 +33,7 @@ BOBCAT | |||||
BULLDOZER | BULLDOZER | ||||
PILEDRIVER | PILEDRIVER | ||||
STEAMROLLER | STEAMROLLER | ||||
EXCAVATOR | |||||
c)VIA CPU: | c)VIA CPU: | ||||
SSE_GENERIC | SSE_GENERIC | ||||
@@ -43,6 +44,8 @@ NANO | |||||
POWER4 | POWER4 | ||||
POWER5 | POWER5 | ||||
POWER6 | POWER6 | ||||
POWER7 | |||||
POWER8 | |||||
PPCG4 | PPCG4 | ||||
PPC970 | PPC970 | ||||
PPC970MP | PPC970MP | ||||
@@ -0,0 +1,42 @@ | |||||
version: 0.2.15.{build} | |||||
#environment: | |||||
platform: | |||||
- x64 | |||||
configuration: Release | |||||
clone_folder: c:\projects\OpenBLAS | |||||
init: | |||||
- git config --global core.autocrlf input | |||||
build: | |||||
project: OpenBLAS.sln | |||||
clone_depth: 5 | |||||
#branches to build | |||||
branches: | |||||
only: | |||||
- master | |||||
- develop | |||||
- cmake | |||||
skip_tags: true | |||||
matrix: | |||||
fast_finish: true | |||||
skip_commits: | |||||
# Add [av skip] to commit messages | |||||
message: /\[av skip\]/ | |||||
before_build: | |||||
- echo Running cmake... | |||||
- cd c:\projects\OpenBLAS | |||||
- cmake -G "Visual Studio 12 Win64" . | |||||
test_script: | |||||
- echo Build OK! |
@@ -0,0 +1,9 @@ | |||||
#!/bin/bash | |||||
# Rename every benchmark binary (*.goto, *.acml, *.mkl, *.atlas) in the current
# directory to <name with every '.' replaced by '_'>.exe.
for f in *.goto *.acml *.mkl *.atlas
do
	# An unmatched pattern is left as literal text by the shell; -f filters it out.
	if [ -f "$f" ]; then
		# Quoted so names containing whitespace or glob characters survive;
		# "--" keeps a name starting with '-' from being read as an option.
		mv -- "$f" "${f//./_}.exe"
	fi
done
@@ -0,0 +1,196 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
#undef ASUM | |||||
#ifdef COMPLEX | |||||
#ifdef DOUBLE | |||||
#define ASUM BLASFUNC(dzasum) | |||||
#else | |||||
#define ASUM BLASFUNC(scasum) | |||||
#endif | |||||
#else | |||||
#ifdef DOUBLE | |||||
#define ASUM BLASFUNC(dasum) | |||||
#else | |||||
#define ASUM BLASFUNC(sasum) | |||||
#endif | |||||
#endif | |||||
#if defined(__WIN32__) || defined(__WIN64__) | |||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
int gettimeofday(struct timeval *tv, void *tz){ | |||||
FILETIME ft; | |||||
unsigned __int64 tmpres = 0; | |||||
static int tzflag; | |||||
if (NULL != tv) | |||||
{ | |||||
GetSystemTimeAsFileTime(&ft); | |||||
tmpres |= ft.dwHighDateTime; | |||||
tmpres <<= 32; | |||||
tmpres |= ft.dwLowDateTime; | |||||
/*converting file time to unix epoch*/ | |||||
tmpres /= 10; /*convert into microseconds*/ | |||||
tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
} | |||||
return 0; | |||||
} | |||||
#endif | |||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
/*
 * Allocate a buffer through System V shared memory, requesting the segment
 * with the SHM_HUGETLB flag. The requested length is (size + HUGE_PAGESIZE)
 * masked down to a multiple of HUGE_PAGESIZE.
 *
 * Prints a message and calls exit(1) if shmget() or shmat() fails.
 * Returns the address at which the segment was attached.
 *
 * Note: the enclosing #if ends in "&& 0", so this function is currently
 * compiled out.
 */
static void *huge_malloc(BLASLONG size){

  int segment_id;
  void *base;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment_id = shmget(IPC_PRIVATE,
                      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                      SHM_HUGETLB | IPC_CREAT | 0600);

  if (segment_id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  base = shmat(segment_id, NULL, SHM_RND);

  if ((BLASLONG)base == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Issue IPC_RMID right after attaching; the attached address is still returned. */
  shmctl(segment_id, IPC_RMID, 0);

  return base;
}
#define malloc huge_malloc | |||||
#endif | |||||
int main(int argc, char *argv[]){ | |||||
FLOAT *x; | |||||
FLOAT result; | |||||
blasint m, i; | |||||
blasint inc_x=1; | |||||
int loops = 1; | |||||
int l; | |||||
char *p; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1,timeg; | |||||
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
for(m = from; m <= to; m += step) | |||||
{ | |||||
timeg=0; | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
result = ASUM (&m, x, &inc_x); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
timeg /= loops; | |||||
#ifdef COMPLEX | |||||
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6); | |||||
#else | |||||
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6); | |||||
#endif | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -71,8 +71,14 @@ double fabs(double); | |||||
#endif | #endif | ||||
#endif | #endif | ||||
#if defined(__WIN32__) || defined(__WIN64__) | #if defined(__WIN32__) || defined(__WIN64__) | ||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
int gettimeofday(struct timeval *tv, void *tz){ | int gettimeofday(struct timeval *tv, void *tz){ | ||||
FILETIME ft; | FILETIME ft; | ||||
@@ -99,6 +105,7 @@ int gettimeofday(struct timeval *tv, void *tz){ | |||||
#endif | #endif | ||||
static __inline double getmflops(int ratio, int m, double secs){ | static __inline double getmflops(int ratio, int m, double secs){ | ||||
double mm = (double)m; | double mm = (double)m; | ||||
@@ -0,0 +1,201 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
#undef COPY | |||||
#ifdef COMPLEX | |||||
#ifdef DOUBLE | |||||
#define COPY BLASFUNC(zcopy) | |||||
#else | |||||
#define COPY BLASFUNC(ccopy) | |||||
#endif | |||||
#else | |||||
#ifdef DOUBLE | |||||
#define COPY BLASFUNC(dcopy) | |||||
#else | |||||
#define COPY BLASFUNC(scopy) | |||||
#endif | |||||
#endif | |||||
#if defined(__WIN32__) || defined(__WIN64__) | |||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
int gettimeofday(struct timeval *tv, void *tz){ | |||||
FILETIME ft; | |||||
unsigned __int64 tmpres = 0; | |||||
static int tzflag; | |||||
if (NULL != tv) | |||||
{ | |||||
GetSystemTimeAsFileTime(&ft); | |||||
tmpres |= ft.dwHighDateTime; | |||||
tmpres <<= 32; | |||||
tmpres |= ft.dwLowDateTime; | |||||
/*converting file time to unix epoch*/ | |||||
tmpres /= 10; /*convert into microseconds*/ | |||||
tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
} | |||||
return 0; | |||||
} | |||||
#endif | |||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
/*
 * Allocate `size` bytes from a SysV shared-memory segment created with
 * SHM_HUGETLB. The request is rounded with HUGE_PAGESIZE before shmget().
 * On shmget()/shmat() failure a message is printed and the process exits
 * with status 1. The segment is marked for removal (IPC_RMID) right after
 * it is attached. Returns the attached address.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() signals failure with (void *)-1 rather than NULL. */
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
#define malloc huge_malloc | |||||
#endif | |||||
int main(int argc, char *argv[]){ | |||||
FLOAT *x, *y; | |||||
FLOAT alpha[2] = { 2.0, 2.0 }; | |||||
blasint m, i; | |||||
blasint inc_x=1,inc_y=1; | |||||
int loops = 1; | |||||
int l; | |||||
char *p; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1,timeg; | |||||
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
for(m = from; m <= to; m += step) | |||||
{ | |||||
timeg=0; | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
COPY (&m, x, &inc_x, y, &inc_y ); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
timeg /= loops; | |||||
fprintf(stderr, | |||||
" %10.2f MBytes\n", | |||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6); | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -144,6 +144,7 @@ int main(int argc, char *argv[]){ | |||||
FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork; | FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork; | ||||
FLOAT wkopt[4]; | FLOAT wkopt[4]; | ||||
char job='V'; | char job='V'; | ||||
char jobr='N'; | |||||
char *p; | char *p; | ||||
blasint m, i, j, info,lwork; | blasint m, i, j, info,lwork; | ||||
@@ -202,9 +203,9 @@ int main(int argc, char *argv[]){ | |||||
lwork = -1; | lwork = -1; | ||||
m=to; | m=to; | ||||
#ifndef COMPLEX | #ifndef COMPLEX | ||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info); | |||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info); | |||||
#else | #else | ||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info); | |||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info); | |||||
#endif | #endif | ||||
lwork = (blasint)wkopt[0]; | lwork = (blasint)wkopt[0]; | ||||
@@ -226,16 +227,16 @@ int main(int argc, char *argv[]){ | |||||
lwork = -1; | lwork = -1; | ||||
#ifndef COMPLEX | #ifndef COMPLEX | ||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info); | |||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, wkopt, &lwork, &info); | |||||
#else | #else | ||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info); | |||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, wkopt, &lwork,rwork, &info); | |||||
#endif | #endif | ||||
lwork = (blasint)wkopt[0]; | lwork = (blasint)wkopt[0]; | ||||
#ifndef COMPLEX | #ifndef COMPLEX | ||||
GEEV (&job, &job, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info); | |||||
GEEV (&job, &jobr, &m, a, &m, wr, wi, vl, &m, vr, &m, work, &lwork, &info); | |||||
#else | #else | ||||
GEEV (&job, &job, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info); | |||||
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info); | |||||
#endif | #endif | ||||
gettimeofday( &stop, (struct timezone *)0); | gettimeofday( &stop, (struct timezone *)0); | ||||
@@ -122,7 +122,7 @@ int main(int argc, char *argv[]){ | |||||
FLOAT *a, *b, *c; | FLOAT *a, *b, *c; | ||||
FLOAT alpha[] = {1.0, 1.0}; | FLOAT alpha[] = {1.0, 1.0}; | ||||
FLOAT beta [] = {1.0, 1.0}; | |||||
FLOAT beta [] = {0.0, 0.0}; | |||||
char trans='N'; | char trans='N'; | ||||
blasint m, n, i, j; | blasint m, n, i, j; | ||||
int loops = 1; | int loops = 1; | ||||
@@ -168,12 +168,21 @@ int main(int argc, char *argv[]){ | |||||
has_param_n=1; | has_param_n=1; | ||||
} | } | ||||
#ifdef linux | #ifdef linux | ||||
srandom(getpid()); | srandom(getpid()); | ||||
#endif | #endif | ||||
for(j = 0; j < m; j++){ | |||||
for(i = 0; i < to * COMPSIZE; i++){ | |||||
a[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
b[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
c[i + j * to * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
} | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
fprintf(stderr, " SIZE Flops Time\n"); | |||||
for(m = from; m <= to; m += step) | for(m = from; m <= to; m += step) | ||||
{ | { | ||||
@@ -188,34 +197,23 @@ int main(int argc, char *argv[]){ | |||||
fprintf(stderr, " %6dx%d : ", (int)m, (int)n); | fprintf(stderr, " %6dx%d : ", (int)m, (int)n); | ||||
gettimeofday( &start, (struct timezone *)0); | |||||
for (l=0; l<loops; l++) | for (l=0; l<loops; l++) | ||||
{ | { | ||||
for(j = 0; j < m; j++){ | |||||
for(i = 0; i < m * COMPSIZE; i++){ | |||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
c[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m ); | GEMM (&trans, &trans, &m, &n, &m, alpha, a, &m, b, &m, beta, c, &m ); | ||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | } | ||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg /= loops; | |||||
timeg = time1/loops; | |||||
fprintf(stderr, | fprintf(stderr, | ||||
" %10.2f MFlops\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6); | |||||
" %10.2f MFlops %10.6f sec\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m * (double)m * (double)n / timeg * 1.e-6, time1); | |||||
} | } | ||||
@@ -35,12 +35,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#undef GER | #undef GER | ||||
#ifdef COMPLEX | |||||
#ifdef DOUBLE | |||||
#define GER BLASFUNC(zgeru) | |||||
#else | |||||
#define GER BLASFUNC(cgeru) | |||||
#endif | |||||
#else | |||||
#ifdef DOUBLE | #ifdef DOUBLE | ||||
#define GER BLASFUNC(dger) | #define GER BLASFUNC(dger) | ||||
#else | #else | ||||
#define GER BLASFUNC(sger) | #define GER BLASFUNC(sger) | ||||
#endif | #endif | ||||
#endif | |||||
#if defined(__WIN32__) || defined(__WIN64__) | #if defined(__WIN32__) || defined(__WIN64__) | ||||
@@ -0,0 +1,218 @@ | |||||
/*********************************************************************/ | |||||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
/* All rights reserved. */ | |||||
/* */ | |||||
/* Redistribution and use in source and binary forms, with or */ | |||||
/* without modification, are permitted provided that the following */ | |||||
/* conditions are met: */ | |||||
/* */ | |||||
/* 1. Redistributions of source code must retain the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer. */ | |||||
/* */ | |||||
/* 2. Redistributions in binary form must reproduce the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer in the documentation and/or other materials */ | |||||
/* provided with the distribution. */ | |||||
/* */ | |||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||||
/* */ | |||||
/* The views and conclusions contained in the software and */ | |||||
/* documentation are those of the authors and should not be */ | |||||
/* interpreted as representing official policies, either expressed */ | |||||
/* or implied, of The University of Texas at Austin. */ | |||||
/*********************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
double fabs(double); | |||||
#undef GESV | |||||
#undef GETRS | |||||
#ifndef COMPLEX | |||||
#ifdef XDOUBLE | |||||
#define GESV BLASFUNC(qgesv) | |||||
#elif defined(DOUBLE) | |||||
#define GESV BLASFUNC(dgesv) | |||||
#else | |||||
#define GESV BLASFUNC(sgesv) | |||||
#endif | |||||
#else | |||||
#ifdef XDOUBLE | |||||
#define GESV BLASFUNC(xgesv) | |||||
#elif defined(DOUBLE) | |||||
#define GESV BLASFUNC(zgesv) | |||||
#else | |||||
#define GESV BLASFUNC(cgesv) | |||||
#endif | |||||
#endif | |||||
#if defined(__WIN32__) || defined(__WIN64__) | |||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
int gettimeofday(struct timeval *tv, void *tz){ | |||||
FILETIME ft; | |||||
unsigned __int64 tmpres = 0; | |||||
static int tzflag; | |||||
if (NULL != tv) | |||||
{ | |||||
GetSystemTimeAsFileTime(&ft); | |||||
tmpres |= ft.dwHighDateTime; | |||||
tmpres <<= 32; | |||||
tmpres |= ft.dwLowDateTime; | |||||
/*converting file time to unix epoch*/ | |||||
tmpres /= 10; /*convert into microseconds*/ | |||||
tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
} | |||||
return 0; | |||||
} | |||||
#endif | |||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
/*
 * Allocate `size` bytes from a SysV shared-memory segment created with
 * SHM_HUGETLB. The request is rounded with HUGE_PAGESIZE before shmget().
 * On shmget()/shmat() failure a message is printed and the process exits
 * with status 1. The segment is marked for removal (IPC_RMID) right after
 * it is attached. Returns the attached address.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() signals failure with (void *)-1 rather than NULL. */
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
#define malloc huge_malloc | |||||
#endif | |||||
int main(int argc, char *argv[]){ | |||||
FLOAT *a, *b; | |||||
blasint *ipiv; | |||||
blasint m, i, j, info; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1; | |||||
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d\n", from, to, step); | |||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
if (( ipiv = (blasint *)malloc(sizeof(blasint) * to * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops Time\n"); | |||||
for(m = from; m <= to; m += step){ | |||||
fprintf(stderr, " %dx%d : ", (int)m, (int)m); | |||||
for(j = 0; j < m; j++){ | |||||
for(i = 0; i < m * COMPSIZE; i++){ | |||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
} | |||||
for(j = 0; j < m; j++){ | |||||
for(i = 0; i < m * COMPSIZE; i++){ | |||||
b[i + j * m * COMPSIZE] = 0.0; | |||||
} | |||||
} | |||||
for (j = 0; j < m; ++j) { | |||||
for (i = 0; i < m * COMPSIZE; ++i) { | |||||
b[i] += a[i + j * m * COMPSIZE]; | |||||
} | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
GESV (&m, &m, a, &m, ipiv, b, &m, &info); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
fprintf(stderr, | |||||
"%10.2f MFlops %10.6f s\n", | |||||
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1); | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -52,6 +52,11 @@ C) | |||||
awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2 | awk '/MFlops/ { print $3,int($9) }'|tail --lines=+2 | ||||
;; | ;; | ||||
B) | |||||
# Copy Benchmark | |||||
awk '/MBytes/ { print $1,int($3) }'|tail --lines=+2 | |||||
;; | |||||
*) | *) | ||||
awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2 | awk '/MFlops/ { print $1,int($3) }'|tail --lines=+2 | ||||
@@ -88,6 +88,10 @@ double fabs(double); | |||||
#if defined(__WIN32__) || defined(__WIN64__) | #if defined(__WIN32__) || defined(__WIN64__) | ||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
int gettimeofday(struct timeval *tv, void *tz){ | int gettimeofday(struct timeval *tv, void *tz){ | ||||
FILETIME ft; | FILETIME ft; | ||||
@@ -0,0 +1,202 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
#undef SCAL | |||||
#ifdef COMPLEX | |||||
#ifdef DOUBLE | |||||
#define SCAL BLASFUNC(zscal) | |||||
#else | |||||
#define SCAL BLASFUNC(cscal) | |||||
#endif | |||||
#else | |||||
#ifdef DOUBLE | |||||
#define SCAL BLASFUNC(dscal) | |||||
#else | |||||
#define SCAL BLASFUNC(sscal) | |||||
#endif | |||||
#endif | |||||
#if defined(__WIN32__) || defined(__WIN64__) | |||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
int gettimeofday(struct timeval *tv, void *tz){ | |||||
FILETIME ft; | |||||
unsigned __int64 tmpres = 0; | |||||
static int tzflag; | |||||
if (NULL != tv) | |||||
{ | |||||
GetSystemTimeAsFileTime(&ft); | |||||
tmpres |= ft.dwHighDateTime; | |||||
tmpres <<= 32; | |||||
tmpres |= ft.dwLowDateTime; | |||||
/*converting file time to unix epoch*/ | |||||
tmpres /= 10; /*convert into microseconds*/ | |||||
tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
} | |||||
return 0; | |||||
} | |||||
#endif | |||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
/*
 * Allocate `size` bytes from a SysV shared-memory segment created with
 * SHM_HUGETLB. The request is rounded with HUGE_PAGESIZE before shmget().
 * On shmget()/shmat() failure a message is printed and the process exits
 * with status 1. The segment is marked for removal (IPC_RMID) right after
 * it is attached. Returns the attached address.
 */
static void *huge_malloc(BLASLONG size){

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);

  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);

  /* shmat() signals failure with (void *)-1 rather than NULL. */
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  shmctl(shmid, IPC_RMID, 0);

  return address;
}
#define malloc huge_malloc | |||||
#endif | |||||
int main(int argc, char *argv[]){ | |||||
FLOAT *x, *y; | |||||
FLOAT alpha[2] = { 2.0, 2.0 }; | |||||
blasint m, i; | |||||
blasint inc_x=1,inc_y=1; | |||||
int loops = 1; | |||||
int l; | |||||
char *p; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1,timeg; | |||||
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
for(m = from; m <= to; m += step) | |||||
{ | |||||
timeg=0; | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
SCAL (&m, alpha, x, &inc_x); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
timeg /= loops; | |||||
#ifdef COMPLEX | |||||
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6); | |||||
#else | |||||
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6); | |||||
#endif | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_cgemm(N,l):
    """Time ``l`` products of two random complex N x N matrices.

    Both operands are built from float32 random parts and multiplied with
    ``numpy.dot``. One line with the size, the rate (8*N^3 operations per
    product, in MFlops) and the total wall-clock time is printed.

    If the clock reports no elapsed time (coarse timer, tiny N) the rate is
    printed as ``inf`` instead of raising ZeroDivisionError.
    """
    A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
    B = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j

    start = time.time()
    for _ in range(0,l):
        numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = (8.0*N*N*N) * l / timediff * 1e-6
    else:
        # Work finished inside one clock tick: the rate is not measurable.
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
    # Positional arguments: [N [NMAX [NINC [LOOPS]]]]
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS overrides the loop count from the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX inclusive; an upper bound of NMAX+NINC would also run a
    # size beyond NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range (N,NMAX+1,NINC):
        run_cgemm(i,LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_cgemv(N,l):
    """Time ``l`` products of a random complex N x N matrix and N-vector.

    Both operands are built from float32 random parts and multiplied with
    ``numpy.dot``. One line with the size, the rate (8*N^2 operations per
    product, in MFlops) and the total wall-clock time is printed.

    If the clock reports no elapsed time (coarse timer, tiny N) the rate is
    printed as ``inf`` instead of raising ZeroDivisionError.
    """
    A = randn(N,N).astype('float32') + randn(N,N).astype('float32') * 1j
    B = randn(N).astype('float32') + randn(N).astype('float32') * 1j

    start = time.time()
    for _ in range(0,l):
        numpy.dot(A,B)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = (8.0*N*N) * l / timediff * 1e-6
    else:
        # Work finished inside one clock tick: the rate is not measurable.
        mflops = float('inf')

    size = "%dx%d" % (N,N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
    # Positional arguments: [N [NMAX [NINC [LOOPS]]]]
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS overrides the loop count from the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX inclusive; an upper bound of NMAX+NINC would also run a
    # size beyond NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range (N,NMAX+1,NINC):
        run_cgemv(i,LOOPS)
@@ -0,0 +1,58 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
from scipy.linalg.blas import daxpy | |||||
def run_daxpy(N,l):
    """Time ``l`` calls of scipy's ``daxpy`` on random float64 N-vectors.

    Each call is ``y = daxpy(x, y, a=2.0)``, so y is carried from one
    iteration to the next. One line with the size, the rate (2*N operations
    per call, in MFlops) and the total wall-clock time is printed.

    If the clock reports no elapsed time (coarse timer, tiny N) the rate is
    printed as ``inf`` instead of raising ZeroDivisionError.
    """
    x = randn(N).astype('float64')
    y = randn(N).astype('float64')

    start = time.time()
    for _ in range(0,l):
        y = daxpy(x,y, a=2.0 )
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = (2.0*N) * l / timediff * 1e-6
    else:
        # Work finished inside one clock tick: the rate is not measurable.
        mflops = float('inf')

    size = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size,mflops,timediff))
if __name__ == "__main__":
    # Positional arguments: [N [NMAX [NINC [LOOPS]]]]
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    args = sys.argv[1:5]
    if len(args) > 0:
        N = int(args[0])
    if len(args) > 1:
        NMAX = int(args[1])
    if len(args) > 2:
        NINC = int(args[2])
    if len(args) > 3:
        LOOPS = int(args[3])

    # A non-empty OPENBLAS_LOOPS overrides the loop count from the command line.
    p = os.environ.get('OPENBLAS_LOOPS')
    if p:
        LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX inclusive; an upper bound of NMAX+NINC would also run a
    # size beyond NMAX whenever (NMAX - N) is not a multiple of NINC.
    for i in range (N,NMAX+1,NINC):
        run_daxpy(i,LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_ddot(N,l):
    """Time ``l`` float64 dot products of length ``N`` and print the rate.

    N -- vector length
    l -- number of timed repetitions

    Prints one line holding the size, the MFlops figure (2*N flops per call)
    and the elapsed wall-clock seconds. Returns nothing.
    """
    A = randn(N).astype('float64')
    B = randn(N).astype('float64')

    start = time.time()
    for _ in range(l):
        ref = numpy.dot(A, B)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (2 * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_ddot(i, LOOPS)
@@ -0,0 +1,55 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_deig(N,l):
    """Time ``l`` eigen-decompositions of a random float64 N x N matrix.

    N -- matrix order
    l -- number of timed repetitions

    Prints one line holding the size, the MFlops figure (this script's
    estimate of 26.33*N^3 flops per call) and the elapsed wall-clock
    seconds. Returns nothing.
    """
    A = randn(N, N).astype('float64')

    start = time.time()
    for _ in range(l):
        la, v = numpy.linalg.eig(A)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (26.33 * N * N * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_deig(i, LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_dgemm(N,l):
    """Time ``l`` products of two random float64 N x N matrices.

    N -- matrix order
    l -- number of timed repetitions

    Prints one line holding the size, the MFlops figure (2*N^3 flops per
    call) and the elapsed wall-clock seconds. Returns nothing.
    """
    A = randn(N, N).astype('float64')
    B = randn(N, N).astype('float64')

    start = time.time()
    for _ in range(l):
        ref = numpy.dot(A, B)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (2 * N * N * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_dgemm(i, LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_dgemv(N,l):
    """Time ``l`` products of a random float64 N x N matrix with an N-vector.

    N -- matrix order / vector length
    l -- number of timed repetitions

    Prints one line holding the size, the MFlops figure (2*N^2 flops per
    call) and the elapsed wall-clock seconds. Returns nothing.
    """
    A = randn(N, N).astype('float64')
    B = randn(N).astype('float64')

    start = time.time()
    for _ in range(l):
        ref = numpy.dot(A, B)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (2 * N * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_dgemv(i, LOOPS)
@@ -0,0 +1,58 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
from scipy.linalg.lapack import dgesv | |||||
def run_dgesv(N,l):
    """Time ``l`` LAPACK dgesv solves with N x N float64 operands.

    N -- order of the coefficient matrix; the right-hand side is also N x N
    l -- number of timed repetitions

    Prints one line holding the size, the MFlops figure (2/3*N^3 + 2*N^3
    flops per call) and the elapsed wall-clock seconds. Returns nothing.
    """
    a = randn(N, N).astype('float64')
    b = randn(N, N).astype('float64')

    start = time.time()
    for _ in range(l):
        # NOTE(review): the two trailing 1s are presumably scipy's
        # overwrite_a / overwrite_b flags -- confirm against the wrapper docs.
        dgesv(a, b, 1, 1)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (2.0/3.0 * N * N * N + 2.0 * N * N * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_dgesv(i, LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_dsolve(N,l):
    """Time ``l`` calls of numpy.linalg.solve with N x N float64 operands.

    N -- order of the coefficient matrix; the right-hand side is also N x N
    l -- number of timed repetitions

    Prints one line holding the size, the MFlops figure (2/3*N^3 + 2*N^3
    flops per call) and the elapsed wall-clock seconds. Returns nothing.
    """
    A = randn(N, N).astype('float64')
    B = randn(N, N).astype('float64')

    start = time.time()
    for _ in range(l):
        ref = numpy.linalg.solve(A, B)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (2.0/3.0 * N * N * N + 2.0 * N * N * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_dsolve(i, LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_sdot(N,l):
    """Time ``l`` float32 dot products of length ``N`` and print the rate.

    N -- vector length
    l -- number of timed repetitions

    Prints one line holding the size, the MFlops figure (2*N flops per call)
    and the elapsed wall-clock seconds. Returns nothing.
    """
    A = randn(N).astype('float32')
    B = randn(N).astype('float32')

    start = time.time()
    for _ in range(l):
        ref = numpy.dot(A, B)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (2 * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%d" % (N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_sdot(i, LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_sgemm(N,l):
    """Time ``l`` products of two random float32 N x N matrices.

    N -- matrix order
    l -- number of timed repetitions

    Prints one line holding the size, the MFlops figure (2*N^3 flops per
    call) and the elapsed wall-clock seconds. Returns nothing.
    """
    A = randn(N, N).astype('float32')
    B = randn(N, N).astype('float32')

    start = time.time()
    for _ in range(l):
        ref = numpy.dot(A, B)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (2 * N * N * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_sgemm(i, LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_sgemv(N,l):
    """Time ``l`` products of a random float32 N x N matrix with an N-vector.

    N -- matrix order / vector length
    l -- number of timed repetitions

    Prints one line holding the size, the MFlops figure (2*N^2 flops per
    call) and the elapsed wall-clock seconds. Returns nothing.
    """
    A = randn(N, N).astype('float32')
    B = randn(N).astype('float32')

    start = time.time()
    for _ in range(l):
        ref = numpy.dot(A, B)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (2 * N * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_sgemv(i, LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_zgemm(N,l):
    """Time ``l`` products of two random complex N x N matrices.

    N -- matrix order
    l -- number of timed repetitions

    Both operands are built from float64 real and imaginary parts. Prints one
    line holding the size, the MFlops figure (8*N^3 flops per call) and the
    elapsed wall-clock seconds. Returns nothing.
    """
    A = randn(N, N).astype('float64') + randn(N, N).astype('float64') * 1j
    B = randn(N, N).astype('float64') + randn(N, N).astype('float64') * 1j

    start = time.time()
    for _ in range(l):
        ref = numpy.dot(A, B)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (8 * N * N * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_zgemm(i, LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy.random import randn | |||||
def run_zgemv(N,l):
    """Time ``l`` products of a random complex N x N matrix with an N-vector.

    N -- matrix order / vector length
    l -- number of timed repetitions

    Both operands are built from float64 real and imaginary parts. Prints one
    line holding the size, the MFlops figure (8*N^2 flops per call) and the
    elapsed wall-clock seconds. Returns nothing.
    """
    A = randn(N, N).astype('float64') + randn(N, N).astype('float64') * 1j
    B = randn(N).astype('float64') + randn(N).astype('float64') * 1j

    start = time.time()
    for _ in range(l):
        ref = numpy.dot(A, B)
    timediff = time.time() - start

    # A run shorter than the clock resolution measures 0 s; report nan rather
    # than dying with ZeroDivisionError.
    if timediff > 0:
        mflops = (8 * N * N) * l / timediff * 1e-6
    else:
        mflops = float('nan')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults: sweep N = 128 .. 2048 in steps of 128, one timed repetition.
    N, NMAX, NINC, LOOPS = 128, 2048, 128, 1

    # Up to four positional arguments override N, NMAX, NINC and LOOPS, in
    # that order; any further arguments are ignored.
    given = [int(arg) for arg in sys.argv[1:5]]
    N, NMAX, NINC, LOOPS = given + [N, NMAX, NINC, LOOPS][len(given):]

    # A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
    env_loops = os.environ.get('OPENBLAS_LOOPS')
    if env_loops:
        LOOPS = int(env_loops)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # Stop at NMAX + 1: NMAX is included when it lies on the step grid and is
    # never overshot when it does not.
    for i in range(N, NMAX + 1, NINC):
        run_zgemv(i, LOOPS)
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: single-precision complex matrix-matrix product (n x n times n x n).
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = single(rand(n,n)) + single(rand(n,n)) * 1i;
  B = single(rand(n,n)) + single(rand(n,n)) * 1i;

  # Only the repeated products are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    C = A * B;
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);

  # 2*n^3 per product; the 4.0 factor presumably accounts for complex arithmetic.
  mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n = n + nstep;
endwhile
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: single-precision complex matrix-vector product (n x n times n x 1).
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = single(rand(n,n)) + single(rand(n,n)) * 1i;
  B = single(rand(n,1)) + single(rand(n,1)) * 1i;

  # Only the repeated products are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    C = A * B;
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);

  # 2*n^2 per product; the 4.0 factor presumably accounts for complex arithmetic.
  mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n = n + nstep;
endwhile
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: eigen-decomposition (values and vectors) of a double n x n matrix.
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed decompositions per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = double(rand(n,n));

  # Only the repeated decompositions are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    [V,lambda] = eig(A);
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);

  # 26.33*n^3 is this script's flop estimate for one decomposition.
  mflops = ( 26.33 *n*n*n ) *loops / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg );

  n = n + nstep;
endwhile
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: double-precision matrix-matrix product (n x n times n x n).
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = double(rand(n,n));
  B = double(rand(n,n));

  # Only the repeated products are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    C = A * B;
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);

  # 2*n^3 flops per product.
  mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n = n + nstep;
endwhile
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: double-precision matrix-vector product (n x n times n x 1).
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = double(rand(n,n));
  B = double(rand(n,1));

  # Only the repeated products are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    C = A * B;
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);

  # 2*n^2 flops per product.
  mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n = n + nstep;
endwhile
@@ -0,0 +1,59 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: linsolve(A, B) with double n x n coefficient and right-hand side.
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed solves per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = double(rand(n,n));
  B = double(rand(n,n));

  # Only the repeated solves are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    x = linsolve(A,B);
    #x = A / B;
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);
  #r = norm(A*x - B)/norm(B)

  # 2/3*n^3 + 2*n^3 flops per solve with n right-hand sides.
  mflops = ( 2.0/3.0 *n*n*n + 2.0*n*n*n ) *loops / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg );

  n = n + nstep;
endwhile
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: single-precision matrix-matrix product (n x n times n x n).
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = single(rand(n,n));
  B = single(rand(n,n));

  # Only the repeated products are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    C = A * B;
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);

  # 2*n^3 flops per product.
  mflops = ( 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n = n + nstep;
endwhile
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: single-precision matrix-vector product (n x n times n x 1).
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = single(rand(n,n));
  B = single(rand(n,1));

  # Only the repeated products are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    C = A * B;
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);

  # 2*n^2 flops per product.
  mflops = ( 2.0*n*n *loops ) / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n = n + nstep;
endwhile
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: double-precision complex matrix-matrix product (n x n times n x n).
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = double(rand(n,n)) + double(rand(n,n)) * 1i;
  B = double(rand(n,n)) + double(rand(n,n)) * 1i;

  # Only the repeated products are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    C = A * B;
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);

  # 2*n^3 per product; the 4.0 factor presumably accounts for complex arithmetic.
  mflops = ( 4.0 * 2.0*n*n*n *loops ) / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n = n + nstep;
endwhile
@@ -0,0 +1,56 @@ | |||||
#!/usr/bin/octave --silent | |||||
# Benchmark: double-precision complex matrix-vector product (n x n times n x 1).
# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed products per size.
nfrom = 128;
nto = 2048;
nstep = 128;
loops = 1;

# Up to four positional command-line arguments override the defaults in the
# order nfrom, nto, nstep, loops; further arguments are ignored.
cli = argv();
if nargin >= 1
  nfrom = str2num(cli{1});
endif
if nargin >= 2
  nto = str2num(cli{2});
endif
if nargin >= 3
  nstep = str2num(cli{3});
endif
if nargin >= 4
  loops = str2num(cli{4});
endif

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops = getenv("OPENBLAS_LOOPS");
if env_loops
  loops = str2num(env_loops);
endif

printf("From %d To %d Step=%d Loops=%d\n",nfrom, nto, nstep, loops);
printf(" SIZE FLOPS TIME\n");

n = nfrom;
while n <= nto
  A = double(rand(n,n)) + double(rand(n,n)) * 1i;
  B = double(rand(n,1)) + double(rand(n,1)) * 1i;

  # Only the repeated products are timed, not the operand setup.
  t0 = clock();
  remaining = loops;
  while remaining > 0
    C = A * B;
    remaining = remaining - 1;
  endwhile
  timeg = etime(clock(), t0);

  # 2*n^2 per product; the 4.0 factor presumably accounts for complex arithmetic.
  mflops = ( 4.0 * 2.0*n*n *loops ) / ( timeg * 1.0e6 );
  label = sprintf("%dx%d : ", n,n);
  printf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg);

  n = n + nstep;
endwhile
@@ -0,0 +1,62 @@ | |||||
#!/usr/bin/Rscript | |||||
# Benchmark: eigen() on a random n x n matrix.
# Command-line arguments after the script name.
argv <- commandArgs(trailingOnly = TRUE)

# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed decompositions per size.
nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1

# Up to four positional arguments override nfrom, nto, nstep and loops, in
# that order; further arguments are ignored.
if ( length(argv) >= 1 ) nfrom <- as.numeric(argv[1])
if ( length(argv) >= 2 ) nto <- as.numeric(argv[2])
if ( length(argv) >= 3 ) nstep <- as.numeric(argv[3])
if ( length(argv) >= 4 ) loops <- as.numeric(argv[4])

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
p <- Sys.getenv("OPENBLAS_LOOPS")
if ( p != "" ) {
  loops <- as.numeric(p)
}

cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))

n <- nfrom
while ( n <= nto ) {
  A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

  # Only the repeated decompositions are timed; element 3 of proc.time()
  # is the elapsed (wall-clock) time.
  l <- 1
  start <- proc.time()[3]
  while ( l <= loops ) {
    ev <- eigen(A)
    l <- l + 1
  }
  end <- proc.time()[3]
  timeg <- end - start

  # 26.33*n^3: same flop estimate as the Python and Octave eig benchmarks,
  # so the MFlops figures are comparable across languages.
  mflops <- (26.33 *n*n*n ) * loops / ( timeg * 1.0e6 )

  st <- sprintf("%.0fx%.0f :",n , n)
  cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg))

  n <- n + nstep
}
@@ -0,0 +1,63 @@ | |||||
#!/usr/bin/Rscript | |||||
# Benchmark: matrix-matrix product A %*% B of two random n x n matrices.
# Command-line arguments after the script name.
argv <- commandArgs(trailingOnly = TRUE)

# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed products per size.
nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1

# Up to four positional arguments override nfrom, nto, nstep and loops, in
# that order; further arguments are ignored.
if ( length(argv) >= 1 ) nfrom <- as.numeric(argv[1])
if ( length(argv) >= 2 ) nto <- as.numeric(argv[2])
if ( length(argv) >= 3 ) nstep <- as.numeric(argv[3])
if ( length(argv) >= 4 ) loops <- as.numeric(argv[4])

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops <- Sys.getenv("OPENBLAS_LOOPS")
if ( env_loops != "" ) {
  loops <- as.numeric(env_loops)
}

cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))

n <- nfrom
while ( n <= nto ) {
  A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
  B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

  # Only the repeated products are timed; element 3 of proc.time() is the
  # elapsed (wall-clock) time.
  pass <- 1
  t0 <- proc.time()[3]
  while ( pass <= loops ) {
    C <- A %*% B
    pass <- pass + 1
  }
  t1 <- proc.time()[3]
  timeg <- t1 - t0

  # 2*n^3 flops per product.
  mflops <- ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )

  label <- sprintf("%.0fx%.0f :",n , n)
  cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg))

  n <- n + nstep
}
@@ -0,0 +1,63 @@ | |||||
#!/usr/bin/Rscript | |||||
# Benchmark: solve(A, B) with random n x n coefficient and right-hand side.
# Command-line arguments after the script name.
argv <- commandArgs(trailingOnly = TRUE)

# Sweep defaults: n runs from nfrom to nto in steps of nstep, with `loops`
# timed solves per size.
nfrom <- 128
nto <- 2048
nstep <- 128
loops <- 1

# Up to four positional arguments override nfrom, nto, nstep and loops, in
# that order; further arguments are ignored.
if ( length(argv) >= 1 ) nfrom <- as.numeric(argv[1])
if ( length(argv) >= 2 ) nto <- as.numeric(argv[2])
if ( length(argv) >= 3 ) nstep <- as.numeric(argv[3])
if ( length(argv) >= 4 ) loops <- as.numeric(argv[4])

# A non-empty OPENBLAS_LOOPS environment variable overrides the loop count.
env_loops <- Sys.getenv("OPENBLAS_LOOPS")
if ( env_loops != "" ) {
  loops <- as.numeric(env_loops)
}

cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops))
cat(sprintf(" SIZE Flops Time\n"))

n <- nfrom
while ( n <= nto ) {
  A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)
  B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE)

  # Only the repeated solves are timed; element 3 of proc.time() is the
  # elapsed (wall-clock) time. The solution itself is discarded.
  pass <- 1
  t0 <- proc.time()[3]
  while ( pass <= loops ) {
    solve(A,B)
    pass <- pass + 1
  }
  t1 <- proc.time()[3]
  timeg <- t1 - t0

  # 2/3*n^3 + 2*n^3 flops per solve with n right-hand sides.
  mflops <- (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 )

  label <- sprintf("%.0fx%.0f :",n , n)
  cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", label, mflops, timeg))

  n <- n + nstep
}
@@ -0,0 +1,201 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
#undef SWAP | |||||
#ifdef COMPLEX | |||||
#ifdef DOUBLE | |||||
#define SWAP BLASFUNC(zswap) | |||||
#else | |||||
#define SWAP BLASFUNC(cswap) | |||||
#endif | |||||
#else | |||||
#ifdef DOUBLE | |||||
#define SWAP BLASFUNC(dswap) | |||||
#else | |||||
#define SWAP BLASFUNC(sswap) | |||||
#endif | |||||
#endif | |||||
#if defined(__WIN32__) || defined(__WIN64__) | |||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
/*
 * Replacement gettimeofday() for builds where __WIN32__ or __WIN64__ is
 * defined: stores the current system time, as reported by
 * GetSystemTimeAsFileTime(), into *tv.
 *
 * tv - receives the seconds and microseconds; nothing is written when NULL.
 * tz - never read by this function.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){ | |||||
FILETIME ft; | |||||
unsigned __int64 tmpres = 0; | |||||
/* NOTE(review): tzflag is declared but never referenced in this function. */
static int tzflag; | |||||
if (NULL != tv) | |||||
{ | |||||
GetSystemTimeAsFileTime(&ft); | |||||
/* Combine the high and low halves of the FILETIME into one 64-bit count. */
tmpres |= ft.dwHighDateTime; | |||||
tmpres <<= 32; | |||||
tmpres |= ft.dwLowDateTime; | |||||
/*converting file time to unix epoch*/ | |||||
tmpres /= 10; /*convert into microseconds*/ | |||||
tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
/* Split the microsecond count into whole seconds and the remainder. */
tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
} | |||||
return 0; | |||||
} | |||||
#endif | |||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
/*
 * Obtains memory as a System V shared-memory segment requested with
 * SHM_HUGETLB and returns the address at which it was attached.
 * The enclosing #if condition ends in "&& 0", so this code is currently
 * compiled out.
 *
 * size - requested size, passed to shmget() after rounding.
 *
 * Prints a message and calls exit(1) if shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){ | |||||
int shmid; | |||||
void *address; | |||||
/* Fallback value used only when the system headers do not define SHM_HUGETLB. */
#ifndef SHM_HUGETLB | |||||
#define SHM_HUGETLB 04000 | |||||
#endif | |||||
/*
 * The size argument is (size + HUGE_PAGESIZE) masked down to a multiple of
 * HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two - TODO confirm).
 * NOTE(review): this adds one full extra page when size is already aligned.
 */
if ((shmid =shmget(IPC_PRIVATE, | |||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
printf( "Memory allocation failed(shmget).\n"); | |||||
exit(1); | |||||
} | |||||
address = shmat(shmid, NULL, SHM_RND); | |||||
/* shmat() failure is detected by comparing the returned pointer with -1. */
if ((BLASLONG)address == -1){ | |||||
printf( "Memory allocation failed(shmat).\n"); | |||||
exit(1); | |||||
} | |||||
/* Issue IPC_RMID on the segment id before handing back the attached address. */
shmctl(shmid, IPC_RMID, 0); | |||||
return address; | |||||
} | |||||
#define malloc huge_malloc | |||||
#endif | |||||
/*
 * Benchmark driver for SWAP (one of sswap/dswap/cswap/zswap, chosen by the
 * COMPLEX and DOUBLE macros earlier in this file).
 *
 * Command line: [from [to [step]]] - vector lengths to sweep
 *               (defaults 1, 200, 1); 'to' is raised to at least 'from'.
 * Environment:  OPENBLAS_LOOPS - timed repetitions per length (default 1)
 *               OPENBLAS_INCX / OPENBLAS_INCY - strides (default 1)
 *
 * For every length m, x and y are refilled with random values, one SWAP call
 * is timed with gettimeofday(), and the mean time over the repetitions is
 * turned into a throughput figure written to stderr.
 *
 * Returns 0; calls exit(1) if either buffer cannot be allocated.
 */
int main(int argc, char *argv[]){ | |||||
FLOAT *x, *y; | |||||
/* NOTE(review): alpha is initialised but not referenced anywhere in this function. */
FLOAT alpha[2] = { 2.0, 2.0 }; | |||||
blasint m, i; | |||||
blasint inc_x=1,inc_y=1; | |||||
int loops = 1; | |||||
int l; | |||||
char *p; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1,timeg; | |||||
/* Skip the program name, then consume up to three positional arguments. */
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
/* Optional overrides taken from the environment. */
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||||
/* Both buffers are sized once for the largest length: to * |inc| * COMPSIZE elements. */
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
/* NOTE(review): this seeds random(), while the fill loops below call rand(). */
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
/* NOTE(review): the column header says "Flops" but the rows below are labelled MBytes. */
fprintf(stderr, " SIZE Flops\n"); | |||||
/* NOTE(review): step comes straight from atol(); a non-positive value would keep m from advancing past 'to'. */
for(m = from; m <= to; m += step) | |||||
{ | |||||
/* timeg accumulates the elapsed seconds of all repetitions for this m. */
timeg=0; | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
/* Refill both vectors before each timed call; values are rand()/RAND_MAX - 0.5. */
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
/* Only the SWAP call itself sits between the two timestamps. */
gettimeofday( &start, (struct timezone *)0); | |||||
SWAP (&m, x, &inc_x, y, &inc_y ); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
/* Mean time per call. NOTE(review): loops comes from atoi(); a value of 0 makes this 0/0 - confirm whether it needs guarding. */
timeg /= loops; | |||||
/* Reported figure: COMPSIZE * sizeof(FLOAT) * m bytes divided by the mean time, scaled by 1e-6. */
fprintf(stderr, | |||||
" %10.2f MBytes\n", | |||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6); | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -130,11 +130,21 @@ int main(int argc, char *argv[]){ | |||||
char trans='N'; | char trans='N'; | ||||
char diag ='U'; | char diag ='U'; | ||||
int l; | |||||
int loops = 1; | |||||
double timeg; | |||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | ||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | ||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | ||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p; | if ((p = getenv("OPENBLAS_DIAG"))) diag=*p; | ||||
p = getenv("OPENBLAS_LOOPS"); | |||||
if ( p != NULL ) | |||||
loops = atoi(p); | |||||
blasint m, i, j; | blasint m, i, j; | ||||
int from = 1; | int from = 1; | ||||
@@ -150,7 +160,7 @@ int main(int argc, char *argv[]){ | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | ||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | if (argc > 0) { step = atol(*argv); argc--; argv++;} | ||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c Loops = %d\n", from, to, step,side,uplo,trans,diag,loops); | |||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | ||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | fprintf(stderr,"Out of Memory!!\n");exit(1); | ||||
@@ -171,28 +181,35 @@ int main(int argc, char *argv[]){ | |||||
for(m = from; m <= to; m += step) | for(m = from; m <= to; m += step) | ||||
{ | { | ||||
fprintf(stderr, " %6d : ", (int)m); | |||||
timeg=0.0; | |||||
for(j = 0; j < m; j++){ | |||||
for(i = 0; i < m * COMPSIZE; i++){ | |||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
} | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
for(j = 0; j < m; j++){ | |||||
for(i = 0; i < m * COMPSIZE; i++){ | |||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | |||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
time1 = timeg/loops; | |||||
fprintf(stderr, | |||||
" %10.2f MFlops\n", | |||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
} | } | ||||
@@ -0,0 +1,196 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#define RETURN_BY_STACK 1 | |||||
#include "common.h" | |||||
#undef DOT | |||||
#ifdef DOUBLE | |||||
#define DOT BLASFUNC(zdotu) | |||||
#else | |||||
#define DOT BLASFUNC(cdotu) | |||||
#endif | |||||
#if defined(__WIN32__) || defined(__WIN64__) | |||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
/*
 * Replacement gettimeofday() for builds where __WIN32__ or __WIN64__ is
 * defined: stores the current system time, as reported by
 * GetSystemTimeAsFileTime(), into *tv.
 *
 * tv - receives the seconds and microseconds; nothing is written when NULL.
 * tz - never read by this function.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){ | |||||
FILETIME ft; | |||||
unsigned __int64 tmpres = 0; | |||||
/* NOTE(review): tzflag is declared but never referenced in this function. */
static int tzflag; | |||||
if (NULL != tv) | |||||
{ | |||||
GetSystemTimeAsFileTime(&ft); | |||||
/* Combine the high and low halves of the FILETIME into one 64-bit count. */
tmpres |= ft.dwHighDateTime; | |||||
tmpres <<= 32; | |||||
tmpres |= ft.dwLowDateTime; | |||||
/*converting file time to unix epoch*/ | |||||
tmpres /= 10; /*convert into microseconds*/ | |||||
tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
/* Split the microsecond count into whole seconds and the remainder. */
tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
} | |||||
return 0; | |||||
} | |||||
#endif | |||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
/*
 * Obtains memory as a System V shared-memory segment requested with
 * SHM_HUGETLB and returns the address at which it was attached.
 * The enclosing #if condition ends in "&& 0", so this code is currently
 * compiled out.
 *
 * size - requested size, passed to shmget() after rounding.
 *
 * Prints a message and calls exit(1) if shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){ | |||||
int shmid; | |||||
void *address; | |||||
/* Fallback value used only when the system headers do not define SHM_HUGETLB. */
#ifndef SHM_HUGETLB | |||||
#define SHM_HUGETLB 04000 | |||||
#endif | |||||
/*
 * The size argument is (size + HUGE_PAGESIZE) masked down to a multiple of
 * HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two - TODO confirm).
 * NOTE(review): this adds one full extra page when size is already aligned.
 */
if ((shmid =shmget(IPC_PRIVATE, | |||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
printf( "Memory allocation failed(shmget).\n"); | |||||
exit(1); | |||||
} | |||||
address = shmat(shmid, NULL, SHM_RND); | |||||
/* shmat() failure is detected by comparing the returned pointer with -1. */
if ((BLASLONG)address == -1){ | |||||
printf( "Memory allocation failed(shmat).\n"); | |||||
exit(1); | |||||
} | |||||
/* Issue IPC_RMID on the segment id before handing back the attached address. */
shmctl(shmid, IPC_RMID, 0); | |||||
return address; | |||||
} | |||||
#define malloc huge_malloc | |||||
#endif | |||||
/*
 * Benchmark driver for DOT (zdotu when DOUBLE is defined, cdotu otherwise)
 * in the variant where the routine writes its result through a leading
 * pointer argument; this file defines RETURN_BY_STACK before including
 * common.h.
 *
 * Command line: [from [to [step]]] - vector lengths to sweep
 *               (defaults 1, 200, 1); 'to' is raised to at least 'from'.
 * Environment:  OPENBLAS_LOOPS - timed repetitions per length (default 1)
 *               OPENBLAS_INCX / OPENBLAS_INCY - strides (default 1)
 *
 * For every length m, x and y are refilled with random values, one DOT call
 * is timed with gettimeofday(), and the mean time over the repetitions is
 * turned into an MFlops figure written to stderr.
 *
 * Returns 0; calls exit(1) if either buffer cannot be allocated.
 */
int main(int argc, char *argv[]){ | |||||
FLOAT *x, *y; | |||||
/* Receives the DOT output; it is written below but never read afterwards. */
FLOAT _Complex result; | |||||
blasint m, i; | |||||
blasint inc_x=1,inc_y=1; | |||||
int loops = 1; | |||||
int l; | |||||
char *p; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1,timeg; | |||||
/* Skip the program name, then consume up to three positional arguments. */
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
/* Optional overrides taken from the environment. */
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||||
/* Both buffers are sized once for the largest length: to * |inc| * COMPSIZE elements. */
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
/* NOTE(review): this seeds random(), while the fill loops below call rand(). */
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
/* NOTE(review): step comes straight from atol(); a non-positive value would keep m from advancing past 'to'. */
for(m = from; m <= to; m += step) | |||||
{ | |||||
/* timeg accumulates the elapsed seconds of all repetitions for this m. */
timeg=0; | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
/* Refill both vectors before each timed call; values are rand()/RAND_MAX - 0.5. */
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
/* Only the DOT call itself sits between the two timestamps. */
gettimeofday( &start, (struct timezone *)0); | |||||
DOT (&result, &m, x, &inc_x, y, &inc_y ); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
/* Mean time per call. NOTE(review): loops comes from atoi(); a value of 0 makes this 0/0 - confirm whether it needs guarding. */
timeg /= loops; | |||||
/* Reported figure: COMPSIZE * COMPSIZE * 2 * m operations divided by the mean time, scaled by 1e-6. */
fprintf(stderr, | |||||
" %10.2f MFlops\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6); | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -0,0 +1,195 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
#undef DOT | |||||
#ifdef DOUBLE | |||||
#define DOT BLASFUNC(zdotu) | |||||
#else | |||||
#define DOT BLASFUNC(cdotu) | |||||
#endif | |||||
#if defined(__WIN32__) || defined(__WIN64__) | |||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
/*
 * Replacement gettimeofday() for builds where __WIN32__ or __WIN64__ is
 * defined: stores the current system time, as reported by
 * GetSystemTimeAsFileTime(), into *tv.
 *
 * tv - receives the seconds and microseconds; nothing is written when NULL.
 * tz - never read by this function.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){ | |||||
FILETIME ft; | |||||
unsigned __int64 tmpres = 0; | |||||
/* NOTE(review): tzflag is declared but never referenced in this function. */
static int tzflag; | |||||
if (NULL != tv) | |||||
{ | |||||
GetSystemTimeAsFileTime(&ft); | |||||
/* Combine the high and low halves of the FILETIME into one 64-bit count. */
tmpres |= ft.dwHighDateTime; | |||||
tmpres <<= 32; | |||||
tmpres |= ft.dwLowDateTime; | |||||
/*converting file time to unix epoch*/ | |||||
tmpres /= 10; /*convert into microseconds*/ | |||||
tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
/* Split the microsecond count into whole seconds and the remainder. */
tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
} | |||||
return 0; | |||||
} | |||||
#endif | |||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
/*
 * Obtains memory as a System V shared-memory segment requested with
 * SHM_HUGETLB and returns the address at which it was attached.
 * The enclosing #if condition ends in "&& 0", so this code is currently
 * compiled out.
 *
 * size - requested size, passed to shmget() after rounding.
 *
 * Prints a message and calls exit(1) if shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){ | |||||
int shmid; | |||||
void *address; | |||||
/* Fallback value used only when the system headers do not define SHM_HUGETLB. */
#ifndef SHM_HUGETLB | |||||
#define SHM_HUGETLB 04000 | |||||
#endif | |||||
/*
 * The size argument is (size + HUGE_PAGESIZE) masked down to a multiple of
 * HUGE_PAGESIZE (assumes HUGE_PAGESIZE is a power of two - TODO confirm).
 * NOTE(review): this adds one full extra page when size is already aligned.
 */
if ((shmid =shmget(IPC_PRIVATE, | |||||
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
printf( "Memory allocation failed(shmget).\n"); | |||||
exit(1); | |||||
} | |||||
address = shmat(shmid, NULL, SHM_RND); | |||||
/* shmat() failure is detected by comparing the returned pointer with -1. */
if ((BLASLONG)address == -1){ | |||||
printf( "Memory allocation failed(shmat).\n"); | |||||
exit(1); | |||||
} | |||||
/* Issue IPC_RMID on the segment id before handing back the attached address. */
shmctl(shmid, IPC_RMID, 0); | |||||
return address; | |||||
} | |||||
#define malloc huge_malloc | |||||
#endif | |||||
/*
 * Benchmark driver for DOT (zdotu when DOUBLE is defined, cdotu otherwise)
 * in the variant where the routine returns its complex result by value.
 *
 * Command line: [from [to [step]]] - vector lengths to sweep
 *               (defaults 1, 200, 1); 'to' is raised to at least 'from'.
 * Environment:  OPENBLAS_LOOPS - timed repetitions per length (default 1)
 *               OPENBLAS_INCX / OPENBLAS_INCY - strides (default 1)
 *
 * For every length m, x and y are refilled with random values, one DOT call
 * is timed with gettimeofday(), and the mean time over the repetitions is
 * turned into an MFlops figure written to stderr.
 *
 * Returns 0; calls exit(1) if either buffer cannot be allocated.
 */
int main(int argc, char *argv[]){ | |||||
FLOAT *x, *y; | |||||
/* Holds the DOT return value; it is assigned below but never read afterwards. */
FLOAT _Complex result; | |||||
blasint m, i; | |||||
blasint inc_x=1,inc_y=1; | |||||
int loops = 1; | |||||
int l; | |||||
char *p; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1,timeg; | |||||
/* Skip the program name, then consume up to three positional arguments. */
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
/* Optional overrides taken from the environment. */
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||||
/* Both buffers are sized once for the largest length: to * |inc| * COMPSIZE elements. */
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
/* NOTE(review): this seeds random(), while the fill loops below call rand(). */
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
/* NOTE(review): step comes straight from atol(); a non-positive value would keep m from advancing past 'to'. */
for(m = from; m <= to; m += step) | |||||
{ | |||||
/* timeg accumulates the elapsed seconds of all repetitions for this m. */
timeg=0; | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
/* Refill both vectors before each timed call; values are rand()/RAND_MAX - 0.5. */
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
/* Only the DOT call itself sits between the two timestamps. */
gettimeofday( &start, (struct timezone *)0); | |||||
result = DOT (&m, x, &inc_x, y, &inc_y ); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
/* Mean time per call. NOTE(review): loops comes from atoi(); a value of 0 makes this 0/0 - confirm whether it needs guarding. */
timeg /= loops; | |||||
/* Reported figure: COMPSIZE * COMPSIZE * 2 * m operations divided by the mean time, scaled by 1e-6. */
fprintf(stderr, | |||||
" %10.2f MFlops\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6); | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -4,6 +4,8 @@ | |||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | ||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); | $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); | ||||
$hostarch = "x86_64" if ($hostarch eq "amd64"); | $hostarch = "x86_64" if ($hostarch eq "amd64"); | ||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/); | |||||
$hostarch = "arm64" if ($hostarch eq "aarch64"); | |||||
$binary = $ENV{"BINARY"}; | $binary = $ENV{"BINARY"}; | ||||
@@ -28,7 +30,7 @@ if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { | |||||
$cross_suffix = $1; | $cross_suffix = $1; | ||||
} | } | ||||
} else { | } else { | ||||
if ($ARGV[0] =~ /(.*-)(.*)/) { | |||||
if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) { | |||||
$cross_suffix = $1; | $cross_suffix = $1; | ||||
} | } | ||||
} | } | ||||
@@ -55,6 +57,7 @@ $os = osf if ($data =~ /OS_OSF/); | |||||
$os = WINNT if ($data =~ /OS_WINNT/); | $os = WINNT if ($data =~ /OS_WINNT/); | ||||
$os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/); | $os = CYGWIN_NT if ($data =~ /OS_CYGWIN_NT/); | ||||
$os = Interix if ($data =~ /OS_INTERIX/); | $os = Interix if ($data =~ /OS_INTERIX/); | ||||
$os = Android if ($data =~ /OS_ANDROID/); | |||||
$architecture = x86 if ($data =~ /ARCH_X86/); | $architecture = x86 if ($data =~ /ARCH_X86/); | ||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/); | $architecture = x86_64 if ($data =~ /ARCH_X86_64/); | ||||
@@ -1,350 +0,0 @@ | |||||
#ifndef CBLAS_H | |||||
#define CBLAS_H | |||||
#include <stddef.h> | |||||
#include "common.h" | |||||
#ifdef __cplusplus | |||||
extern "C" { | |||||
/* Assume C declarations for C++ */ | |||||
#endif /* __cplusplus */ | |||||
/*Set the number of threads on runtime.*/ | |||||
void openblas_set_num_threads(int num_threads); | |||||
void goto_set_num_threads(int num_threads); | |||||
/*Get the number of threads on runtime.*/ | |||||
int openblas_get_num_threads(void); | |||||
/*Get the number of physical processors (cores).*/ | |||||
int openblas_get_num_procs(void); | |||||
/*Get the build configure on runtime.*/ | |||||
char* openblas_get_config(void); | |||||
/* Get the parallelization type which is used by OpenBLAS */ | |||||
int openblas_get_parallel(void); | |||||
/* OpenBLAS is compiled for sequential use */ | |||||
#define OPENBLAS_SEQUENTIAL 0 | |||||
/* OpenBLAS is compiled using normal threading model */ | |||||
#define OPENBLAS_THREAD 1 | |||||
/* OpenBLAS is compiled using OpenMP threading model */ | |||||
#define OPENBLAS_OPENMP 2 | |||||
#define CBLAS_INDEX size_t | |||||
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER; | |||||
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE; | |||||
typedef enum CBLAS_UPLO {CblasUpper=121, CblasLower=122} CBLAS_UPLO; | |||||
typedef enum CBLAS_DIAG {CblasNonUnit=131, CblasUnit=132} CBLAS_DIAG; | |||||
typedef enum CBLAS_SIDE {CblasLeft=141, CblasRight=142} CBLAS_SIDE; | |||||
float cblas_sdsdot(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy); | |||||
double cblas_dsdot (blasint n, float *x, blasint incx, float *y, blasint incy); | |||||
float cblas_sdot(blasint n, float *x, blasint incx, float *y, blasint incy); | |||||
double cblas_ddot(blasint n, double *x, blasint incx, double *y, blasint incy); | |||||
openblas_complex_float cblas_cdotu(blasint n, float *x, blasint incx, float *y, blasint incy); | |||||
openblas_complex_float cblas_cdotc(blasint n, float *x, blasint incx, float *y, blasint incy); | |||||
openblas_complex_double cblas_zdotu(blasint n, double *x, blasint incx, double *y, blasint incy); | |||||
openblas_complex_double cblas_zdotc(blasint n, double *x, blasint incx, double *y, blasint incy); | |||||
void cblas_cdotu_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret); | |||||
void cblas_cdotc_sub(blasint n, float *x, blasint incx, float *y, blasint incy, openblas_complex_float *ret); | |||||
void cblas_zdotu_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret); | |||||
void cblas_zdotc_sub(blasint n, double *x, blasint incx, double *y, blasint incy, openblas_complex_double *ret); | |||||
float cblas_sasum (blasint n, float *x, blasint incx); | |||||
double cblas_dasum (blasint n, double *x, blasint incx); | |||||
float cblas_scasum(blasint n, float *x, blasint incx); | |||||
double cblas_dzasum(blasint n, double *x, blasint incx); | |||||
float cblas_snrm2 (blasint N, float *X, blasint incX); | |||||
double cblas_dnrm2 (blasint N, double *X, blasint incX); | |||||
float cblas_scnrm2(blasint N, float *X, blasint incX); | |||||
double cblas_dznrm2(blasint N, double *X, blasint incX); | |||||
CBLAS_INDEX cblas_isamax(blasint n, float *x, blasint incx); | |||||
CBLAS_INDEX cblas_idamax(blasint n, double *x, blasint incx); | |||||
CBLAS_INDEX cblas_icamax(blasint n, float *x, blasint incx); | |||||
CBLAS_INDEX cblas_izamax(blasint n, double *x, blasint incx); | |||||
void cblas_saxpy(blasint n, float alpha, float *x, blasint incx, float *y, blasint incy); | |||||
void cblas_daxpy(blasint n, double alpha, double *x, blasint incx, double *y, blasint incy); | |||||
void cblas_caxpy(blasint n, float *alpha, float *x, blasint incx, float *y, blasint incy); | |||||
void cblas_zaxpy(blasint n, double *alpha, double *x, blasint incx, double *y, blasint incy); | |||||
void cblas_scopy(blasint n, float *x, blasint incx, float *y, blasint incy); | |||||
void cblas_dcopy(blasint n, double *x, blasint incx, double *y, blasint incy); | |||||
void cblas_ccopy(blasint n, float *x, blasint incx, float *y, blasint incy); | |||||
void cblas_zcopy(blasint n, double *x, blasint incx, double *y, blasint incy); | |||||
void cblas_sswap(blasint n, float *x, blasint incx, float *y, blasint incy); | |||||
void cblas_dswap(blasint n, double *x, blasint incx, double *y, blasint incy); | |||||
void cblas_cswap(blasint n, float *x, blasint incx, float *y, blasint incy); | |||||
void cblas_zswap(blasint n, double *x, blasint incx, double *y, blasint incy); | |||||
void cblas_srot(blasint N, float *X, blasint incX, float *Y, blasint incY, float c, float s); | |||||
void cblas_drot(blasint N, double *X, blasint incX, double *Y, blasint incY, double c, double s); | |||||
void cblas_srotg(float *a, float *b, float *c, float *s); | |||||
void cblas_drotg(double *a, double *b, double *c, double *s); | |||||
void cblas_srotm(blasint N, float *X, blasint incX, float *Y, blasint incY, float *P); | |||||
void cblas_drotm(blasint N, double *X, blasint incX, double *Y, blasint incY, double *P); | |||||
void cblas_srotmg(float *d1, float *d2, float *b1, float b2, float *P); | |||||
void cblas_drotmg(double *d1, double *d2, double *b1, double b2, double *P); | |||||
void cblas_sscal(blasint N, float alpha, float *X, blasint incX); | |||||
void cblas_dscal(blasint N, double alpha, double *X, blasint incX); | |||||
void cblas_cscal(blasint N, float *alpha, float *X, blasint incX); | |||||
void cblas_zscal(blasint N, double *alpha, double *X, blasint incX); | |||||
void cblas_csscal(blasint N, float alpha, float *X, blasint incX); | |||||
void cblas_zdscal(blasint N, double alpha, double *X, blasint incX); | |||||
void cblas_sgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n, | |||||
float alpha, float *a, blasint lda, float *x, blasint incx, float beta, float *y, blasint incy); | |||||
void cblas_dgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n, | |||||
double alpha, double *a, blasint lda, double *x, blasint incx, double beta, double *y, blasint incy); | |||||
void cblas_cgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n, | |||||
float *alpha, float *a, blasint lda, float *x, blasint incx, float *beta, float *y, blasint incy); | |||||
void cblas_zgemv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE trans, blasint m, blasint n, | |||||
double *alpha, double *a, blasint lda, double *x, blasint incx, double *beta, double *y, blasint incy); | |||||
void cblas_sger (enum CBLAS_ORDER order, blasint M, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda); | |||||
void cblas_dger (enum CBLAS_ORDER order, blasint M, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda); | |||||
void cblas_cgeru(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda); | |||||
void cblas_cgerc(enum CBLAS_ORDER order, blasint M, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *A, blasint lda); | |||||
void cblas_zgeru(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda); | |||||
void cblas_zgerc(enum CBLAS_ORDER order, blasint M, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *A, blasint lda); | |||||
void cblas_strsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX); | |||||
void cblas_dtrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX); | |||||
void cblas_ctrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX); | |||||
void cblas_ztrsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX); | |||||
void cblas_strmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX); | |||||
void cblas_dtrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX); | |||||
void cblas_ctrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, float *A, blasint lda, float *X, blasint incX); | |||||
void cblas_ztrmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, blasint N, double *A, blasint lda, double *X, blasint incX); | |||||
void cblas_ssyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda); | |||||
void cblas_dsyr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda); | |||||
void cblas_cher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A, blasint lda); | |||||
void cblas_zher(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *A, blasint lda); | |||||
void cblas_ssyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,blasint N, float alpha, float *X, | |||||
blasint incX, float *Y, blasint incY, float *A, blasint lda); | |||||
void cblas_dsyr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, | |||||
blasint incX, double *Y, blasint incY, double *A, blasint lda); | |||||
void cblas_cher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, | |||||
float *Y, blasint incY, float *A, blasint lda); | |||||
void cblas_zher2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, | |||||
double *Y, blasint incY, double *A, blasint lda); | |||||
void cblas_sgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N, | |||||
blasint KL, blasint KU, float alpha, float *A, blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY); | |||||
void cblas_dgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N, | |||||
blasint KL, blasint KU, double alpha, double *A, blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY); | |||||
void cblas_cgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N, | |||||
blasint KL, blasint KU, float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY); | |||||
void cblas_zgbmv(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, blasint M, blasint N, | |||||
blasint KL, blasint KU, double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY); | |||||
void cblas_ssbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, float alpha, float *A, | |||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY); | |||||
void cblas_dsbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, double alpha, double *A, | |||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY); | |||||
void cblas_stbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX); | |||||
void cblas_dtbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX); | |||||
void cblas_ctbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX); | |||||
void cblas_ztbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX); | |||||
void cblas_stbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX); | |||||
void cblas_dtbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX); | |||||
void cblas_ctbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, blasint K, float *A, blasint lda, float *X, blasint incX); | |||||
void cblas_ztbsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, blasint K, double *A, blasint lda, double *X, blasint incX); | |||||
void cblas_stpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, float *Ap, float *X, blasint incX); | |||||
void cblas_dtpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, double *Ap, double *X, blasint incX); | |||||
void cblas_ctpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, float *Ap, float *X, blasint incX); | |||||
void cblas_ztpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, double *Ap, double *X, blasint incX); | |||||
void cblas_stpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, float *Ap, float *X, blasint incX); | |||||
void cblas_dtpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, double *Ap, double *X, blasint incX); | |||||
void cblas_ctpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, float *Ap, float *X, blasint incX); | |||||
void cblas_ztpsv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, enum CBLAS_DIAG Diag, | |||||
blasint N, double *Ap, double *X, blasint incX); | |||||
void cblas_ssymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *A, | |||||
blasint lda, float *X, blasint incX, float beta, float *Y, blasint incY); | |||||
void cblas_dsymv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *A, | |||||
blasint lda, double *X, blasint incX, double beta, double *Y, blasint incY); | |||||
void cblas_chemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *A, | |||||
blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY); | |||||
void cblas_zhemv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *A, | |||||
blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY); | |||||
void cblas_sspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *Ap, | |||||
float *X, blasint incX, float beta, float *Y, blasint incY); | |||||
void cblas_dspmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *Ap, | |||||
double *X, blasint incX, double beta, double *Y, blasint incY); | |||||
void cblas_sspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Ap); | |||||
void cblas_dspr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Ap); | |||||
void cblas_chpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *A); | |||||
void cblas_zhpr(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X,blasint incX, double *A); | |||||
void cblas_sspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float alpha, float *X, blasint incX, float *Y, blasint incY, float *A); | |||||
void cblas_dspr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double alpha, double *X, blasint incX, double *Y, blasint incY, double *A); | |||||
void cblas_chpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, float *alpha, float *X, blasint incX, float *Y, blasint incY, float *Ap); | |||||
void cblas_zhpr2(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, double *alpha, double *X, blasint incX, double *Y, blasint incY, double *Ap); | |||||
void cblas_chbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, | |||||
float *alpha, float *A, blasint lda, float *X, blasint incX, float *beta, float *Y, blasint incY); | |||||
void cblas_zhbmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, blasint K, | |||||
double *alpha, double *A, blasint lda, double *X, blasint incX, double *beta, double *Y, blasint incY); | |||||
void cblas_chpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, | |||||
float *alpha, float *Ap, float *X, blasint incX, float *beta, float *Y, blasint incY); | |||||
void cblas_zhpmv(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint N, | |||||
double *alpha, double *Ap, double *X, blasint incX, double *beta, double *Y, blasint incY); | |||||
void cblas_sgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, | |||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc); | |||||
void cblas_dgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, | |||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); | |||||
void cblas_cgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, | |||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); | |||||
void cblas_cgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, | |||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); | |||||
void cblas_zgemm(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, | |||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); | |||||
void cblas_zgemm3m(enum CBLAS_ORDER Order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, blasint N, blasint K, | |||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); | |||||
void cblas_ssymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, | |||||
float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc); | |||||
void cblas_dsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, | |||||
double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); | |||||
void cblas_csymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, | |||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); | |||||
void cblas_zsymm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, | |||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); | |||||
void cblas_ssyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, | |||||
blasint N, blasint K, float alpha, float *A, blasint lda, float beta, float *C, blasint ldc); | |||||
void cblas_dsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, | |||||
blasint N, blasint K, double alpha, double *A, blasint lda, double beta, double *C, blasint ldc); | |||||
void cblas_csyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, | |||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *beta, float *C, blasint ldc); | |||||
void cblas_zsyrk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, | |||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *beta, double *C, blasint ldc); | |||||
void cblas_ssyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, | |||||
blasint N, blasint K, float alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc); | |||||
void cblas_dsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, | |||||
blasint N, blasint K, double alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); | |||||
void cblas_csyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, | |||||
blasint N, blasint K, float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); | |||||
void cblas_zsyr2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, | |||||
blasint N, blasint K, double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); | |||||
void cblas_strmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, | |||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb); | |||||
void cblas_dtrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, | |||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb); | |||||
void cblas_ctrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, | |||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb); | |||||
void cblas_ztrmm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, | |||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb); | |||||
void cblas_strsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, | |||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float alpha, float *A, blasint lda, float *B, blasint ldb); | |||||
void cblas_dtrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, | |||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double alpha, double *A, blasint lda, double *B, blasint ldb); | |||||
void cblas_ctrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, | |||||
enum CBLAS_DIAG Diag, blasint M, blasint N, float *alpha, float *A, blasint lda, float *B, blasint ldb); | |||||
void cblas_ztrsm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE TransA, | |||||
enum CBLAS_DIAG Diag, blasint M, blasint N, double *alpha, double *A, blasint lda, double *B, blasint ldb); | |||||
void cblas_chemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, | |||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float *beta, float *C, blasint ldc); | |||||
void cblas_zhemm(enum CBLAS_ORDER Order, enum CBLAS_SIDE Side, enum CBLAS_UPLO Uplo, blasint M, blasint N, | |||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double *beta, double *C, blasint ldc); | |||||
void cblas_cherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K, | |||||
float alpha, float *A, blasint lda, float beta, float *C, blasint ldc); | |||||
void cblas_zherk(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K, | |||||
double alpha, double *A, blasint lda, double beta, double *C, blasint ldc); | |||||
void cblas_cher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K, | |||||
float *alpha, float *A, blasint lda, float *B, blasint ldb, float beta, float *C, blasint ldc); | |||||
void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANSPOSE Trans, blasint N, blasint K, | |||||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); | |||||
void cblas_xerbla(blasint p, char *rout, char *form, ...); | |||||
/*** BLAS extensions ***/ | |||||
void cblas_saxpby(blasint n, float alpha, float *x, blasint incx,float beta, float *y, blasint incy); | |||||
void cblas_daxpby(blasint n, double alpha, double *x, blasint incx,double beta, double *y, blasint incy); | |||||
void cblas_caxpby(blasint n, float *alpha, float *x, blasint incx,float *beta, float *y, blasint incy); | |||||
void cblas_zaxpby(blasint n, double *alpha, double *x, blasint incx,double *beta, double *y, blasint incy); | |||||
void cblas_somatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a, | |||||
blasint clda, float *b, blasint cldb); | |||||
void cblas_domatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a, | |||||
blasint clda, double *b, blasint cldb); | |||||
void cblas_comatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a, | |||||
blasint clda, void *b, blasint cldb); | |||||
void cblas_zomatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, void* calpha, void* a, | |||||
blasint clda, void *b, blasint cldb); | |||||
void cblas_simatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float calpha, float *a, | |||||
blasint clda, blasint cldb); | |||||
void cblas_dimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double calpha, double *a, | |||||
blasint clda, blasint cldb); | |||||
void cblas_cimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, float* calpha, float* a, | |||||
blasint clda, blasint cldb); | |||||
void cblas_zimatcopy( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, blasint ccols, double* calpha, double* a, | |||||
blasint clda, blasint cldb); | |||||
void cblas_sgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float calpha, float *a, blasint clda, float cbeta, | |||||
float *c, blasint cldc); | |||||
void cblas_dgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double calpha, double *a, blasint clda, double cbeta, | |||||
double *c, blasint cldc); | |||||
void cblas_cgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, float *calpha, float *a, blasint clda, float *cbeta, | |||||
float *c, blasint cldc); | |||||
void cblas_zgeadd( enum CBLAS_ORDER CORDER, blasint crows, blasint ccols, double *calpha, double *a, blasint clda, double *cbeta, | |||||
double *c, blasint cldc); | |||||
#ifdef __cplusplus | |||||
} | |||||
#endif /* __cplusplus */ | |||||
#endif |
@@ -0,0 +1,115 @@ | |||||
## | |||||
## Author: Hank Anderson <hank@statease.com> | |||||
## Description: Ported from portion of OpenBLAS/Makefile.system | |||||
## Sets various variables based on architecture. | |||||
# x86 / x86_64: set NO_BINARY_MODE for 32-bit x86 when BINARY is not set, and
# enable extended precision (EXPRECISION) when gfortran is paired with a
# GNU/LSB or Clang C compiler.
if ("${ARCH}" STREQUAL "x86" OR "${ARCH}" STREQUAL "x86_64")

  if ("${ARCH}" STREQUAL "x86" AND NOT BINARY)
    set(NO_BINARY_MODE 1)
  endif ()

  if (NOT NO_EXPRECISION)
    if ("${F_COMPILER}" MATCHES "GFORTRAN")
      # Compare against CMAKE_C_COMPILER_ID: CMAKE_C_COMPILER is the path of the
      # compiler executable and never equals an ID string such as "GNU".
      # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa
      if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
        set(EXPRECISION 1)
        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double")
        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
      endif ()
      # Clang gets the define only; the Fortran side still gets the flag.
      if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
        set(EXPRECISION 1)
        set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION")
        set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double")
      endif ()
    endif ()
  endif ()
endif ()
# Intel C compiler: append -wd981 to the common C options.
# The check uses CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the executable path
# and would never compare equal to "Intel".
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
  set(CCOMMON_OPT "${CCOMMON_OPT} -wd981")
endif ()
# OpenMP: append the C compiler specific OpenMP switch to CCOMMON_OPT.
# All checks use CMAKE_C_COMPILER_ID (the compiler's identification string);
# CMAKE_C_COMPILER is the executable path and never matches these names.
if (USE_OPENMP)

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  endif ()

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    message(WARNING "Clang doesn't support OpenMP yet.")
    set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp")
  endif ()

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Intel")
    set(CCOMMON_OPT "${CCOMMON_OPT} -openmp")
  endif ()

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  endif ()

  # NOTE(review): CMake does not appear to define an "OPEN64" compiler ID;
  # kept as in the original port - confirm how Open64 is identified.
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
    set(CEXTRALIB "${CEXTRALIB} -lstdc++")
  endif ()

  # CMake spells this ID "PathScale"; the upper-case form is kept for
  # compatibility with the Makefile naming.
  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "PathScale")
    set(CCOMMON_OPT "${CCOMMON_OPT} -mp")
  endif ()
endif ()
# DYNAMIC_ARCH: choose the list of cores to build kernels for.
# DYNAMIC_CORE is a space-separated string. If it ends up empty/false (the
# architecture has no list here), DYNAMIC_ARCH is unset again.
if (DYNAMIC_ARCH)

  if (${ARCH} STREQUAL "x86")
    set(DYNAMIC_CORE "KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")
  elseif (${ARCH} STREQUAL "x86_64")
    # Baseline 64-bit cores first, then the AVX / AVX2 capable ones unless disabled.
    set(DYNAMIC_CORE "PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO")

    if (NOT NO_AVX)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} SANDYBRIDGE BULLDOZER PILEDRIVER STEAMROLLER")
    endif ()

    if (NOT NO_AVX2)
      set(DYNAMIC_CORE "${DYNAMIC_CORE} HASWELL")
    endif ()
  endif ()

  if (NOT DYNAMIC_CORE)
    unset(DYNAMIC_ARCH)
  endif ()
endif ()
# ia64: set NO_BINARY_MODE and BINARY_DEFINED.
if ("${ARCH}" STREQUAL "ia64")
  set(NO_BINARY_MODE 1)
  set(BINARY_DEFINED 1)

  # Placeholder carried over from Makefile.system: extended precision is
  # currently disabled for ia64. The condition uses CMAKE_C_COMPILER_ID so it
  # is correct if the body is ever enabled.
  if ("${F_COMPILER}" MATCHES "GFORTRAN")
    if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
      # EXPRECISION = 1
      # CCOMMON_OPT += -DEXPRECISION
    endif ()
  endif ()
endif ()
# mips64, alpha, arm and arm64 all set NO_BINARY_MODE.
if (${ARCH} STREQUAL "mips64" OR ${ARCH} STREQUAL "alpha" OR ${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64")
  set(NO_BINARY_MODE 1)
endif ()

# alpha, arm and arm64 additionally set BINARY_DEFINED; mips64 does not.
if (${ARCH} STREQUAL "alpha" OR ${ARCH} STREQUAL "arm" OR ${ARCH} STREQUAL "arm64")
  set(BINARY_DEFINED 1)
endif ()
@@ -0,0 +1,89 @@ | |||||
## | |||||
## Author: Hank Anderson <hank@statease.com> | |||||
## Description: Ported from the OpenBLAS/c_check perl script. | |||||
## This is triggered by prebuild.cmake and runs before any of the code is built. | |||||
## Creates config.h and Makefile.conf. | |||||
# CMake vars set by this file: | |||||
# OSNAME (use CMAKE_SYSTEM_NAME) | |||||
# ARCH | |||||
# C_COMPILER (use CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the executable path)
# BINARY32 | |||||
# BINARY64 | |||||
# FU | |||||
# CROSS_SUFFIX | |||||
# CROSS | |||||
# CEXTRALIB | |||||
# Defines set by this file: | |||||
# OS_ | |||||
# ARCH_ | |||||
# C_ | |||||
# __32BIT__ | |||||
# __64BIT__ | |||||
# FUNDERSCORE | |||||
# PTHREAD_CREATE_FUNC | |||||
# N.B. c_check (and ctest.c) is not cross-platform, so instead try to use CMake variables.

# FU is the value written out as FUNDERSCORE in the config header:
# "_" on Apple and MSVC, empty everywhere else.
if (APPLE OR MSVC)
  set(FU "_")
else ()
  set(FU "")
endif ()
# Convert CMake vars into the format that OpenBLAS expects:
# HOST_OS is the upper-cased CMAKE_SYSTEM_NAME, with "WINDOWS" renamed to WINNT.
string(TOUPPER ${CMAKE_SYSTEM_NAME} HOST_OS)
if (HOST_OS STREQUAL "WINDOWS")
  set(HOST_OS WINNT)
endif ()
# added by hpa - check size of void ptr to detect 64-bit compile
# A BINARY value that is already defined is left untouched.
if (NOT DEFINED BINARY)
  if (CMAKE_SIZEOF_VOID_P EQUAL 8)
    set(BINARY 64)
  else ()
    set(BINARY 32)
  endif ()
endif ()

# Exactly one of BINARY64 / BINARY32 is set, depending on BINARY.
if (BINARY EQUAL 64)
  set(BINARY64 1)
else ()
  set(BINARY32 1)
endif ()
# CMake docs define these:
# CMAKE_SYSTEM_PROCESSOR - The name of the CPU CMake is building for.
# CMAKE_HOST_SYSTEM_PROCESSOR - The name of the CPU CMake is running on.
#
# TODO: CMAKE_SYSTEM_PROCESSOR doesn't seem to be correct - instead get it from the compiler a la c_check
set(ARCH ${CMAKE_SYSTEM_PROCESSOR})

# "AMD64" is mapped to the name used by OpenBLAS.
if (${ARCH} STREQUAL "AMD64")
  set(ARCH "x86_64")
endif ()

# Two cases collapse to x86:
#  - a 32-bit compiler on a 64-bit system (CMAKE_SYSTEM_PROCESSOR will be wrong),
#  - the upper-case spelling "X86".
if ((${ARCH} STREQUAL "x86_64" AND BINARY EQUAL 32) OR ${ARCH} STREQUAL "X86")
  set(ARCH x86)
endif ()
# Identify the C compiler for the C_<id> define. This must be the *C*
# compiler's ID: the C++ compiler may be a different vendor.
# The ID "GNU" is written as "GCC".
set(COMPILER_ID "${CMAKE_C_COMPILER_ID}")
if ("${COMPILER_ID}" STREQUAL "GNU")
  set(COMPILER_ID "GCC")
endif ()

string(TOUPPER "${ARCH}" UC_ARCH)

# (Re)create the config header with the OS, architecture, compiler, bitness
# and FUNDERSCORE defines. file(WRITE) overwrites any existing content.
file(WRITE ${TARGET_CONF}
  "#define OS_${HOST_OS}\t1\n"
  "#define ARCH_${UC_ARCH}\t1\n"
  "#define C_${COMPILER_ID}\t1\n"
  "#define __${BINARY}BIT__\t1\n"
  "#define FUNDERSCORE\t${FU}\n")
@@ -0,0 +1,103 @@ | |||||
## | |||||
## Author: Hank Anderson <hank@statease.com> | |||||
## Description: Ported from portion of OpenBLAS/Makefile.system | |||||
## Sets C related variables. | |||||
# GNU-like C compilers (GNU, LSB, Clang): warning flags plus the ABI / bitness
# switches. The test uses CMAKE_C_COMPILER_ID; CMAKE_C_COMPILER is the path of
# the compiler executable and never equals an ID string.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "LSB" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")

  set(CCOMMON_OPT "${CCOMMON_OPT} -Wall")
  set(COMMON_PROF "${COMMON_PROF} -fno-inline")
  set(NO_UNINITIALIZED_WARN "-Wno-uninitialized")

  if (QUIET_MAKE)
    set(CCOMMON_OPT "${CCOMMON_OPT} ${NO_UNINITIALIZED_WARN} -Wno-unused")
  endif ()

  if (NO_BINARY_MODE)

    # mips64 selects the ABI explicitly instead of using -m32/-m64.
    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=64")
      else ()
        set(CCOMMON_OPT "${CCOMMON_OPT} -mabi=n32")
      endif ()
      set(BINARY_DEFINED 1)
    endif ()

    # CORE and OSNAME may be unset; quoting keeps if() well-formed then.
    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -march=mips64")
      set(FCOMMON_OPT "${FCOMMON_OPT} -march=mips64")
    endif ()

    # The CMake port uses CMAKE_SYSTEM_NAME in place of OSNAME; accept either.
    if ("${OSNAME}" STREQUAL "AIX" OR "${CMAKE_SYSTEM_NAME}" STREQUAL "AIX")
      set(BINARY_DEFINED 1)
    endif ()
  endif ()

  # Nothing above fixed the bitness: fall back to -m64 / -m32.
  if (NOT BINARY_DEFINED)
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
# PGI C compiler: pick the target processor switch by bitness.
# Uses CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is the executable path).
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PGI")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7-64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -tp p7")
  endif ()
endif ()
# PathScale C compiler: -m64 / -m32 by bitness.
# Uses CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is the executable path). CMake
# spells the ID "PathScale"; the upper-case Makefile name is accepted too.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "PATHSCALE" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "PathScale")
  if (BINARY64)
    set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  endif ()
endif ()
# Open64 C compiler.
# Uses CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is the executable path).
# NOTE(review): CMake does not appear to define an "OPEN64" compiler ID -
# confirm how Open64 is identified before relying on this branch.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "OPEN64")

  if ("${ARCH}" STREQUAL "mips64")

    if (NOT BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -n32")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -n64")
    endif ()

    # CORE may be unset; quoting keeps if() well-formed in that case.
    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(CCOMMON_OPT "${CCOMMON_OPT} -loongson3 -static")
    endif ()

  else ()

    # 64-bit builds get -m64 and 32-bit builds -m32 (the two flags were
    # swapped before), matching the other compilers in this file.
    if (BINARY64)
      set(CCOMMON_OPT "${CCOMMON_OPT} -m64")
    else ()
      set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
    endif ()
  endif ()
endif ()
# Sun / Oracle Studio C compiler.
# Uses CMAKE_C_COMPILER_ID (CMAKE_C_COMPILER is the executable path). CMake
# reports this compiler as "SunPro"; the Makefile name "SUN" is accepted too.
if ("${CMAKE_C_COMPILER_ID}" STREQUAL "SUN" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "SunPro")
  set(CCOMMON_OPT "${CCOMMON_OPT} -w")
  if ("${ARCH}" STREQUAL "x86")
    set(CCOMMON_OPT "${CCOMMON_OPT} -m32")
  else ()
    # NOTE(review): this appends to FCOMMON_OPT, not CCOMMON_OPT - kept as
    # ported; confirm against Makefile.system whether that is intended.
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  endif ()
endif ()
@@ -0,0 +1,60 @@ | |||||
#Only generate .def for dll on MSVC | |||||
# On MSVC, run exports/gensymbol with perl just before linking the library
# target and redirect its output into ${PROJECT_BINARY_DIR}/openblas.def.
if (MSVC)

  set_source_files_properties(${OpenBLAS_DEF_FILE} PROPERTIES GENERATED 1)

  # Architecture argument: default to x86_64; a "generic" core overrides it.
  if (DEFINED ARCH)
    set(ARCH_IN ${ARCH})
  else ()
    set(ARCH_IN "x86_64")
  endif ()

  # CORE may be unset; quoting keeps if() well-formed in that case.
  if ("${CORE}" STREQUAL "generic")
    set(ARCH_IN "GENERIC")
  endif ()

  # gensymbol takes positional arguments, so every option gets an explicit
  # value: the variable's own value when defined, 0 otherwise.
  foreach (_gensymbol_opt EXPRECISION NO_CBLAS NO_LAPACK NO_LAPACKE NEED2UNDERSCORES ONLY_CBLAS)
    if (DEFINED ${_gensymbol_opt})
      set(${_gensymbol_opt}_IN "${${_gensymbol_opt}}")
    else ()
      set(${_gensymbol_opt}_IN 0)
    endif ()
  endforeach ()

  # PROJECT_SOURCE_DIR is used instead of CMAKE_SOURCE_DIR so the script is
  # still found when OpenBLAS is built as a subproject.
  add_custom_command(
    TARGET ${OpenBLAS_LIBNAME} PRE_LINK
    COMMAND perl
    ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def"
    COMMENT "Create openblas.def file"
    VERBATIM)
endif ()
@@ -0,0 +1,66 @@ | |||||
## | |||||
## Author: Hank Anderson <hank@statease.com> | |||||
## Copyright: (c) Stat-Ease, Inc. | |||||
## Created: 12/29/14 | |||||
## Last Modified: 12/29/14 | |||||
## Description: Ported from the OpenBLAS/f_check perl script. | |||||
## This is triggered by prebuild.cmake and runs before any of the code is built. | |||||
## Appends Fortran information to config.h and Makefile.conf. | |||||
# CMake vars set by this file: | |||||
# F_COMPILER | |||||
# FC | |||||
# BU | |||||
# NOFORTRAN | |||||
# NEED2UNDERSCORES | |||||
# FEXTRALIB | |||||
# Defines set by this file: | |||||
# BUNDERSCORE | |||||
# NEEDBUNDERSCORE | |||||
# NEED2UNDERSCORES | |||||
# Fortran compiler setup.
# On MSVC (otherwise CMake automatically assumes ifort is available -hpa) and
# whenever LAPACK is not built, gfortran is forced as the Fortran compiler
# rather than detected.
if (MSVC OR NO_LAPACK)
  include(CMakeForceCompiler)
  CMAKE_FORCE_Fortran_COMPILER(gfortran GNU)
endif ()

# The Fortran language is only enabled when LAPACK is built.
if (NOT NO_LAPACK)
  enable_language(Fortran)
endif ()
# N.B. f_check is not cross-platform, so it is not run; fixed values are
# used instead.
# TODO: detect whether underscore needed, set #defines and BU appropriately - use try_compile
# TODO: set FEXTRALIB flags a la f_check?
set(BU "_")

# Both configurations append these two defines to the config header.
file(APPEND ${TARGET_CONF}
  "#define BUNDERSCORE _\n"
  "#define NEEDBUNDERSCORE 1\n")

if (ONLY_CBLAS)
  # When we only build CBLAS, we set NOFORTRAN=2
  set(NOFORTRAN 2)
  set(NO_FBLAS 1)
  #set(F_COMPILER GFORTRAN) # CMake handles the fortran compiler
else ()
  file(APPEND ${TARGET_CONF}
    "#define NEED2UNDERSCORES 0\n")
endif ()
# F_COMPILER is the upper-cased file name (directory and extension stripped)
# of the Fortran compiler executable, e.g. ".../gfortran" -> "GFORTRAN".
# The expansions are quoted so an empty value cannot produce a malformed call.
get_filename_component(F_COMPILER "${CMAKE_Fortran_COMPILER}" NAME_WE)
string(TOUPPER "${F_COMPILER}" F_COMPILER)
@@ -0,0 +1,200 @@ | |||||
## | |||||
## Author: Hank Anderson <hank@statease.com> | |||||
## Description: Ported from portion of OpenBLAS/Makefile.system | |||||
## Sets Fortran related variables. | |||||
# G77 and G95 are handled identically apart from the interface define.
foreach (_legacy_fc G77 G95)
  if (${F_COMPILER} STREQUAL "${_legacy_fc}")
    set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_${_legacy_fc}")

    # -Wall is always added; the bitness switch only outside "no binary mode".
    if (NO_BINARY_MODE)
      set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")
    elseif (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -Wall -m32")
    endif ()
  endif ()
endforeach ()
# gfortran: interface define, warnings, runtime library, bitness / ABI,
# 64-bit integer interface and OpenMP.
if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_GFORT")
  set(FCOMMON_OPT "${FCOMMON_OPT} -Wall")

  #Don't include -lgfortran, when NO_LAPACK=1 or lsbcc
  if (NOT NO_LAPACK)
    # Append to the existing value; previously the "$" was missing and the
    # literal text "{EXTRALIB}" was stored.
    set(EXTRALIB "${EXTRALIB} -lgfortran")
  endif ()

  if (NO_BINARY_MODE)
    # In no-binary mode only mips64 gets an explicit ABI switch.
    if ("${ARCH}" STREQUAL "mips64")
      if (BINARY64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
      else ()
        set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
      endif ()
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
      # The 8-byte default integer is only requested for 64-bit builds.
      if (INTERFACE64)
        set(FCOMMON_OPT "${FCOMMON_OPT} -fdefault-integer-8")
      endif ()
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()

  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -fopenmp")
  endif ()
endif ()
# Intel Fortran: add -DF_INTERFACE_INTEL to the C flags; -i8 for INTERFACE64 and
# -openmp for USE_OPENMP.
# F_COMPILER is quoted so an empty value compares false instead of breaking if().
if ("${F_COMPILER}" STREQUAL "INTEL")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_INTEL")
  if (INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
# Fujitsu Fortran: add -DF_INTERFACE_FUJITSU to the C flags; -openmp for USE_OPENMP.
# F_COMPILER is quoted so an empty value compares false instead of breaking if().
if ("${F_COMPILER}" STREQUAL "FUJITSU")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FUJITSU")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
# IBM Fortran: add -DF_INTERFACE_IBM to the C flags; -q64 (plus -qintsize=8 for
# INTERFACE64) or -q32 depending on BINARY64; -openmp for USE_OPENMP.
# F_COMPILER is quoted so an empty value compares false instead of breaking if().
if ("${F_COMPILER}" STREQUAL "IBM")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_IBM")
  # FCOMMON_OPT += -qarch=440
  if (BINARY64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -q64")
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -qintsize=8")
    endif ()
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -q32")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
# PGI Fortran: add -DF_INTERFACE_PGI to the C flags and -DPGICOMPILER to
# COMMON_PROF; choose "-tp p7-64" (with -i8 for INTERFACE64) or "-tp p7" from
# BINARY64; -mp for USE_OPENMP.
# F_COMPILER is quoted so an empty value compares false instead of breaking if().
if ("${F_COMPILER}" STREQUAL "PGI")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PGI")
  set(COMMON_PROF "${COMMON_PROF} -DPGICOMPILER")
  if (BINARY64)
    if (INTERFACE64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
    endif ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7-64")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -tp p7")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
# PathScale Fortran: add -DF_INTERFACE_PATHSCALE to the C flags; -i8 when both
# BINARY64 and INTERFACE64 are set; -mabi=64/-mabi=n32 on mips64 and -m64/-m32
# elsewhere; -mp for USE_OPENMP.
# F_COMPILER and ARCH are quoted so empty values compare false instead of
# breaking if().
if ("${F_COMPILER}" STREQUAL "PATHSCALE")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_PATHSCALE")
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  if ("${ARCH}" STREQUAL "mips64")
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -mabi=n32")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
# Open64 Fortran: add -DF_INTERFACE_OPEN64 to the C flags; -i8 when both BINARY64
# and INTERFACE64 are set; -n64/-n32 on mips64 (plus "-loongson3 -static" for the
# LOONGSON3A/LOONGSON3B cores) and -m64/-m32 elsewhere; with USE_OPENMP add
# -lstdc++ to FEXTRALIB and -mp to the Fortran flags.
# F_COMPILER, ARCH and CORE are quoted so empty values compare false instead of
# breaking if().
if ("${F_COMPILER}" STREQUAL "OPEN64")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_OPEN64")
  if (BINARY64 AND INTERFACE64)
    set(FCOMMON_OPT "${FCOMMON_OPT} -i8")
  endif ()
  if ("${ARCH}" STREQUAL "mips64")
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -n64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -n32")
    endif ()
    # Both Loongson 3 cores take the same extra flags.
    if ("${CORE}" STREQUAL "LOONGSON3A" OR "${CORE}" STREQUAL "LOONGSON3B")
      set(FCOMMON_OPT "${FCOMMON_OPT} -loongson3 -static")
    endif ()
  else ()
    if (BINARY64)
      set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
    else ()
      set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
    endif ()
  endif ()
  if (USE_OPENMP)
    set(FEXTRALIB "${FEXTRALIB} -lstdc++")
    set(FCOMMON_OPT "${FCOMMON_OPT} -mp")
  endif ()
endif ()
# Sun Fortran: add -DF_INTERFACE_SUN to the C flags; -m32 when ARCH is x86 and
# -m64 otherwise; -xopenmp=parallel for USE_OPENMP.
# F_COMPILER and ARCH are quoted so empty values compare false instead of
# breaking if().
if ("${F_COMPILER}" STREQUAL "SUN")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_SUN")
  if ("${ARCH}" STREQUAL "x86")
    set(FCOMMON_OPT "${FCOMMON_OPT} -m32")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -m64")
  endif ()
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -xopenmp=parallel")
  endif ()
endif ()
# Compaq Fortran: add -DF_INTERFACE_COMPAQ to the C flags; -openmp for USE_OPENMP.
# F_COMPILER is quoted so an empty value compares false instead of breaking if().
if ("${F_COMPILER}" STREQUAL "COMPAQ")
  set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_COMPAQ")
  if (USE_OPENMP)
    set(FCOMMON_OPT "${FCOMMON_OPT} -openmp")
  endif ()
endif ()
# from the root Makefile - this is for lapack-netlib to compile the correct secnd file.
# TIMER is substituted into the second_<TIMER>.f / dsecnd_<TIMER>.f source names
# of the LAPACK source lists: INT_ETIME for gfortran, NONE for everything else.
# F_COMPILER is quoted so an empty value falls through to NONE instead of
# breaking if().
if ("${F_COMPILER}" STREQUAL "GFORTRAN")
  set(TIMER "INT_ETIME")
else ()
  set(TIMER "NONE")
endif ()
@@ -0,0 +1,165 @@ | |||||
# helper functions for the kernel CMakeLists.txt | |||||
# Set the default filenames for L1 objects. Most of these will be overridden by the appropriate KERNEL file.
# SetDefaultL1()
# Sets, in the caller's scope, the default source file for every level-1 kernel
# variable (<P><OP>KERNEL and friends). Real precisions (S, D, Q) share one
# source per operation; complex precisions (C, Z, X) share the "z"-prefixed one.
# Takes no arguments. The _l1_* loop helpers are unset before returning.
macro(SetDefaultL1)
  # Operations that exist for all six precisions: <op>.S / z<op>.S
  foreach (_l1_op AMAX AMIN ASUM AXPY COPY DOT NRM2 ROT SCAL SWAP)
    string(TOLOWER "${_l1_op}" _l1_src)
    foreach (_l1_prec S D Q)
      set(${_l1_prec}${_l1_op}KERNEL ${_l1_src}.S)
    endforeach ()
    foreach (_l1_prec C Z X)
      set(${_l1_prec}${_l1_op}KERNEL z${_l1_src}.S)
    endforeach ()
  endforeach ()

  # Index-returning variants I<P>AMAX / I<P>AMIN: i<op>.S / iz<op>.S
  foreach (_l1_op AMAX AMIN)
    string(TOLOWER "${_l1_op}" _l1_src)
    foreach (_l1_prec S D Q)
      set(I${_l1_prec}${_l1_op}KERNEL i${_l1_src}.S)
    endforeach ()
    foreach (_l1_prec C Z X)
      set(I${_l1_prec}${_l1_op}KERNEL iz${_l1_src}.S)
    endforeach ()
  endforeach ()

  # Kernels defined per real precision.
  foreach (_l1_prec S D Q)
    set(${_l1_prec}MAXKERNEL max.S)
    set(${_l1_prec}MINKERNEL min.S)
    set(I${_l1_prec}MAXKERNEL iamax.S)
    set(I${_l1_prec}MINKERNEL iamin.S)
    set(${_l1_prec}GEMVNKERNEL gemv_n.S)
    set(${_l1_prec}GEMVTKERNEL gemv_t.S)
    set(${_l1_prec}CABS_KERNEL ../generic/cabs.c)
  endforeach ()

  # Complex GEMV defaults.
  foreach (_l1_prec C Z X)
    set(${_l1_prec}GEMVNKERNEL zgemv_n.S)
    set(${_l1_prec}GEMVTKERNEL zgemv_t.S)
  endforeach ()

  set(LSAME_KERNEL ../generic/lsame.c)

  # AXPBY only has S/D and C/Z defaults.
  foreach (_l1_prec S D)
    set(${_l1_prec}AXPBYKERNEL ../arm/axpby.c)
  endforeach ()
  foreach (_l1_prec C Z)
    set(${_l1_prec}AXPBYKERNEL ../arm/zaxpby.c)
  endforeach ()

  # A macro has no scope of its own: do not leave the helpers behind.
  unset(_l1_op)
  unset(_l1_src)
  unset(_l1_prec)
endmacro ()
# SetDefaultL2()
# Sets, in the caller's scope, the default source file for every level-2 kernel
# variable: GEMV (N/T), GER (GERU/GERC for complex), SYMV (U/L) and, for the
# complex precisions only, HEMV (U/L/V/M).
# Takes no arguments. The _l2_* loop helpers are unset before returning.
macro(SetDefaultL2)
  # Real precisions.
  foreach (_l2_prec S D Q)
    set(${_l2_prec}GEMVNKERNEL gemv_n.S)
    set(${_l2_prec}GEMVTKERNEL gemv_t.S)
    set(${_l2_prec}GERKERNEL ../generic/ger.c)
    set(${_l2_prec}SYMV_U_KERNEL ../generic/symv_k.c)
    set(${_l2_prec}SYMV_L_KERNEL ../generic/symv_k.c)
  endforeach ()

  # Complex precisions.
  foreach (_l2_prec C Z X)
    set(${_l2_prec}GEMVNKERNEL zgemv_n.S)
    set(${_l2_prec}GEMVTKERNEL zgemv_t.S)
    set(${_l2_prec}GERUKERNEL ../generic/zger.c)
    set(${_l2_prec}GERCKERNEL ../generic/zger.c)
    set(${_l2_prec}SYMV_U_KERNEL ../generic/zsymv_k.c)
    set(${_l2_prec}SYMV_L_KERNEL ../generic/zsymv_k.c)
    foreach (_l2_variant U L V M)
      set(${_l2_prec}HEMV_${_l2_variant}_KERNEL ../generic/zhemv_k.c)
    endforeach ()
  endforeach ()

  # A macro has no scope of its own: do not leave the helpers behind.
  unset(_l2_prec)
  unset(_l2_variant)
endmacro ()
# SetDefaultL3()
# Sets, in the caller's scope, the default GEADD kernel source for the S/D
# (geadd.c) and C/Z (zgeadd.c) precisions.
# Takes no arguments. The _l3_prec loop helper is unset before returning.
macro(SetDefaultL3)
  foreach (_l3_prec S D)
    set(${_l3_prec}GEADD_KERNEL ../generic/geadd.c)
  endforeach ()
  foreach (_l3_prec C Z)
    set(${_l3_prec}GEADD_KERNEL ../generic/zgeadd.c)
  endforeach ()
  unset(_l3_prec)
endmacro ()
@@ -0,0 +1,347 @@ | |||||
# Sources for compiling lapack-netlib. Can't use CMakeLists.txt because lapack-netlib already has its own cmake files. | |||||
# Auxiliary sources that are always part of the LAPACK build (they seed
# LA_REL_SRC regardless of which precisions are enabled). Paths are relative to
# the lapack-netlib SRC directory.
set(ALLAUX
  ilaenv.f
  ieeeck.f
  lsamen.f
  xerbla_array.f
  iparmq.f
  ilaprec.f
  ilatrans.f
  ilauplo.f
  iladiag.f
  chla_transtype.f
  ../INSTALL/ilaver.f
  ../INSTALL/slamch.f
)
# Auxiliary sources shared by the BUILD_SINGLE and BUILD_COMPLEX source sets.
# The last entry is the timer source selected by TIMER (second_<TIMER>.f).
set(SCLAUX
  sbdsdc.f
  sbdsqr.f sdisna.f slabad.f slacpy.f sladiv.f slae2.f slaebz.f
  slaed0.f slaed1.f slaed2.f slaed3.f slaed4.f slaed5.f slaed6.f
  slaed7.f slaed8.f slaed9.f slaeda.f slaev2.f slagtf.f
  slagts.f slamrg.f slanst.f
  slapy2.f slapy3.f slarnv.f
  slarra.f slarrb.f slarrc.f slarrd.f slarre.f slarrf.f slarrj.f
  slarrk.f slarrr.f slaneg.f
  slartg.f slaruv.f slas2.f slascl.f
  slasd0.f slasd1.f slasd2.f slasd3.f slasd4.f slasd5.f slasd6.f
  slasd7.f slasd8.f slasda.f slasdq.f slasdt.f
  slaset.f slasq1.f slasq2.f slasq3.f slasq4.f slasq5.f slasq6.f
  slasr.f slasrt.f slassq.f slasv2.f spttrf.f sstebz.f sstedc.f
  ssteqr.f ssterf.f slaisnan.f sisnan.f
  slartgp.f slartgs.f
  ../INSTALL/second_${TIMER}.f
)
# Auxiliary sources shared by the BUILD_DOUBLE and BUILD_COMPLEX16 source sets.
# The last entry is the timer source selected by TIMER (dsecnd_<TIMER>.f).
set(DZLAUX
  dbdsdc.f
  dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f
  dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f
  dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f
  dlagts.f dlamrg.f dlanst.f
  dlapy2.f dlapy3.f dlarnv.f
  dlarra.f dlarrb.f dlarrc.f dlarrd.f dlarre.f dlarrf.f dlarrj.f
  dlarrk.f dlarrr.f dlaneg.f
  dlartg.f dlaruv.f dlas2.f dlascl.f
  dlasd0.f dlasd1.f dlasd2.f dlasd3.f dlasd4.f dlasd5.f dlasd6.f
  dlasd7.f dlasd8.f dlasda.f dlasdq.f dlasdt.f
  dlaset.f dlasq1.f dlasq2.f dlasq3.f dlasq4.f dlasq5.f dlasq6.f
  dlasr.f dlasrt.f dlassq.f dlasv2.f dpttrf.f dstebz.f dstedc.f
  dsteqr.f dsterf.f dlaisnan.f disnan.f
  dlartgp.f dlartgs.f
  ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f
)
# Sources added to the LAPACK build when BUILD_SINGLE is enabled.
set(SLASRC
  sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f
  sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f
  sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f
  sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f
  sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f
  sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f
  sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f
  sgetc2.f sgetri.f
  sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f
  sggglm.f sgghrd.f sgglse.f sggqrf.f
  sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f
  sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f
  shsein.f shseqr.f slabrd.f slacon.f slacn2.f
  slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f
  slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f
  slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f
  slansy.f slantb.f slantp.f slantr.f slanv2.f
  slapll.f slapmt.f
  slaqgb.f slaqge.f slaqp2.f slaqps.f slaqsb.f slaqsp.f slaqsy.f
  slaqr0.f slaqr1.f slaqr2.f slaqr3.f slaqr4.f slaqr5.f
  slaqtr.f slar1v.f slar2v.f ilaslr.f ilaslc.f
  slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f
  slarrv.f slartv.f
  slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f
  slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f
  sopgtr.f sopmtr.f sorg2l.f sorg2r.f
  sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f
  sorgrq.f sorgtr.f sorm2l.f sorm2r.f
  sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f
  sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f
  spbstf.f spbsv.f spbsvx.f
  spbtf2.f spbtrf.f spbtrs.f spocon.f spoequ.f sporfs.f sposv.f
  sposvx.f spstrf.f spstf2.f
  sppcon.f sppequ.f
  spprfs.f sppsv.f sppsvx.f spptrf.f spptri.f spptrs.f sptcon.f
  spteqr.f sptrfs.f sptsv.f sptsvx.f spttrs.f sptts2.f srscl.f
  ssbev.f ssbevd.f ssbevx.f ssbgst.f ssbgv.f ssbgvd.f ssbgvx.f
  ssbtrd.f sspcon.f sspev.f sspevd.f sspevx.f sspgst.f
  sspgv.f sspgvd.f sspgvx.f ssprfs.f sspsv.f sspsvx.f ssptrd.f
  ssptrf.f ssptri.f ssptrs.f sstegr.f sstein.f sstev.f sstevd.f sstevr.f
  sstevx.f
  ssycon.f ssyev.f ssyevd.f ssyevr.f ssyevx.f ssygs2.f
  ssygst.f ssygv.f ssygvd.f ssygvx.f ssyrfs.f ssysv.f ssysvx.f
  ssytd2.f ssytf2.f ssytrd.f ssytrf.f ssytri.f ssytri2.f ssytri2x.f
  ssyswapr.f ssytrs.f ssytrs2.f ssyconv.f
  ssytf2_rook.f ssytrf_rook.f ssytrs_rook.f
  ssytri_rook.f ssycon_rook.f ssysv_rook.f
  stbcon.f
  stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f
  stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f
  stptrs.f
  strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f
  strtrs.f stzrqf.f stzrzf.f sstemr.f
  slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f
  stfttr.f stpttf.f stpttr.f strttf.f strttp.f
  sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f
  sgeequb.f ssyequb.f spoequb.f sgbequb.f
  sbbcsd.f slapmr.f sorbdb.f sorbdb1.f sorbdb2.f sorbdb3.f sorbdb4.f
  sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f
  sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f
  stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f
)

# Source shared by the BUILD_SINGLE and BUILD_DOUBLE source sets.
set(DSLASRC spotrs.f)
# Sources added to the LAPACK build when BUILD_COMPLEX is enabled.
set(CLASRC
  cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f
  cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f
  cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f
  cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f
  cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f
  cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f
  cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f
  cgesvx.f cgetc2.f cgetri.f
  cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f
  cgghrd.f cgglse.f cggqrf.f cggrqf.f
  cggsvd.f cggsvp.f
  cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f
  chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f
  checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f
  chegv.f chegvd.f chegvx.f cherfs.f chesv.f chesvx.f chetd2.f
  chetf2.f chetrd.f
  chetrf.f chetri.f chetri2.f chetri2x.f cheswapr.f
  chetrs.f chetrs2.f
  chetf2_rook.f chetrf_rook.f chetri_rook.f chetrs_rook.f checon_rook.f chesv_rook.f
  chgeqz.f chpcon.f chpev.f chpevd.f
  chpevx.f chpgst.f chpgv.f chpgvd.f chpgvx.f chprfs.f chpsv.f
  chpsvx.f
  chptrd.f chptrf.f chptri.f chptrs.f chsein.f chseqr.f clabrd.f
  clacgv.f clacon.f clacn2.f clacp2.f clacpy.f clacrm.f clacrt.f cladiv.f
  claed0.f claed7.f claed8.f
  claein.f claesy.f claev2.f clags2.f clagtm.f
  clahef.f clahef_rook.f clahqr.f
  clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f
  clanhb.f clanhe.f
  clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f
  clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f
  claqhb.f claqhe.f claqhp.f claqp2.f claqps.f claqsb.f
  claqr0.f claqr1.f claqr2.f claqr3.f claqr4.f claqr5.f
  claqsp.f claqsy.f clar1v.f clar2v.f ilaclr.f ilaclc.f
  clarf.f clarfb.f clarfg.f clarft.f clarfgp.f
  clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f
  clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f
  clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f
  clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f
  cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f
  cposv.f cposvx.f cpstrf.f cpstf2.f
  cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f
  cptcon.f cpteqr.f cptrfs.f cptsv.f cptsvx.f cpttrf.f cpttrs.f cptts2.f
  crot.f cspcon.f csprfs.f cspsv.f
  cspsvx.f csptrf.f csptri.f csptrs.f csrscl.f cstedc.f
  cstegr.f cstein.f csteqr.f
  csycon.f
  csyrfs.f csysv.f csysvx.f csytf2.f csytrf.f csytri.f csytri2.f csytri2x.f
  csyswapr.f csytrs.f csytrs2.f csyconv.f
  csytf2_rook.f csytrf_rook.f csytrs_rook.f
  csytri_rook.f csycon_rook.f csysv_rook.f
  ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f
  ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f
  ctprfs.f ctptri.f
  ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f
  ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f
  cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f
  cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f
  cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f
  cunmtr.f cupgtr.f cupmtr.f icmax1.f scsum1.f cstemr.f
  chfrk.f ctfttp.f clanhf.f cpftrf.f cpftri.f cpftrs.f ctfsm.f ctftri.f
  ctfttr.f ctpttf.f ctpttr.f ctrttf.f ctrttp.f
  cgeequb.f cgbequb.f csyequb.f cpoequb.f cheequb.f
  cbbcsd.f clapmr.f cunbdb.f cunbdb1.f cunbdb2.f cunbdb3.f cunbdb4.f
  cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f
  cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f
  ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f
)

# Source shared by the BUILD_COMPLEX and BUILD_COMPLEX16 source sets.
set(ZCLASRC cpotrs.f)
# Sources added to the LAPACK build when BUILD_DOUBLE is enabled.
set(DLASRC
  dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f
  dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f
  dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f
  dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f
  dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f
  dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f
  dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f
  dgetc2.f dgetri.f
  dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f
  dggglm.f dgghrd.f dgglse.f dggqrf.f
  dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f
  dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f
  dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f
  dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f
  dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f
  dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f
  dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f
  dlapll.f dlapmt.f
  dlaqgb.f dlaqge.f dlaqp2.f dlaqps.f dlaqsb.f dlaqsp.f dlaqsy.f
  dlaqr0.f dlaqr1.f dlaqr2.f dlaqr3.f dlaqr4.f dlaqr5.f
  dlaqtr.f dlar1v.f dlar2v.f iladlr.f iladlc.f
  dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f
  dlargv.f dlarrv.f dlartv.f
  dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f
  dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f
  dopgtr.f dopmtr.f dorg2l.f dorg2r.f
  dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f
  dorgrq.f dorgtr.f dorm2l.f dorm2r.f
  dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f
  dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f
  dpbstf.f dpbsv.f dpbsvx.f
  dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f
  dposvx.f dpotrs.f dpstrf.f dpstf2.f
  dppcon.f dppequ.f
  dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f
  dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f
  dsbev.f dsbevd.f dsbevx.f dsbgst.f dsbgv.f dsbgvd.f dsbgvx.f
  dsbtrd.f dspcon.f dspev.f dspevd.f dspevx.f dspgst.f
  dspgv.f dspgvd.f dspgvx.f dsprfs.f dspsv.f dspsvx.f dsptrd.f
  dsptrf.f dsptri.f dsptrs.f dstegr.f dstein.f dstev.f dstevd.f dstevr.f
  dstevx.f
  dsycon.f dsyev.f dsyevd.f dsyevr.f
  dsyevx.f dsygs2.f dsygst.f dsygv.f dsygvd.f dsygvx.f dsyrfs.f
  dsysv.f dsysvx.f
  dsytd2.f dsytf2.f dsytrd.f dsytrf.f dsytri.f dsytri2.f dsytri2x.f
  dsyswapr.f dsytrs.f dsytrs2.f dsyconv.f
  dsytf2_rook.f dsytrf_rook.f dsytrs_rook.f
  dsytri_rook.f dsycon_rook.f dsysv_rook.f
  dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f
  dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f
  dtptrs.f
  dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f
  dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f
  dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f
  dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f
  dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f
  dgejsv.f dgesvj.f dgsvj0.f dgsvj1.f
  dgeequb.f dsyequb.f dpoequb.f dgbequb.f
  dbbcsd.f dlapmr.f dorbdb.f dorbdb1.f dorbdb2.f dorbdb3.f dorbdb4.f
  dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f
  dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f
  dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f
)
# Sources added to the LAPACK build when BUILD_COMPLEX16 is enabled.
set(ZLASRC
  zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f
  zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f
  zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f
  zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f
  zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f
  zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f
  zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f
  zgetri.f
  zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f
  zgghrd.f zgglse.f zggqrf.f zggrqf.f
  zggsvd.f zggsvp.f
  zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f
  zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f
  zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f
  zhegv.f zhegvd.f zhegvx.f zherfs.f zhesv.f zhesvx.f zhetd2.f
  zhetf2.f zhetrd.f
  zhetrf.f zhetri.f zhetri2.f zhetri2x.f zheswapr.f
  zhetrs.f zhetrs2.f
  zhetf2_rook.f zhetrf_rook.f zhetri_rook.f zhetrs_rook.f zhecon_rook.f zhesv_rook.f
  zhgeqz.f zhpcon.f zhpev.f zhpevd.f
  zhpevx.f zhpgst.f zhpgv.f zhpgvd.f zhpgvx.f zhprfs.f zhpsv.f
  zhpsvx.f
  zhptrd.f zhptrf.f zhptri.f zhptrs.f zhsein.f zhseqr.f zlabrd.f
  zlacgv.f zlacon.f zlacn2.f zlacp2.f zlacpy.f zlacrm.f zlacrt.f zladiv.f
  zlaed0.f zlaed7.f zlaed8.f
  zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f
  zlahef.f zlahef_rook.f zlahqr.f
  zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f
  zlangt.f zlanhb.f
  zlanhe.f
  zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f
  zlantp.f zlantr.f zlapll.f zlapmt.f zlaqgb.f zlaqge.f
  zlaqhb.f zlaqhe.f zlaqhp.f zlaqp2.f zlaqps.f zlaqsb.f
  zlaqr0.f zlaqr1.f zlaqr2.f zlaqr3.f zlaqr4.f zlaqr5.f
  zlaqsp.f zlaqsy.f zlar1v.f zlar2v.f ilazlr.f ilazlc.f
  zlarcm.f zlarf.f zlarfb.f
  zlarfg.f zlarft.f zlarfgp.f
  zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f
  zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f
  zlassq.f zlasyf.f zlasyf_rook.f
  zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f
  zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f
  zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f
  zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f
  zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f
  zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f
  zrot.f zspcon.f zsprfs.f zspsv.f
  zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f
  zstegr.f zstein.f zsteqr.f
  zsycon.f
  zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f
  zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f
  zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f
  zsytri_rook.f zsycon_rook.f zsysv_rook.f
  ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f
  ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f
  ztprfs.f ztptri.f
  ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f
  ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f
  zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f
  zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f
  zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f
  zunmtr.f zupgtr.f
  zupmtr.f izmax1.f dzsum1.f zstemr.f
  zcgesv.f zcposv.f zlag2c.f clag2z.f zlat2c.f
  zhfrk.f ztfttp.f zlanhf.f zpftrf.f zpftri.f zpftrs.f ztfsm.f ztftri.f
  ztfttr.f ztpttf.f ztpttr.f ztrttf.f ztrttp.f
  zgeequb.f zgbequb.f zsyequb.f zpoequb.f zheequb.f
  zbbcsd.f zlapmr.f zunbdb.f zunbdb1.f zunbdb2.f zunbdb3.f zunbdb4.f
  zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f
  zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f
  ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f
)
# Build LA_SOURCES, the list of lapack-netlib sources to compile:
#   1. start from the always-needed ALLAUX files,
#   2. add the per-precision groups selected by BUILD_SINGLE / BUILD_DOUBLE /
#      BUILD_COMPLEX / BUILD_COMPLEX16,
#   3. prefix every entry with ${NETLIB_LAPACK_DIR}/SRC/,
#   4. attach LAPACK_FFLAGS to those files as COMPILE_FLAGS.
set(LA_REL_SRC ${ALLAUX})

if (BUILD_SINGLE)
  list(APPEND LA_REL_SRC ${SLASRC} ${DSLASRC} ${SCLAUX})
endif ()

if (BUILD_DOUBLE)
  list(APPEND LA_REL_SRC ${DLASRC} ${DSLASRC} ${DZLAUX})
endif ()

if (BUILD_COMPLEX)
  list(APPEND LA_REL_SRC ${CLASRC} ${ZCLASRC} ${SCLAUX})
endif ()

if (BUILD_COMPLEX16)
  list(APPEND LA_REL_SRC ${ZLASRC} ${ZCLASRC} ${DZLAUX})
endif ()

# DSLASRC, ZCLASRC, SCLAUX and DZLAUX are each appended by two of the branches
# above, so enabling both precisions of a pair would list those files twice.
# Keep the first occurrence only. The guard avoids calling list() on an
# undefined variable when nothing was selected.
if (LA_REL_SRC)
  list(REMOVE_DUPLICATES LA_REL_SRC)
endif ()

# add lapack-netlib folder to the sources
set(LA_SOURCES "")
foreach (LA_FILE ${LA_REL_SRC})
  list(APPEND LA_SOURCES "${NETLIB_LAPACK_DIR}/SRC/${LA_FILE}")
endforeach ()
set_source_files_properties(${LA_SOURCES} PROPERTIES COMPILE_FLAGS "${LAPACK_FFLAGS}")
@@ -0,0 +1,104 @@ | |||||
## | |||||
## Author: Hank Anderson <hank@statease.com> | |||||
## Description: Ported from portion of OpenBLAS/Makefile.system | |||||
## Detects the OS and sets appropriate variables. | |||||
# Per-OS settings keyed on CMAKE_SYSTEM_NAME: the md5 command line (MD5SUM) on
# Darwin/FreeBSD/NetBSD, and linking libm on Linux/AIX (Linux additionally sets
# NO_EXPRECISION). The system names are mutually exclusive, so one chain is
# used. The expansion is quoted so an empty CMAKE_SYSTEM_NAME simply matches
# nothing instead of breaking if().
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
  # NOTE: set(ENV{...}) only changes the environment of this CMake run; it is
  # not visible to the build tool later on.
  set(ENV{MACOSX_DEPLOYMENT_TARGET} "10.2") # TODO: should be exported as an env var
  set(MD5SUM "md5 -r")
elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "FreeBSD")
  set(MD5SUM "md5 -r")
elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "NetBSD")
  set(MD5SUM "md5 -n")
elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux")
  set(EXTRALIB "${EXTRALIB} -lm")
  set(NO_EXPRECISION 1)
elseif ("${CMAKE_SYSTEM_NAME}" STREQUAL "AIX")
  set(EXTRALIB "${EXTRALIB} -lm")
endif ()
# Windows settings: no PIC, no extended precision, link advapi32, object/library
# suffixes, the MS_ABI define (Clang, or GCC >= 4.7) and, on x86, the
# -mincoming-stack-boundary=2 stack-alignment flag.
# TODO: this is probably meant for mingw, not other windows compilers
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
  set(EXTRALIB "${EXTRALIB} -defaultlib:advapi32")

  # probably not going to use these
  set(SUFFIX "obj")
  set(PSUFFIX "pobj")
  set(LIBSUFFIX "a")

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
    set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
  endif ()

  if ("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU")
    # Test for supporting MS_ABI
    # removed string parsing in favor of CMake's version comparison -hpa
    # The trailing newline is stripped and the value is quoted, so a failed or
    # empty -dumpversion compares as "older than 4.7" instead of breaking if().
    execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion
      OUTPUT_VARIABLE GCC_VERSION
      OUTPUT_STRIP_TRAILING_WHITESPACE)
    if (NOT "${GCC_VERSION}" VERSION_LESS 4.7)
      # GCC Version >=4.7
      # It is compatible with MSVC ABI.
      set(CCOMMON_OPT "${CCOMMON_OPT} -DMS_ABI")
    endif ()
  endif ()

  # Ensure the correct stack alignment on Win32
  # http://permalink.gmane.org/gmane.comp.lib.openblas.general/97
  if ("${ARCH}" STREQUAL "x86")
    if (NOT MSVC AND NOT "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
      set(CCOMMON_OPT "${CCOMMON_OPT} -mincoming-stack-boundary=2")
    endif ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -mincoming-stack-boundary=2")
  endif ()
endif ()
# Interix and Cygwin: no PIC and no extended precision; Interix additionally
# records the location of its GCC tool directory.
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Interix")
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
  # A stray STREQUAL keyword used to be stored as the first list element here,
  # giving "STREQUAL;/opt/..." instead of the path.
  set(INTERIX_TOOL_DIR "/opt/gcc.3.3/i586-pc-interix3/bin")
endif ()

if (CYGWIN)
  set(NEED_PIC 0)
  set(NO_EXPRECISION 1)
endif ()
# Remaining option-driven settings. ARCH and CMAKE_SYSTEM_NAME are quoted so an
# empty value compares false instead of breaking if().

# SMP builds link pthread everywhere except Windows and Interix.
if (SMP AND NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows" AND NOT "${CMAKE_SYSTEM_NAME}" STREQUAL "Interix")
  set(EXTRALIB "${EXTRALIB} -lpthread")
endif ()

# Quad precision defines QUAD_PRECISION and turns extended precision off.
if (QUAD_PRECISION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DQUAD_PRECISION")
  set(NO_EXPRECISION 1)
endif ()

# x86 never uses extended precision.
if ("${ARCH}" STREQUAL "x86")
  set(NO_EXPRECISION 1)
endif ()

# UTEST_CHECK implies SANITY_CHECK, so it must be handled first.
if (UTEST_CHECK)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
  set(SANITY_CHECK 1)
endif ()

if (SANITY_CHECK)
  # TODO: need some way to get $(*F) (target filename)
  # NOTE(review): $(*F) is make syntax carried over from the Makefile; CMake
  # passes it through literally.
  set(CCOMMON_OPT "${CCOMMON_OPT} -DSANITY_CHECK -DREFNAME=$(*F)f${BU}")
endif ()
@@ -0,0 +1,113 @@ | |||||
## | |||||
## Author: Hank Anderson <hank@statease.com> | |||||
## Description: Ported from OpenBLAS/Makefile.prebuild | |||||
## This is triggered by system.cmake and runs before any of the code is built. | |||||
## Creates config.h and Makefile.conf by first running the c_check perl script (which creates those files). | |||||
## Next it runs f_check and appends some fortran information to the files. | |||||
## Finally it runs getarch and getarch_2nd for even more environment information. | |||||
# CMake vars set by this file: | |||||
# CORE | |||||
# LIBCORE | |||||
# NUM_CORES | |||||
# HAVE_MMX | |||||
# HAVE_SSE | |||||
# HAVE_SSE2 | |||||
# HAVE_SSE3 | |||||
# MAKE | |||||
# SGEMM_UNROLL_M
# SGEMM_UNROLL_N
# DGEMM_UNROLL_M
# DGEMM_UNROLL_N
# QGEMM_UNROLL_M
# QGEMM_UNROLL_N
# CGEMM_UNROLL_M
# CGEMM_UNROLL_N
# ZGEMM_UNROLL_M
# ZGEMM_UNROLL_N
# XGEMM_UNROLL_M
# XGEMM_UNROLL_N
# CGEMM3M_UNROLL_M
# CGEMM3M_UNROLL_N
# ZGEMM3M_UNROLL_M
# ZGEMM3M_UNROLL_N
# XGEMM3M_UNROLL_M
# XGEMM3M_UNROLL_N
# CPUIDEMU = ../../cpuid/table.o | |||||
if (DEFINED CPUIDEMU) | |||||
set(EXFLAGS "-DCPUIDEMU -DVENDOR=99") | |||||
endif () | |||||
if (DEFINED TARGET_CORE) | |||||
# set the C flags for just this file | |||||
set(GETARCH2_FLAGS "-DBUILD_KERNEL") | |||||
set(TARGET_MAKE "Makefile_kernel.conf") | |||||
set(TARGET_CONF "config_kernel.h") | |||||
else() | |||||
set(TARGET_MAKE "Makefile.conf") | |||||
set(TARGET_CONF "config.h") | |||||
endif () | |||||
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") | |||||
if (NOT NOFORTRAN) | |||||
include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") | |||||
endif () | |||||
# compile getarch
# CPUIDEMU is optional (see the DEFINED CPUIDEMU check above); when it is not set
# the expansion simply contributes nothing to the source list.
set(GETARCH_SRC
  ${CMAKE_SOURCE_DIR}/getarch.c
  ${CPUIDEMU}
)
if (NOT MSVC) | |||||
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) | |||||
endif () | |||||
if (MSVC) | |||||
#Use generic for MSVC now | |||||
set(GETARCH_FLAGS ${GETARCH_FLAGS} -DFORCE_GENERIC) | |||||
endif() | |||||
# Build the getarch probe into its own scratch directory and copy the resulting
# executable next to the other build outputs.
set(GETARCH_DIR "${PROJECT_BINARY_DIR}/getarch_build")
set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH_DIR})
try_compile(GETARCH_RESULT ${GETARCH_DIR}
  SOURCES ${GETARCH_SRC}
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN}
)

# Everything below depends on the probe's output, so stop with the compiler log
# instead of trying to run a binary that was never produced.
if (NOT GETARCH_RESULT)
  message(FATAL_ERROR "Failed to compile getarch:\n${GETARCH_LOG}")
endif ()

message(STATUS "Running getarch")

# run the probe twice: argument 0 is parsed into CMake variables below, argument 1 is appended to the config header
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 0 OUTPUT_VARIABLE GETARCH_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH_BIN} 1 OUTPUT_VARIABLE GETARCH_CONF_OUT)

message(STATUS "GETARCH results:\n${GETARCH_MAKE_OUT}")

# append config data from getarch to the TARGET file and read in CMake vars
# (outputs are quoted so they are passed on verbatim as a single argument, even when empty)
file(APPEND ${TARGET_CONF} "${GETARCH_CONF_OUT}")
ParseGetArchVars("${GETARCH_MAKE_OUT}")
# Build the second-stage probe (getarch_2nd) the same way as getarch.
set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build")
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}")
file(MAKE_DIRECTORY ${GETARCH2_DIR})
try_compile(GETARCH2_RESULT ${GETARCH2_DIR}
  SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c
  COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR}
  OUTPUT_VARIABLE GETARCH2_LOG
  COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN}
)

# The probe's output is required below; report the compiler log if it could not be built.
if (NOT GETARCH2_RESULT)
  message(FATAL_ERROR "Failed to compile getarch_2nd:\n${GETARCH2_LOG}")
endif ()

# run the probe twice: argument 0 is parsed into CMake variables below, argument 1 is appended to the config header
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 0 OUTPUT_VARIABLE GETARCH2_MAKE_OUT)
execute_process(COMMAND ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} 1 OUTPUT_VARIABLE GETARCH2_CONF_OUT)

# append config data from getarch_2nd to the TARGET file and read in CMake vars
# (outputs are quoted so they are passed on verbatim as a single argument, even when empty)
file(APPEND ${TARGET_CONF} "${GETARCH2_CONF_OUT}")
ParseGetArchVars("${GETARCH2_MAKE_OUT}")
@@ -0,0 +1,552 @@ | |||||
## | |||||
## Author: Hank Anderson <hank@statease.com> | |||||
## Description: Ported from OpenBLAS/Makefile.system | |||||
## | |||||
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib") | |||||
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa | |||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile | |||||
# TODO: Makefile.system sets HOSTCC = $(CC) here if not already set -hpa | |||||
# TARGET_CORE will override TARGET which is used in DYNAMIC_ARCH=1. | |||||
if (DEFINED TARGET_CORE) | |||||
set(TARGET ${TARGET_CORE}) | |||||
endif () | |||||
# Force fallbacks for 32bit | |||||
if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | |||||
message(STATUS "Compiling a ${BINARY}-bit binary.") | |||||
set(NO_AVX 1) | |||||
if (${TARGET} STREQUAL "HASWELL" OR ${TARGET} STREQUAL "SANDYBRIDGE") | |||||
set(TARGET "NEHALEM") | |||||
endif () | |||||
if (${TARGET} STREQUAL "BULLDOZER" OR ${TARGET} STREQUAL "PILEDRIVER") | |||||
set(TARGET "BARCELONA") | |||||
endif () | |||||
endif () | |||||
if (DEFINED TARGET) | |||||
message(STATUS "Targetting the ${TARGET} architecture.") | |||||
set(GETARCH_FLAGS "-DFORCE_${TARGET}") | |||||
endif () | |||||
if (INTERFACE64) | |||||
message(STATUS "Using 64-bit integers.") | |||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DUSE64BITINT") | |||||
endif () | |||||
if (NOT DEFINED GEMM_MULTITHREAD_THRESHOLD) | |||||
set(GEMM_MULTITHREAD_THRESHOLD 4) | |||||
endif () | |||||
message(STATUS "GEMM multithread threshold set to ${GEMM_MULTITHREAD_THRESHOLD}.") | |||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DGEMM_MULTITHREAD_THRESHOLD=${GEMM_MULTITHREAD_THRESHOLD}") | |||||
if (NO_AVX) | |||||
message(STATUS "Disabling Advanced Vector Extensions (AVX).") | |||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX") | |||||
endif () | |||||
if (NO_AVX2) | |||||
message(STATUS "Disabling Advanced Vector Extensions 2 (AVX2).") | |||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_AVX2") | |||||
endif () | |||||
if (CMAKE_BUILD_TYPE STREQUAL Debug) | |||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -g") | |||||
endif () | |||||
# TODO: let CMake handle this? -hpa | |||||
#if (${QUIET_MAKE}) | |||||
# set(MAKE "${MAKE} -s") | |||||
#endif() | |||||
if (NOT DEFINED NO_PARALLEL_MAKE) | |||||
set(NO_PARALLEL_MAKE 0) | |||||
endif () | |||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -DNO_PARALLEL_MAKE=${NO_PARALLEL_MAKE}") | |||||
if (CMAKE_CXX_COMPILER STREQUAL loongcc) | |||||
set(GETARCH_FLAGS "${GETARCH_FLAGS} -static") | |||||
endif () | |||||
#if don't use Fortran, it will only compile CBLAS. | |||||
if (ONLY_CBLAS) | |||||
set(NO_LAPACK 1) | |||||
else () | |||||
set(ONLY_CBLAS 0) | |||||
endif () | |||||
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake") | |||||
# Decide whether this is a threaded (SMP) build.
# NUM_THREADS defaults to the detected core count; a single thread forces USE_THREAD off.
# The conditions name the variables instead of expanding them (${VAR}), so an empty
# or unset value evaluates to false rather than producing a malformed if() expression.
if (NOT DEFINED NUM_THREADS)
  set(NUM_THREADS ${NUM_CORES})
endif ()

if (NUM_THREADS EQUAL 1)
  set(USE_THREAD 0)
endif ()

if (DEFINED USE_THREAD)
  # an explicit USE_THREAD setting decides
  if (USE_THREAD)
    set(SMP 1)
  else ()
    unset(SMP)
  endif ()
else ()
  # N.B. this is NUM_THREAD in Makefile.system which is probably a bug -hpa
  if (NUM_THREADS EQUAL 1)
    unset(SMP)
  else ()
    set(SMP 1)
  endif ()
endif ()

if (SMP)
  message(STATUS "SMP enabled.")
endif ()
if (NOT DEFINED NEED_PIC) | |||||
set(NEED_PIC 1) | |||||
endif () | |||||
# TODO: I think CMake should be handling all this stuff -hpa | |||||
unset(ARFLAGS) | |||||
set(CPP "${COMPILER} -E") | |||||
set(AR "${CROSS_SUFFIX}ar") | |||||
set(AS "${CROSS_SUFFIX}as") | |||||
set(LD "${CROSS_SUFFIX}ld") | |||||
set(RANLIB "${CROSS_SUFFIX}ranlib") | |||||
set(NM "${CROSS_SUFFIX}nm") | |||||
set(DLLWRAP "${CROSS_SUFFIX}dllwrap") | |||||
set(OBJCOPY "${CROSS_SUFFIX}objcopy") | |||||
set(OBJCONV "${CROSS_SUFFIX}objconv") | |||||
# OS dependent settings | |||||
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake") | |||||
# Architecture dependent settings | |||||
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake") | |||||
# C Compiler dependent settings | |||||
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake") | |||||
if (NOT NOFORTRAN) | |||||
# Fortran Compiler dependent settings | |||||
include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake") | |||||
endif () | |||||
if (BINARY64) | |||||
if (INTERFACE64) | |||||
# CCOMMON_OPT += -DUSE64BITINT | |||||
endif () | |||||
endif () | |||||
# Position independent code flags for the C and Fortran compilers.
if (NEED_PIC)
  # CMAKE_C_COMPILER holds the path of the compiler executable, so it can never equal "IBM";
  # the compiler is identified through CMAKE_C_COMPILER_ID, which is "XL" (or "XLClang") for IBM XL.
  if ("${CMAKE_C_COMPILER_ID}" MATCHES "^XL")
    set(CCOMMON_OPT "${CCOMMON_OPT} -qpic=large")
  else ()
    set(CCOMMON_OPT "${CCOMMON_OPT} -fPIC")
  endif ()

  # F_COMPILER is quoted because it may be unset (e.g. when the Fortran checks were skipped
  # via NOFORTRAN), and an unquoted empty expansion would make this if() malformed.
  if ("${F_COMPILER}" STREQUAL "SUN")
    set(FCOMMON_OPT "${FCOMMON_OPT} -pic")
  else ()
    set(FCOMMON_OPT "${FCOMMON_OPT} -fPIC")
  endif ()
endif ()
if (DYNAMIC_ARCH) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_ARCH") | |||||
endif () | |||||
if (NO_LAPACK) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACK") | |||||
#Disable LAPACK C interface | |||||
set(NO_LAPACKE 1) | |||||
endif () | |||||
if (NO_LAPACKE) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_LAPACKE") | |||||
endif () | |||||
if (NO_AVX) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX") | |||||
endif () | |||||
if (${ARCH} STREQUAL "x86") | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX") | |||||
endif () | |||||
if (NO_AVX2) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX2") | |||||
endif () | |||||
if (SMP) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DSMP_SERVER") | |||||
if (${ARCH} STREQUAL "mips64") | |||||
if (NOT ${CORE} STREQUAL "LOONGSON3B") | |||||
set(USE_SIMPLE_THREADED_LEVEL3 1) | |||||
endif () | |||||
endif () | |||||
if (USE_OPENMP) | |||||
# USE_SIMPLE_THREADED_LEVEL3 = 1 | |||||
# NO_AFFINITY = 1 | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_OPENMP") | |||||
endif () | |||||
if (BIGNUMA) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DBIGNUMA") | |||||
endif () | |||||
endif () | |||||
if (NO_WARMUP) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_WARMUP") | |||||
endif () | |||||
if (CONSISTENT_FPCSR) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DCONSISTENT_FPCSR") | |||||
endif () | |||||
# Only for development | |||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPARAMTEST") | |||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DPREFETCHTEST") | |||||
# set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_SWITCHING") | |||||
# set(USE_PAPI 1) | |||||
if (USE_PAPI) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_PAPI") | |||||
set(EXTRALIB "${EXTRALIB} -lpapi -lperfctr") | |||||
endif () | |||||
if (DYNAMIC_THREADS) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DDYNAMIC_THREADS") | |||||
endif () | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMAX_CPU_NUMBER=${NUM_THREADS}") | |||||
if (USE_SIMPLE_THREADED_LEVEL3) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DUSE_SIMPLE_THREADED_LEVEL3") | |||||
endif () | |||||
if (DEFINED LIBNAMESUFFIX) | |||||
set(LIBPREFIX "libopenblas_${LIBNAMESUFFIX}") | |||||
else () | |||||
set(LIBPREFIX "libopenblas") | |||||
endif () | |||||
if (NOT DEFINED SYMBOLPREFIX) | |||||
set(SYMBOLPREFIX "") | |||||
endif () | |||||
if (NOT DEFINED SYMBOLSUFFIX) | |||||
set(SYMBOLSUFFIX "") | |||||
endif () | |||||
set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}") | |||||
# TODO: need to convert these Makefiles
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake | |||||
if (${CORE} STREQUAL "PPC440") | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC") | |||||
endif () | |||||
if (${CORE} STREQUAL "PPC440FP2") | |||||
set(STATIC_ALLOCATION 1) | |||||
endif () | |||||
if (NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") | |||||
set(NO_AFFINITY 1) | |||||
endif () | |||||
if (NOT ${ARCH} STREQUAL "x86_64" AND NOT ${ARCH} STREQUAL "x86" AND NOT ${CORE} STREQUAL "LOONGSON3B") | |||||
set(NO_AFFINITY 1) | |||||
endif () | |||||
if (NO_AFFINITY) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DNO_AFFINITY") | |||||
endif () | |||||
if (FUNCTION_PROFILE) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DFUNCTION_PROFILE") | |||||
endif () | |||||
if (HUGETLB_ALLOCATION) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLB") | |||||
endif () | |||||
# HUGETLBFILE_ALLOCATION carries the file name to use; it is passed on as HUGETLB_FILE_NAME.
# (A stray ")" used to be appended to the file name inside the define.)
if (DEFINED HUGETLBFILE_ALLOCATION)
  set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_HUGETLBFILE -DHUGETLB_FILE_NAME=${HUGETLBFILE_ALLOCATION}")
endif ()
if (STATIC_ALLOCATION) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_STATIC") | |||||
endif () | |||||
if (DEVICEDRIVER_ALLOCATION) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_DEVICEDRIVER -DDEVICEDRIVER_NAME=\"/dev/mapper\"") | |||||
endif () | |||||
if (MIXED_MEMORY_ALLOCATION) | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DMIXED_MEMORY_ALLOCATION") | |||||
endif () | |||||
if (${CMAKE_SYSTEM_NAME} STREQUAL "SunOS") | |||||
set(TAR gtar) | |||||
set(PATCH gpatch) | |||||
set(GREP ggrep) | |||||
else () | |||||
set(TAR tar) | |||||
set(PATCH patch) | |||||
set(GREP grep) | |||||
endif () | |||||
if (NOT DEFINED MD5SUM) | |||||
set(MD5SUM md5sum) | |||||
endif () | |||||
set(AWK awk) | |||||
set(REVISION "-r${OpenBLAS_VERSION}") | |||||
set(MAJOR_VERSION ${OpenBLAS_MAJOR_VERSION}) | |||||
if (DEBUG) | |||||
set(COMMON_OPT "${COMMON_OPT} -g") | |||||
endif () | |||||
if (NOT DEFINED COMMON_OPT) | |||||
set(COMMON_OPT "-O2") | |||||
endif () | |||||
#For x86 32-bit | |||||
if (DEFINED BINARY AND BINARY EQUAL 32) | |||||
if (NOT MSVC) | |||||
set(COMMON_OPT "${COMMON_OPT} -m32") | |||||
endif() | |||||
endif() | |||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") | |||||
if(NOT MSVC) | |||||
set(CMAKE_ASM_FLAGS "${CMAKE_ASM_FLAGS} ${COMMON_OPT} ${CCOMMON_OPT}") | |||||
endif() | |||||
# TODO: not sure what PFLAGS is -hpa | |||||
set(PFLAGS "${PFLAGS} ${COMMON_OPT} ${CCOMMON_OPT} -I${TOPDIR} -DPROFILE ${COMMON_PROF}") | |||||
set(CMAKE_Fortran_FLAGS "${CMAKE_Fortran_FLAGS} ${COMMON_OPT} ${FCOMMON_OPT}") | |||||
# TODO: not sure what FPFLAGS is -hpa | |||||
set(FPFLAGS "${FPFLAGS} ${COMMON_OPT} ${FCOMMON_OPT} ${COMMON_PROF}") | |||||
#For LAPACK Fortran codes. | |||||
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} ${CMAKE_Fortran_FLAGS}") | |||||
set(LAPACK_FPFLAGS "${LAPACK_FPFLAGS} ${FPFLAGS}") | |||||
#Disable -fopenmp for LAPACK Fortran codes on Windows.
if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  # OpenMP switches to strip from the LAPACK Fortran flag strings
  set(FILTER_FLAGS "-fopenmp;-mp;-openmp;-xopenmp=parallel")
  foreach (FILTER_FLAG ${FILTER_FLAGS})
    # inputs are quoted so each flag string is processed as one value
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FFLAGS "${LAPACK_FFLAGS}")
    string(REPLACE "${FILTER_FLAG}" "" LAPACK_FPFLAGS "${LAPACK_FPFLAGS}")
  endforeach ()
endif ()
if ("${F_COMPILER}" STREQUAL "GFORTRAN") | |||||
# lapack-netlib is rife with uninitialized warnings -hpa | |||||
set(LAPACK_FFLAGS "${LAPACK_FFLAGS} -Wno-maybe-uninitialized") | |||||
endif () | |||||
# LAPACK C flags: start from the project's C flags (CMAKE_C_FLAGS; the previous
# spelling CMAKE_C_CFLAGS is not a variable and always expanded to nothing).
set(LAPACK_CFLAGS "${CMAKE_C_FLAGS} -DHAVE_LAPACK_CONFIG_H")

if (INTERFACE64)
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_ILP64")
endif ()

if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DOPENBLAS_OS_WINDOWS")
endif ()

# NOTE(review): CMAKE_C_COMPILER is the path of the compiler executable, so the "LSB"
# comparison presumably never matches - confirm which variable was intended.
if ("${CMAKE_C_COMPILER}" STREQUAL "LSB" OR "${CMAKE_SYSTEM_NAME}" STREQUAL "Windows")
  set(LAPACK_CFLAGS "${LAPACK_CFLAGS} -DLAPACK_COMPLEX_STRUCTURE")
endif ()
if (NOT DEFINED SUFFIX) | |||||
set(SUFFIX o) | |||||
endif () | |||||
if (NOT DEFINED PSUFFIX) | |||||
set(PSUFFIX po) | |||||
endif () | |||||
if (NOT DEFINED LIBSUFFIX) | |||||
set(LIBSUFFIX a) | |||||
endif () | |||||
if (DYNAMIC_ARCH) | |||||
if (DEFINED SMP) | |||||
set(LIBNAME "${LIBPREFIX}p${REVISION}.${LIBSUFFIX}") | |||||
set(LIBNAME_P "${LIBPREFIX}p${REVISION}_p.${LIBSUFFIX}") | |||||
else () | |||||
set(LIBNAME "${LIBPREFIX}${REVISION}.${LIBSUFFIX}") | |||||
set(LIBNAME_P "${LIBPREFIX}${REVISION}_p.${LIBSUFFIX}") | |||||
endif () | |||||
else () | |||||
if (DEFINED SMP) | |||||
set(LIBNAME "${LIBPREFIX}_${LIBCORE}p${REVISION}.${LIBSUFFIX}") | |||||
set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}p${REVISION}_p.${LIBSUFFIX}") | |||||
else () | |||||
set(LIBNAME "${LIBPREFIX}_${LIBCORE}${REVISION}.${LIBSUFFIX}") | |||||
set(LIBNAME_P "${LIBPREFIX}_${LIBCORE}${REVISION}_p.${LIBSUFFIX}") | |||||
endif () | |||||
endif () | |||||
set(LIBDLLNAME "${LIBPREFIX}.dll")

# LIBNAME already ends in ".${LIBSUFFIX}", so the derived names replace that suffix
# rather than appending to it (which produced names such as "<lib>.a.a.so").
string(REGEX REPLACE "\\.${LIBSUFFIX}$" "" LIBNAME_STEM "${LIBNAME}")
set(LIBSONAME "${LIBNAME_STEM}.so")
set(LIBDYNNAME "${LIBNAME_STEM}.dylib")
set(LIBDEFNAME "${LIBNAME_STEM}.def")
set(LIBEXPNAME "${LIBNAME_STEM}.exp")
set(LIBZIPNAME "${LIBNAME_STEM}.zip")
set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}") | |||||
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}") | |||||
set(LIB_COMPONENTS BLAS) | |||||
if (NOT NO_CBLAS) | |||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} CBLAS") | |||||
endif () | |||||
if (NOT NO_LAPACK) | |||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACK") | |||||
if (NOT NO_LAPACKE) | |||||
set(LIB_COMPONENTS "${LIB_COMPONENTS} LAPACKE") | |||||
endif () | |||||
endif () | |||||
if (ONLY_CBLAS) | |||||
set(LIB_COMPONENTS CBLAS) | |||||
endif () | |||||
# For GEMM3M | |||||
set(USE_GEMM3M 0) | |||||
if (DEFINED ARCH) | |||||
if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64" OR ${ARCH} STREQUAL "ia64" OR ${ARCH} STREQUAL "MIPS") | |||||
set(USE_GEMM3M 1) | |||||
endif () | |||||
if (${CORE} STREQUAL "generic") | |||||
set(USE_GEMM3M 0) | |||||
endif () | |||||
endif () | |||||
#export OSNAME | |||||
#export ARCH | |||||
#export CORE | |||||
#export LIBCORE | |||||
#export PGCPATH | |||||
#export CONFIG | |||||
#export CC | |||||
#export FC | |||||
#export BU | |||||
#export FU | |||||
#export NEED2UNDERSCORES | |||||
#export USE_THREAD | |||||
#export NUM_THREADS | |||||
#export NUM_CORES | |||||
#export SMP | |||||
#export MAKEFILE_RULE | |||||
#export NEED_PIC | |||||
#export BINARY | |||||
#export BINARY32 | |||||
#export BINARY64 | |||||
#export F_COMPILER | |||||
#export C_COMPILER | |||||
#export USE_OPENMP | |||||
#export CROSS | |||||
#export CROSS_SUFFIX | |||||
#export NOFORTRAN | |||||
#export NO_FBLAS | |||||
#export EXTRALIB | |||||
#export CEXTRALIB | |||||
#export FEXTRALIB | |||||
#export HAVE_SSE | |||||
#export HAVE_SSE2 | |||||
#export HAVE_SSE3 | |||||
#export HAVE_SSSE3 | |||||
#export HAVE_SSE4_1 | |||||
#export HAVE_SSE4_2 | |||||
#export HAVE_SSE4A | |||||
#export HAVE_SSE5 | |||||
#export HAVE_AVX | |||||
#export HAVE_VFP | |||||
#export HAVE_VFPV3 | |||||
#export HAVE_VFPV4 | |||||
#export HAVE_NEON | |||||
#export KERNELDIR | |||||
#export FUNCTION_PROFILE | |||||
#export TARGET_CORE | |||||
# | |||||
#export SGEMM_UNROLL_M | |||||
#export SGEMM_UNROLL_N | |||||
#export DGEMM_UNROLL_M | |||||
#export DGEMM_UNROLL_N | |||||
#export QGEMM_UNROLL_M | |||||
#export QGEMM_UNROLL_N | |||||
#export CGEMM_UNROLL_M | |||||
#export CGEMM_UNROLL_N | |||||
#export ZGEMM_UNROLL_M | |||||
#export ZGEMM_UNROLL_N | |||||
#export XGEMM_UNROLL_M | |||||
#export XGEMM_UNROLL_N | |||||
#export CGEMM3M_UNROLL_M | |||||
#export CGEMM3M_UNROLL_N | |||||
#export ZGEMM3M_UNROLL_M | |||||
#export ZGEMM3M_UNROLL_N | |||||
#export XGEMM3M_UNROLL_M | |||||
#export XGEMM3M_UNROLL_N | |||||
#if (USE_CUDA) | |||||
# export CUDADIR | |||||
# export CUCC | |||||
# export CUFLAGS | |||||
# export CULIB | |||||
#endif | |||||
#.SUFFIXES: .$(PSUFFIX) .$(SUFFIX) .f | |||||
# | |||||
#.f.$(SUFFIX): | |||||
# $(FC) $(FFLAGS) -c $< -o $(@F) | |||||
# | |||||
#.f.$(PSUFFIX): | |||||
# $(FC) $(FPFLAGS) -pg -c $< -o $(@F) | |||||
# these are not cross-platform | |||||
#ifdef BINARY64 | |||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1 | |||||
#PGIPATH = /opt/pgi/linux86-64/7.1-5/lib | |||||
#else | |||||
#PATHSCALEPATH = /opt/pathscale/lib/3.1/32 | |||||
#PGIPATH = /opt/pgi/linux86/7.1-5/lib | |||||
#endif | |||||
#ACMLPATH = /opt/acml/4.3.0 | |||||
#ifneq ($(OSNAME), Darwin) | |||||
#MKLPATH = /opt/intel/mkl/10.2.2.025/lib | |||||
#else | |||||
#MKLPATH = /Library/Frameworks/Intel_MKL.framework/Versions/10.0.1.014/lib | |||||
#endif | |||||
#ATLASPATH = /opt/atlas/3.9.17/opteron | |||||
#FLAMEPATH = $(HOME)/flame/lib | |||||
#ifneq ($(OSNAME), SunOS) | |||||
#SUNPATH = /opt/sunstudio12.1 | |||||
#else | |||||
#SUNPATH = /opt/SUNWspro | |||||
#endif | |||||
@@ -0,0 +1,346 @@ | |||||
# Functions to help with the OpenBLAS build | |||||
# Imports getarch output into CMake variables.
# @param GETARCH_IN text containing VARNAME=VALUE tokens, where both sides consist of letters, digits and underscores
# Each VARNAME found is set to its VALUE in the caller's scope; all other text is ignored.
function(ParseGetArchVars GETARCH_IN)
  string(REGEX MATCHALL "[0-9_a-zA-Z]+=[0-9_a-zA-Z]+" assignments "${GETARCH_IN}")
  foreach (assignment ${assignments})
    # everything before the '=' is the variable name, everything after it the value
    string(REGEX REPLACE "=.*$" "" key "${assignment}")
    string(REGEX REPLACE "^[^=]*=" "" value "${assignment}")
    set(${key} ${value} PARENT_SCOPE)
  endforeach ()
endfunction ()
# Reads a Makefile into CMake vars.
# Every "NAME = value" line sets the CMake variable NAME. Makefile references of the form $(VAR)
# inside a value are replaced with the current value of the CMake variable VAR, and lines of the
# form "include $(KERNELDIR)/<file>" are followed recursively using the CMake variable KERNELDIR.
# This is a macro, so all variables it sets (including its working variables) land in the caller's scope.
# @param MAKEFILE_IN path of the Makefile to read
macro(ParseMakefileVars MAKEFILE_IN)
  message(STATUS "Reading vars from ${MAKEFILE_IN}...")
  file(STRINGS "${MAKEFILE_IN}" makefile_contents)
  foreach (makefile_line ${makefile_contents})
    string(REGEX MATCH "([0-9_a-zA-Z]+)[ \t]*=[ \t]*(.+)$" line_match "${makefile_line}")
    if (NOT "${line_match}" STREQUAL "")
      set(var_name ${CMAKE_MATCH_1})
      set(var_value "${CMAKE_MATCH_2}")
      # check for Makefile variables in the string, e.g. $(TSUFFIX)
      # (var_value is quoted throughout: unquoted, string() would join list elements without
      # their separators and would fail outright once the value has become empty)
      string(REGEX MATCHALL "\\$\\(([0-9_a-zA-Z]+)\\)" make_var_matches "${var_value}")
      foreach (make_var ${make_var_matches})
        # strip out Makefile $() markup
        string(REGEX REPLACE "\\$\\(([0-9_a-zA-Z]+)\\)" "\\1" make_var "${make_var}")
        # now replace the instance of the Makefile variable with the value of the CMake variable (note the double quote)
        string(REPLACE "$(${make_var})" "${${make_var}}" var_value "${var_value}")
      endforeach ()
      # deliberately unquoted: a value that expanded to nothing leaves the variable unset
      set(${var_name} ${var_value})
    else ()
      string(REGEX MATCH "include \\$\\(KERNELDIR\\)/(.+)$" line_match "${makefile_line}")
      if (NOT "${line_match}" STREQUAL "")
        ParseMakefileVars(${KERNELDIR}/${CMAKE_MATCH_1})
      endif ()
    endif ()
  endforeach ()
endmacro ()
# Returns all combinations (subsets) of the input list, including the empty one, as a list with colon-separated combinations.
# Combinations are produced in binary-counter order, e.g. input of A;B;C returns " " A B A:B C A:C B:C A:B:C
# (the empty combination is represented by a single space).
# N.B. The input is meant to be a list, and to pass a list to a function in CMake you must quote it (e.g. AllCombinations("${LIST_VAR}" "${CODES_VAR}")).
# @param list_in the items to combine
# @param absent_codes_in codes to use when an element is absent from a combination, one per item of list_in. For example, if you have TRANS;UNIT;UPPER you may want the code to be NNL when nothing is present.
# @returns LIST_OUT a list of combinations
#          CODES_OUT a list of codes corresponding to each combination: for every item, its first letter if the item is present, otherwise the matching entry of absent_codes_in
function(AllCombinations list_in absent_codes_in)
  list(LENGTH list_in list_count)
  # index of the last combination, 2^list_count - 1 (foreach RANGE includes its upper bound)
  math(EXPR num_combos "(1 << ${list_count}) - 1")
  # index of the last list item; does not change between combinations
  math(EXPR last_list_index "${list_count} - 1")
  set(LIST_OUT "")
  set(CODES_OUT "")
  foreach (c RANGE 0 ${num_combos})
    set(current_combo "")
    set(current_code "")
    foreach (list_index RANGE 0 ${last_list_index})
      # bit list_index of c decides whether item list_index is part of this combination
      math(EXPR bit "1 << ${list_index}")
      math(EXPR combo_has_bit "${c} & ${bit}")
      list(GET list_in ${list_index} list_elem)
      if (combo_has_bit)
        # compared as a string: a truth test would treat items such as N, NO, OFF or 0
        # as "nothing collected yet" and silently drop them from the combination
        if (current_combo STREQUAL "")
          set(current_combo "${list_elem}")
        else ()
          set(current_combo "${current_combo}:${list_elem}")
        endif ()
        string(SUBSTRING "${list_elem}" 0 1 code_char)
      else ()
        list(GET absent_codes_in ${list_index} code_char)
      endif ()
      set(current_code "${current_code}${code_char}")
    endforeach ()
    if (current_combo STREQUAL "")
      list(APPEND LIST_OUT " ") # Empty set is a valid combination, but CMake isn't appending the empty string for some reason, use a space
    else ()
      list(APPEND LIST_OUT ${current_combo})
    endif ()
    list(APPEND CODES_OUT ${current_code})
  endforeach ()
  set(LIST_OUT ${LIST_OUT} PARENT_SCOPE)
  set(CODES_OUT ${CODES_OUT} PARENT_SCOPE)
endfunction ()
# Generates one wrapper source file per (float type, source) pair, using the BLAS naming scheme to pass the function name as a preprocessor definition.
# Each wrapper is written to ${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/<object name><ext>; it #defines the symbols for that object and
# then #includes the real source. Giving every object its own file avoids the duplicate obj filename problem with ar.exe.
# The generated file names are appended to OPENBLAS_SRC in the caller's scope.
# Reads FLOAT_TYPES, FU, BU and VERBOSE_GEN from the calling scope.
# @param sources_in the source files to build from
# @param defines_in (optional) preprocessor definitions that will be applied to all objects
# @param name_in (optional) if this is set this name will be used instead of the filename. Use a * to indicate where the float character should go, if no star the character will be prepended.
#                e.g. with DOUBLE set, "i*max" will generate the name "idmax", and "max" will be "dmax"
# @param use_cblas (optional) if true, the object name is prefixed with cblas_ and CBLAS is defined
# @param replace_last_with (optional) replaces the last character in the filename with this string (e.g. symm_k should be symm_TU)
# @param append_with (optional) appends the filename with this string (e.g. trmm_R should be trmm_RTUU or some other combination of characters)
# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional) some routines have separate source files for complex and non-complex float types.
#                               0 - compiles for all types
#                               1 - compiles the sources for non-complex types only (SINGLE/DOUBLE)
#                               2 - compiles for complex types only (COMPLEX/DOUBLE COMPLEX)
#                               3 - compiles for all types, but changes source names for complex by prepending z (e.g. axpy.c becomes zaxpy.c)
#                               4 - compiles for complex types only, but changes source names for complex by prepending z (e.g. hemv.c becomes zhemv.c)
#                               STRING - compiles only the given type (e.g. DOUBLE)
function(GenerateNamedObjects sources_in)

  # Optional arguments are detected with ARGC instead of DEFINED ARGVn: ARGVn values of an enclosing
  # function call stay visible in here and could be mistaken for arguments of this call.
  # Every option gets an explicit default so nothing is inherited from the calling scope by accident.
  set(defines_in "")
  if (ARGC GREATER 1)
    set(defines_in "${ARGV1}")
  endif ()

  set(name_in "")
  if (ARGC GREATER 2 AND NOT "${ARGV2}" STREQUAL "")
    # strip off extension for kernel files that pass in the object name.
    get_filename_component(name_in "${ARGV2}" NAME_WE)
  endif ()

  set(use_cblas false)
  if (ARGC GREATER 3)
    set(use_cblas "${ARGV3}")
  endif ()

  set(replace_last_with "")
  if (ARGC GREATER 4)
    set(replace_last_with "${ARGV4}")
  endif ()

  set(append_with "")
  if (ARGC GREATER 5)
    set(append_with "${ARGV5}")
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  if (no_float_type)
    set(float_list "DUMMY") # still need to loop once
  else ()
    set(float_list "${FLOAT_TYPES}")
  endif ()

  set(real_only false)
  set(complex_only false)
  set(mangle_complex_sources false)
  if (ARGC GREATER 7 AND NOT "${ARGV7}" STREQUAL "")
    if ("${ARGV7}" EQUAL 1)
      set(real_only true)
    elseif ("${ARGV7}" EQUAL 2)
      set(complex_only true)
    elseif ("${ARGV7}" EQUAL 3)
      set(mangle_complex_sources true)
    elseif ("${ARGV7}" EQUAL 4)
      set(mangle_complex_sources true)
      set(complex_only true)
    elseif (NOT "${ARGV7}" EQUAL 0)
      # anything else names the single float type to build
      set(float_list "${ARGV7}")
    endif ()
  endif ()

  if (complex_only)
    list(REMOVE_ITEM float_list "SINGLE")
    list(REMOVE_ITEM float_list "DOUBLE")
  elseif (real_only)
    list(REMOVE_ITEM float_list "COMPLEX")
    list(REMOVE_ITEM float_list "ZCOMPLEX")
  endif ()

  set(float_char "")
  # start from an empty result list; a variable of the same name in the calling scope must not leak in
  set(SRC_LIST_OUT "")
  foreach (float_type ${float_list})
    foreach (source_file ${sources_in})

      if (NOT no_float_type)
        # first letter of the float type, lower-cased (s, d, c or z)
        string(SUBSTRING "${float_type}" 0 1 float_char)
        string(TOLOWER "${float_char}" float_char)
      endif ()

      if (name_in STREQUAL "")
        get_filename_component(source_name "${source_file}" NAME_WE)
        set(obj_name "${float_char}${source_name}")
      elseif (name_in MATCHES "\\*")
        # replace * with float_char (quoted: float_char is empty when no_float_type is set)
        string(REPLACE "*" "${float_char}" obj_name "${name_in}")
      else ()
        set(obj_name "${float_char}${name_in}")
      endif ()

      # compared as a string so that codes such as "N" are not mistaken for "no replacement"
      if (NOT replace_last_with STREQUAL "")
        string(REGEX REPLACE ".$" "${replace_last_with}" obj_name "${obj_name}")
      else ()
        set(obj_name "${obj_name}${append_with}")
      endif ()

      # now add the object and set the defines
      set(obj_defines ${defines_in})

      if (use_cblas)
        set(obj_name "cblas_${obj_name}")
        list(APPEND obj_defines "CBLAS")
      endif ()

      list(APPEND obj_defines "ASMNAME=${FU}${obj_name};ASMFNAME=${FU}${obj_name}${BU};NAME=${obj_name}${BU};CNAME=${obj_name};CHAR_NAME=\"${obj_name}${BU}\";CHAR_CNAME=\"${obj_name}\"")
      if (float_type STREQUAL "DOUBLE" OR float_type STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "DOUBLE")
      endif ()
      if (float_type STREQUAL "COMPLEX" OR float_type STREQUAL "ZCOMPLEX")
        list(APPEND obj_defines "COMPLEX")
        if (mangle_complex_sources)
          # add a z to the filename
          get_filename_component(source_name "${source_file}" NAME)
          string(REPLACE "${source_name}" "z${source_name}" source_file "${source_file}")
        endif ()
      endif ()

      if (VERBOSE_GEN)
        message(STATUS "${obj_name}:${source_file}")
        message(STATUS "${obj_defines}")
      endif ()

      # create a copy of the source to avoid duplicate obj filename problem with ar.exe
      get_filename_component(source_extension "${source_file}" EXT)
      set(new_source_file "${CMAKE_CURRENT_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/${obj_name}${source_extension}")
      if (IS_ABSOLUTE "${source_file}")
        set(old_source_file "${source_file}")
      else ()
        set(old_source_file "${CMAKE_CURRENT_LIST_DIR}/${source_file}")
      endif ()

      # turn the define list into "#define NAME VALUE" lines followed by an #include of the real source
      string(REPLACE ";" "\n#define " define_source "${obj_defines}")
      string(REPLACE "=" " " define_source "${define_source}")
      file(WRITE ${new_source_file} "#define ${define_source}\n#include \"${old_source_file}\"")
      list(APPEND SRC_LIST_OUT ${new_source_file})

    endforeach ()
  endforeach ()

  list(APPEND OPENBLAS_SRC ${SRC_LIST_OUT})
  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
# Calls GenerateNamedObjects for each of the sources for each of the combinations of the preprocessor definitions passed in.
# The combinations and their codes come from AllCombinations; OPENBLAS_SRC is passed on to the caller's scope.
# @param sources_in the source files to build from
# @param defines_in the preprocessor definitions that will be combined to create the object files
# @param absent_codes_in codes to use in an object name when a definition is absent from a combination (one per entry of defines_in, see AllCombinations)
# @param all_defines_in preprocessor definitions that will be applied to all objects (may be empty)
# @param replace_scheme If 1, replace the "k" in the filename with the define combo letters. E.g. symm_k.c with TRANS and UNIT defined will be symm_TU.
#                  If 0, it will simply append the code, e.g. symm_L.c with TRANS and UNIT will be symm_LTU.
#                  If 2, it will append the code with an underscore, e.g. symm.c with TRANS and UNIT will be symm_TU.
#                  If 3, it will insert the code *around* the last character with an underscore, e.g. symm_L.c with TRANS and UNIT will be symm_TLU (required by BLAS level2 objects).
#                  If 4, it will insert the code before the last underscore. E.g. trtri_U_parallel with TRANS will be trtri_UT_parallel
# @param alternate_name (optional) replaces the source name as the object name (define codes are still appended)
# @param no_float_type (optional) turns off the float type define for this build (e.g. SINGLE/DOUBLE/etc)
# @param complex_filename_scheme (optional) see GenerateNamedObjects
function(GenerateCombinationObjects sources_in defines_in absent_codes_in all_defines_in replace_scheme)

  # Optional arguments are detected with ARGC instead of DEFINED ARGVn: ARGVn values of an enclosing
  # function call stay visible in here and could be mistaken for arguments of this call.
  set(alternate_name_in "")
  if (ARGC GREATER 5)
    set(alternate_name_in "${ARGV5}")
  endif ()

  set(no_float_type false)
  if (ARGC GREATER 6)
    set(no_float_type "${ARGV6}")
  endif ()

  set(complex_filename_scheme "")
  if (ARGC GREATER 7)
    set(complex_filename_scheme "${ARGV7}")
  endif ()

  AllCombinations("${defines_in}" "${absent_codes_in}")
  set(define_combos ${LIST_OUT})
  set(define_codes ${CODES_OUT})

  list(LENGTH define_combos num_combos)
  math(EXPR num_combos "${num_combos} - 1")

  foreach (c RANGE 0 ${num_combos})

    list(GET define_combos ${c} define_combo)
    list(GET define_codes ${c} define_code)

    # Defines for this combination. This is worked out once per combination: converting define_combo
    # inside the per-source loop re-processed the already converted list for every source after the
    # first one, which glued the definitions together (e.g. TRANS;UNIT became TRANSUNIT).
    if ("${define_combo}" STREQUAL " ")
      # the empty combination is represented by a single space
      set(combo_defines ${all_defines_in})
    else ()
      # replace colon separated list with semicolons, this turns it into a CMake list
      string(REPLACE ":" ";" combo_defines "${define_combo}")
      list(APPEND combo_defines ${all_defines_in})
    endif ()

    foreach (source_file ${sources_in})

      set(alternate_name ${alternate_name_in})
      set(cur_defines ${combo_defines})

      set(replace_code "")
      set(append_code "")
      if (replace_scheme EQUAL 1)
        set(replace_code ${define_code})
      else ()
        if (replace_scheme EQUAL 2)
          set(append_code "_${define_code}")
        elseif (replace_scheme EQUAL 3)
          if ("${alternate_name}" STREQUAL "")
            string(REGEX MATCH "[a-zA-Z]\\." last_letter "${source_file}")
          else ()
            string(REGEX MATCH "[a-zA-Z]$" last_letter "${alternate_name}")
          endif ()
          # first extract the last letter
          string(SUBSTRING "${last_letter}" 0 1 last_letter) # remove period from match
          # break the code up into the first letter and the remaining (should only be 2 anyway)
          string(SUBSTRING "${define_code}" 0 1 define_code_first)
          string(SUBSTRING "${define_code}" 1 -1 define_code_second)
          set(replace_code "${define_code_first}${last_letter}${define_code_second}")
        elseif (replace_scheme EQUAL 4)
          # insert code before the last underscore and pass that in as the alternate_name
          if ("${alternate_name}" STREQUAL "")
            get_filename_component(alternate_name "${source_file}" NAME_WE)
          endif ()
          set(extra_underscore "")
          # check if filename has two underscores, insert another if not (e.g. getrs_parallel needs to become getrs_U_parallel not getrsU_parallel)
          string(REGEX MATCH "_[a-zA-Z]+_" underscores "${alternate_name}")
          string(LENGTH "${underscores}" underscores)
          if (underscores EQUAL 0)
            set(extra_underscore "_")
          endif ()
          string(REGEX REPLACE "(.+)(_[^_]+)$" "\\1${extra_underscore}${define_code}\\2" alternate_name "${alternate_name}")
        else ()
          set(append_code ${define_code}) # replace_scheme should be 0
        endif ()
      endif ()

      GenerateNamedObjects("${source_file}" "${cur_defines}" "${alternate_name}" false "${replace_code}" "${append_code}" "${no_float_type}" "${complex_filename_scheme}")
    endforeach ()
  endforeach ()

  set(OPENBLAS_SRC ${OPENBLAS_SRC} PARENT_SCOPE)
endfunction ()
@@ -82,7 +82,10 @@ extern "C" { | |||||
#include <stdio.h> | #include <stdio.h> | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <string.h> | #include <string.h> | ||||
#if !defined(_MSC_VER) | |||||
#include <unistd.h> | #include <unistd.h> | ||||
#endif | |||||
#ifdef OS_LINUX | #ifdef OS_LINUX | ||||
#include <malloc.h> | #include <malloc.h> | ||||
@@ -93,6 +96,14 @@ extern "C" { | |||||
#include <sched.h> | #include <sched.h> | ||||
#endif | #endif | ||||
#ifdef OS_ANDROID | |||||
#define NO_SYSV_IPC | |||||
//Android NDK only supports complex.h since Android 5.0 | |||||
#if __ANDROID_API__ < 21 | |||||
#define FORCE_OPENBLAS_COMPLEX_STRUCT | |||||
#endif | |||||
#endif | |||||
#ifdef OS_WINDOWS | #ifdef OS_WINDOWS | ||||
#ifdef ATOM | #ifdef ATOM | ||||
#define GOTO_ATOM ATOM | #define GOTO_ATOM ATOM | ||||
@@ -106,8 +117,11 @@ extern "C" { | |||||
#endif | #endif | ||||
#else | #else | ||||
#include <sys/mman.h> | #include <sys/mman.h> | ||||
#ifndef NO_SYSV_IPC | |||||
#include <sys/shm.h> | #include <sys/shm.h> | ||||
#endif | |||||
#include <sys/time.h> | #include <sys/time.h> | ||||
#include <time.h> | |||||
#include <unistd.h> | #include <unistd.h> | ||||
#include <math.h> | #include <math.h> | ||||
#ifdef SMP | #ifdef SMP | ||||
@@ -287,13 +301,6 @@ typedef int blasint; | |||||
#define COMPSIZE 2 | #define COMPSIZE 2 | ||||
#endif | #endif | ||||
#if defined(C_PGI) || defined(C_SUN) | |||||
#define CREAL(X) (*((FLOAT *)&X + 0)) | |||||
#define CIMAG(X) (*((FLOAT *)&X + 1)) | |||||
#else | |||||
#define CREAL __real__ | |||||
#define CIMAG __imag__ | |||||
#endif | |||||
#define Address_H(x) (((x)+(1<<15))>>16) | #define Address_H(x) (((x)+(1<<15))>>16) | ||||
#define Address_L(x) ((x)-((Address_H(x))<<16)) | #define Address_L(x) ((x)-((Address_H(x))<<16)) | ||||
@@ -307,8 +314,12 @@ typedef int blasint; | |||||
#endif | #endif | ||||
#if defined(OS_WINDOWS) | #if defined(OS_WINDOWS) | ||||
#if defined(_MSC_VER) && !defined(__clang__) | |||||
#define YIELDING YieldProcessor() | |||||
#else | |||||
#define YIELDING SwitchToThread() | #define YIELDING SwitchToThread() | ||||
#endif | #endif | ||||
#endif | |||||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5) | #if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) || defined(ARMV5) | ||||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); | #define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); | ||||
@@ -404,7 +415,51 @@ typedef char env_var_t[MAX_PATH]; | |||||
typedef char* env_var_t; | typedef char* env_var_t; | ||||
#define readenv(p, n) ((p)=getenv(n)) | #define readenv(p, n) ((p)=getenv(n)) | ||||
#endif | #endif | ||||
#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS) | |||||
#ifdef _POSIX_MONOTONIC_CLOCK | |||||
#if defined(__GLIBC_PREREQ) // cut the if condition if two lines, otherwise will fail at __GLIBC_PREREQ(2, 17) | |||||
#if __GLIBC_PREREQ(2, 17) // don't require -lrt | |||||
#define USE_MONOTONIC | |||||
#endif | |||||
#elif defined(OS_ANDROID) | |||||
#define USE_MONOTONIC | |||||
#endif | |||||
#endif | #endif | ||||
/* use similar scale as x86 rdtsc for timeouts to work correctly */ | |||||
static inline unsigned long long rpcc(void){ | |||||
#ifdef USE_MONOTONIC | |||||
struct timespec ts; | |||||
clock_gettime(CLOCK_MONOTONIC, &ts); | |||||
return (unsigned long long)ts.tv_sec * 1000000000ull + ts.tv_nsec; | |||||
#else | |||||
struct timeval tv; | |||||
gettimeofday(&tv,NULL); | |||||
return (unsigned long long)tv.tv_sec * 1000000000ull + tv.tv_usec * 1000; | |||||
#endif | |||||
} | |||||
#define RPCC_DEFINED | |||||
#define RPCC64BIT | |||||
#endif // !RPCC_DEFINED | |||||
#if !defined(BLAS_LOCK_DEFINED) && defined(__GNUC__) | |||||
static void __inline blas_lock(volatile BLASULONG *address){ | |||||
do { | |||||
while (*address) {YIELDING;}; | |||||
} while (!__sync_bool_compare_and_swap(address, 0, 1)); | |||||
} | |||||
#define BLAS_LOCK_DEFINED | |||||
#endif | |||||
#ifndef RPCC_DEFINED | |||||
#error "rpcc() implementation is missing for your platform" | |||||
#endif | |||||
#ifndef BLAS_LOCK_DEFINED | |||||
#error "blas_lock() implementation is missing for your platform" | |||||
#endif | |||||
#endif // !ASSEMBLER | |||||
#ifdef OS_LINUX | #ifdef OS_LINUX | ||||
#include "common_linux.h" | #include "common_linux.h" | ||||
@@ -450,18 +505,52 @@ typedef char* env_var_t; | |||||
/* C99 supports complex floating numbers natively, which GCC also offers as an | /* C99 supports complex floating numbers natively, which GCC also offers as an | ||||
extension since version 3.0. If neither are available, use a compatible | extension since version 3.0. If neither are available, use a compatible | ||||
structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | structure as fallback (see Clause 6.2.5.13 of the C99 standard). */ | ||||
#if (defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | |||||
(__GNUC__ >= 3 && !defined(__cplusplus))) | |||||
#if ((defined(__STDC_IEC_559_COMPLEX__) || __STDC_VERSION__ >= 199901L || \ | |||||
(__GNUC__ >= 3 && !defined(__cplusplus))) && !(defined(FORCE_OPENBLAS_COMPLEX_STRUCT))) | |||||
#define OPENBLAS_COMPLEX_C99 | #define OPENBLAS_COMPLEX_C99 | ||||
#ifndef __cplusplus | |||||
#include <complex.h> | |||||
#endif | |||||
typedef float _Complex openblas_complex_float; | typedef float _Complex openblas_complex_float; | ||||
typedef double _Complex openblas_complex_double; | typedef double _Complex openblas_complex_double; | ||||
typedef xdouble _Complex openblas_complex_xdouble; | typedef xdouble _Complex openblas_complex_xdouble; | ||||
#define openblas_make_complex_float(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
#define openblas_make_complex_double(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
#define openblas_make_complex_xdouble(real, imag) ((real) + ((imag) * _Complex_I)) | |||||
#else | #else | ||||
#define OPENBLAS_COMPLEX_STRUCT | #define OPENBLAS_COMPLEX_STRUCT | ||||
typedef struct { float real, imag; } openblas_complex_float; | typedef struct { float real, imag; } openblas_complex_float; | ||||
typedef struct { double real, imag; } openblas_complex_double; | typedef struct { double real, imag; } openblas_complex_double; | ||||
typedef struct { xdouble real, imag; } openblas_complex_xdouble; | typedef struct { xdouble real, imag; } openblas_complex_xdouble; | ||||
#define openblas_make_complex_float(real, imag) {(real), (imag)} | |||||
#define openblas_make_complex_double(real, imag) {(real), (imag)} | |||||
#define openblas_make_complex_xdouble(real, imag) {(real), (imag)} | |||||
#endif | |||||
#ifdef XDOUBLE | |||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_xdouble | |||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_xdouble(r,i) | |||||
#elif defined(DOUBLE) | |||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_double | |||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_double(r,i) | |||||
#else | |||||
#define OPENBLAS_COMPLEX_FLOAT openblas_complex_float | |||||
#define OPENBLAS_MAKE_COMPLEX_FLOAT(r,i) openblas_make_complex_float(r,i) | |||||
#endif | #endif | ||||
/*
 * CREAL / CIMAG: access the real and imaginary part of a complex value.
 * Three variants are selected at preprocessing time:
 *   - C_PGI or C_SUN defined: index the object through a FLOAT pointer
 *     (element 0 is used for CREAL, element 1 for CIMAG);
 *   - OPENBLAS_COMPLEX_STRUCT defined: read the .real / .imag members;
 *   - otherwise: map directly onto the __real__ / __imag__ operators.
 */
#if defined(C_PGI) || defined(C_SUN)
#define CREAL(X)	(*((FLOAT *)&X + 0))
#define CIMAG(X)	(*((FLOAT *)&X + 1))
#elif defined(OPENBLAS_COMPLEX_STRUCT)
#define CREAL(Z)	((Z).real)
#define CIMAG(Z)	((Z).imag)
#else
#define CREAL	__real__
#define CIMAG	__imag__
#endif
#endif // ASSEMBLER | #endif // ASSEMBLER | ||||
#ifndef IFLUSH | #ifndef IFLUSH | ||||
@@ -478,6 +567,10 @@ typedef char* env_var_t; | |||||
#endif | #endif | ||||
#endif | #endif | ||||
#if defined(C_MSVC) | |||||
#define inline __inline | |||||
#endif | |||||
#ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
#ifndef MIN | #ifndef MIN | ||||
@@ -499,6 +592,8 @@ void blas_set_parameter(void); | |||||
int blas_get_cpu_number(void); | int blas_get_cpu_number(void); | ||||
void *blas_memory_alloc (int); | void *blas_memory_alloc (int); | ||||
void blas_memory_free (void *); | void blas_memory_free (void *); | ||||
void *blas_memory_alloc_nolock (int); //use malloc without blas_lock | |||||
void blas_memory_free_nolock (void *); | |||||
int get_num_procs (void); | int get_num_procs (void); | ||||
@@ -76,6 +76,7 @@ static void __inline blas_lock(unsigned long *address){ | |||||
"30:", address); | "30:", address); | ||||
#endif | #endif | ||||
} | } | ||||
#define BLAS_LOCK_DEFINED | |||||
static __inline unsigned int rpcc(void){ | static __inline unsigned int rpcc(void){ | ||||
@@ -89,6 +90,7 @@ static __inline unsigned int rpcc(void){ | |||||
return r0; | return r0; | ||||
} | } | ||||
#define RPCC_DEFINED | |||||
#define HALT ldq $0, 0($0) | #define HALT ldq $0, 0($0) | ||||
@@ -1,5 +1,5 @@ | |||||
/***************************************************************************** | /***************************************************************************** | ||||
Copyright (c) 2011-2014, The OpenBLAS Project | |||||
Copyright (c) 2011-2015, The OpenBLAS Project | |||||
All rights reserved. | All rights reserved. | ||||
Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
@@ -30,56 +30,29 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
**********************************************************************************/ | **********************************************************************************/ | ||||
/*********************************************************************/ | |||||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
/* All rights reserved. */ | |||||
/* */ | |||||
/* Redistribution and use in source and binary forms, with or */ | |||||
/* without modification, are permitted provided that the following */ | |||||
/* conditions are met: */ | |||||
/* */ | |||||
/* 1. Redistributions of source code must retain the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer. */ | |||||
/* */ | |||||
/* 2. Redistributions in binary form must reproduce the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer in the documentation and/or other materials */ | |||||
/* provided with the distribution. */ | |||||
/* */ | |||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||||
/* */ | |||||
/* The views and conclusions contained in the software and */ | |||||
/* documentation are those of the authors and should not be */ | |||||
/* interpreted as representing official policies, either expressed */ | |||||
/* or implied, of The University of Texas at Austin. */ | |||||
/*********************************************************************/ | |||||
#ifndef COMMON_ARM | #ifndef COMMON_ARM | ||||
#define COMMON_ARM | #define COMMON_ARM | ||||
#if defined(ARMV5) || defined(ARMV6) | |||||
#define MB | #define MB | ||||
#define WMB | #define WMB | ||||
#else | |||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory") | |||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory") | |||||
#endif | |||||
#define INLINE inline | #define INLINE inline | ||||
#define RETURN_BY_COMPLEX | #define RETURN_BY_COMPLEX | ||||
#ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
#if defined(ARMV6) || defined(ARMV7) || defined(ARMV8) | |||||
static void __inline blas_lock(volatile BLASULONG *address){ | static void __inline blas_lock(volatile BLASULONG *address){ | ||||
int register ret; | int register ret; | ||||
@@ -88,37 +61,29 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||||
while (*address) {YIELDING;}; | while (*address) {YIELDING;}; | ||||
__asm__ __volatile__( | __asm__ __volatile__( | ||||
"ldrex r2, [%1] \n\t" | |||||
"mov r2, #0 \n\t" | |||||
"strex r3, r2, [%1] \n\t" | |||||
"mov %0 , r3 \n\t" | |||||
: "=r"(ret), "=r"(address) | |||||
: "1"(address) | |||||
: "memory", "r2" , "r3" | |||||
"ldrex r2, [%1] \n\t" | |||||
"strex %0, %2, [%1] \n\t" | |||||
"orr %0, r2 \n\t" | |||||
: "=&r"(ret) | |||||
: "r"(address), "r"(1) | |||||
: "memory", "r2" | |||||
); | ); | ||||
} while (ret); | } while (ret); | ||||
MB; | |||||
} | } | ||||
static inline unsigned long long rpcc(void){ | |||||
unsigned long long ret=0; | |||||
double v; | |||||
struct timeval tv; | |||||
gettimeofday(&tv,NULL); | |||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6; | |||||
ret = (unsigned long long) ( v * 1000.0d ); | |||||
return ret; | |||||
} | |||||
#define BLAS_LOCK_DEFINED | |||||
#endif | |||||
static inline int blas_quickdivide(blasint x, blasint y){ | static inline int blas_quickdivide(blasint x, blasint y){ | ||||
return x / y; | return x / y; | ||||
} | } | ||||
#if defined(DOUBLE) | |||||
#if !defined(HAVE_VFP) | |||||
/* no FPU, soft float */ | |||||
#define GET_IMAGE(res) | |||||
#elif defined(DOUBLE) | |||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory") | #define GET_IMAGE(res) __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory") | ||||
#else | #else | ||||
#define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory") | #define GET_IMAGE(res) __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory") | ||||
@@ -166,4 +131,8 @@ REALNAME: | |||||
#define MAP_ANONYMOUS MAP_ANON | #define MAP_ANONYMOUS MAP_ANON | ||||
#endif | #endif | ||||
#if !defined(ARMV5) && !defined(ARMV6) && !defined(ARMV7) && !defined(ARMV8) | |||||
#error "you must define ARMV5, ARMV6, ARMV7 or ARMV8" | |||||
#endif | |||||
#endif | #endif |
@@ -1,5 +1,5 @@ | |||||
/***************************************************************************** | /***************************************************************************** | ||||
Copyright (c) 2011-2014, The OpenBLAS Project | |||||
Copyright (c) 2011-2015, The OpenBLAS Project | |||||
All rights reserved. | All rights reserved. | ||||
Redistribution and use in source and binary forms, with or without | Redistribution and use in source and binary forms, with or without | ||||
@@ -30,49 +30,12 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
**********************************************************************************/ | **********************************************************************************/ | ||||
/*********************************************************************/ | |||||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
/* All rights reserved. */ | |||||
/* */ | |||||
/* Redistribution and use in source and binary forms, with or */ | |||||
/* without modification, are permitted provided that the following */ | |||||
/* conditions are met: */ | |||||
/* */ | |||||
/* 1. Redistributions of source code must retain the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer. */ | |||||
/* */ | |||||
/* 2. Redistributions in binary form must reproduce the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer in the documentation and/or other materials */ | |||||
/* provided with the distribution. */ | |||||
/* */ | |||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||||
/* */ | |||||
/* The views and conclusions contained in the software and */ | |||||
/* documentation are those of the authors and should not be */ | |||||
/* interpreted as representing official policies, either expressed */ | |||||
/* or implied, of The University of Texas at Austin. */ | |||||
/*********************************************************************/ | |||||
#ifndef COMMON_ARM64 | #ifndef COMMON_ARM64 | ||||
#define COMMON_ARM64 | #define COMMON_ARM64 | ||||
#define MB | |||||
#define WMB | |||||
#define MB __asm__ __volatile__ ("dmb ish" : : : "memory") | |||||
#define WMB __asm__ __volatile__ ("dmb ishst" : : : "memory") | |||||
#define INLINE inline | #define INLINE inline | ||||
@@ -81,39 +44,27 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
static void __inline blas_lock(volatile BLASULONG *address){ | static void __inline blas_lock(volatile BLASULONG *address){ | ||||
/* | |||||
int register ret; | |||||
long register ret; | |||||
do { | do { | ||||
while (*address) {YIELDING;}; | while (*address) {YIELDING;}; | ||||
__asm__ __volatile__( | __asm__ __volatile__( | ||||
"ldrex r2, [%1] \n\t" | |||||
"mov r2, #0 \n\t" | |||||
"strex r3, r2, [%1] \n\t" | |||||
"mov %0 , r3 \n\t" | |||||
: "=r"(ret), "=r"(address) | |||||
: "1"(address) | |||||
: "memory", "r2" , "r3" | |||||
"ldaxr %0, [%1] \n\t" | |||||
"stlxr w2, %2, [%1] \n\t" | |||||
"orr %0, %0, x2 \n\t" | |||||
: "=r"(ret) | |||||
: "r"(address), "r"(1l) | |||||
: "memory", "x2" | |||||
); | ); | ||||
} while (ret); | } while (ret); | ||||
*/ | |||||
MB; | |||||
} | } | ||||
#define BLAS_LOCK_DEFINED | |||||
static inline unsigned long long rpcc(void){ | |||||
unsigned long long ret=0; | |||||
double v; | |||||
struct timeval tv; | |||||
gettimeofday(&tv,NULL); | |||||
v=(double) tv.tv_sec + (double) tv.tv_usec * 1e-6; | |||||
ret = (unsigned long long) ( v * 1000.0d ); | |||||
return ret; | |||||
} | |||||
static inline int blas_quickdivide(blasint x, blasint y){ | static inline int blas_quickdivide(blasint x, blasint y){ | ||||
return x / y; | return x / y; | ||||
} | } | ||||
@@ -166,3 +117,4 @@ REALNAME: | |||||
#endif | #endif | ||||
#endif | #endif | ||||
@@ -220,6 +220,15 @@ | |||||
#define COMATCOPY_K_CTC comatcopy_k_ctc | #define COMATCOPY_K_CTC comatcopy_k_ctc | ||||
#define COMATCOPY_K_RTC comatcopy_k_rtc | #define COMATCOPY_K_RTC comatcopy_k_rtc | ||||
#define CIMATCOPY_K_CN cimatcopy_k_cn | |||||
#define CIMATCOPY_K_RN cimatcopy_k_rn | |||||
#define CIMATCOPY_K_CT cimatcopy_k_ct | |||||
#define CIMATCOPY_K_RT cimatcopy_k_rt | |||||
#define CIMATCOPY_K_CNC cimatcopy_k_cnc | |||||
#define CIMATCOPY_K_RNC cimatcopy_k_rnc | |||||
#define CIMATCOPY_K_CTC cimatcopy_k_ctc | |||||
#define CIMATCOPY_K_RTC cimatcopy_k_rtc | |||||
#define CGEADD_K cgeadd_k | #define CGEADD_K cgeadd_k | ||||
#else | #else | ||||
@@ -403,6 +412,16 @@ | |||||
#define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc | #define COMATCOPY_K_RNC gotoblas -> comatcopy_k_rnc | ||||
#define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc | #define COMATCOPY_K_CTC gotoblas -> comatcopy_k_ctc | ||||
#define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc | #define COMATCOPY_K_RTC gotoblas -> comatcopy_k_rtc | ||||
#define CIMATCOPY_K_CN gotoblas -> cimatcopy_k_cn | |||||
#define CIMATCOPY_K_RN gotoblas -> cimatcopy_k_rn | |||||
#define CIMATCOPY_K_CT gotoblas -> cimatcopy_k_ct | |||||
#define CIMATCOPY_K_RT gotoblas -> cimatcopy_k_rt | |||||
#define CIMATCOPY_K_CNC gotoblas -> cimatcopy_k_cnc | |||||
#define CIMATCOPY_K_RNC gotoblas -> cimatcopy_k_rnc | |||||
#define CIMATCOPY_K_CTC gotoblas -> cimatcopy_k_ctc | |||||
#define CIMATCOPY_K_RTC gotoblas -> cimatcopy_k_rtc | |||||
#define CGEADD_K gotoblas -> cgeadd_k | #define CGEADD_K gotoblas -> cgeadd_k | ||||
#endif | #endif | ||||
@@ -149,6 +149,11 @@ | |||||
#define DOMATCOPY_K_RN domatcopy_k_rn | #define DOMATCOPY_K_RN domatcopy_k_rn | ||||
#define DOMATCOPY_K_CT domatcopy_k_ct | #define DOMATCOPY_K_CT domatcopy_k_ct | ||||
#define DOMATCOPY_K_RT domatcopy_k_rt | #define DOMATCOPY_K_RT domatcopy_k_rt | ||||
#define DIMATCOPY_K_CN dimatcopy_k_cn | |||||
#define DIMATCOPY_K_RN dimatcopy_k_rn | |||||
#define DIMATCOPY_K_CT dimatcopy_k_ct | |||||
#define DIMATCOPY_K_RT dimatcopy_k_rt | |||||
#define DGEADD_K dgeadd_k | #define DGEADD_K dgeadd_k | ||||
#else | #else | ||||
@@ -267,6 +272,10 @@ | |||||
#define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn | #define DOMATCOPY_K_RN gotoblas -> domatcopy_k_rn | ||||
#define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct | #define DOMATCOPY_K_CT gotoblas -> domatcopy_k_ct | ||||
#define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt | #define DOMATCOPY_K_RT gotoblas -> domatcopy_k_rt | ||||
#define DIMATCOPY_K_CN gotoblas -> dimatcopy_k_cn | |||||
#define DIMATCOPY_K_RN gotoblas -> dimatcopy_k_rn | |||||
#define DIMATCOPY_K_CT gotoblas -> dimatcopy_k_ct | |||||
#define DIMATCOPY_K_RT gotoblas -> dimatcopy_k_rt | |||||
#define DGEADD_K gotoblas -> dgeadd_k | #define DGEADD_K gotoblas -> dgeadd_k | ||||
@@ -68,6 +68,7 @@ static __inline void blas_lock(volatile unsigned long *address){ | |||||
: "ar.ccv", "memory"); | : "ar.ccv", "memory"); | ||||
} while (ret); | } while (ret); | ||||
} | } | ||||
#define BLAS_LOCK_DEFINED | |||||
static __inline unsigned long rpcc(void) { | static __inline unsigned long rpcc(void) { | ||||
unsigned long clocks; | unsigned long clocks; | ||||
@@ -75,6 +76,7 @@ static __inline unsigned long rpcc(void) { | |||||
__asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks)); | __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks)); | ||||
return clocks; | return clocks; | ||||
} | } | ||||
#define RPCC_DEFINED | |||||
static __inline unsigned long stmxcsr(void){ | static __inline unsigned long stmxcsr(void){ | ||||
@@ -99,10 +101,12 @@ static __inline void blas_lock(volatile unsigned long *address){ | |||||
while (*address || _InterlockedCompareExchange((volatile int *) address,1,0)) | while (*address || _InterlockedCompareExchange((volatile int *) address,1,0)) | ||||
; | ; | ||||
} | } | ||||
#define BLAS_LOCK_DEFINED | |||||
static __inline unsigned int rpcc(void) { | static __inline unsigned int rpcc(void) { | ||||
return __getReg(_IA64_REG_AR_ITC); | return __getReg(_IA64_REG_AR_ITC); | ||||
} | } | ||||
#define RPCC_DEFINED | |||||
static __inline unsigned int stmxcsr(void) { | static __inline unsigned int stmxcsr(void) { | ||||
return __getReg(_IA64_REG_AR_FPSR); | return __getReg(_IA64_REG_AR_FPSR); | ||||
@@ -47,12 +47,12 @@ double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG); | double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG); | ||||
xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
float _Complex cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
float _Complex cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
double _Complex zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||||
double _Complex zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||||
xdouble _Complex xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||||
xdouble _Complex xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||||
openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||||
openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||||
openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||||
openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||||
openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||||
int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float, | int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float, | ||||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | ||||
@@ -1736,31 +1736,55 @@ int somatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLAS | |||||
int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); | int somatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); | ||||
int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); | int somatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); | ||||
int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); | int somatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG); | ||||
int simatcopy_k_cn(BLASLONG, BLASLONG, float, float *, BLASLONG); | |||||
int simatcopy_k_rn(BLASLONG, BLASLONG, float, float *, BLASLONG); | |||||
int simatcopy_k_ct(BLASLONG, BLASLONG, float, float *, BLASLONG); | |||||
int simatcopy_k_rt(BLASLONG, BLASLONG, float, float *, BLASLONG); | |||||
int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); | int domatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); | ||||
int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); | int domatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); | ||||
int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); | int domatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); | ||||
int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); | int domatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG); | ||||
int dimatcopy_k_cn(BLASLONG, BLASLONG, double, double *, BLASLONG); | |||||
int dimatcopy_k_rn(BLASLONG, BLASLONG, double, double *, BLASLONG); | |||||
int dimatcopy_k_ct(BLASLONG, BLASLONG, double, double *, BLASLONG); | |||||
int dimatcopy_k_rt(BLASLONG, BLASLONG, double, double *, BLASLONG); | |||||
int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | int comatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | ||||
int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | int comatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | ||||
int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | int comatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | ||||
int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | int comatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | ||||
int cimatcopy_k_cn(BLASLONG, BLASLONG, float, float, float *, BLASLONG); | |||||
int cimatcopy_k_rn(BLASLONG, BLASLONG, float, float, float *, BLASLONG); | |||||
int cimatcopy_k_ct(BLASLONG, BLASLONG, float, float, float *, BLASLONG); | |||||
int cimatcopy_k_rt(BLASLONG, BLASLONG, float, float, float *, BLASLONG); | |||||
int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | int comatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | ||||
int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | int comatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | ||||
int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | int comatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | ||||
int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | int comatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG, float *, BLASLONG); | ||||
int cimatcopy_k_cnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG); | |||||
int cimatcopy_k_rnc(BLASLONG, BLASLONG, float, float, float *, BLASLONG); | |||||
int cimatcopy_k_ctc(BLASLONG, BLASLONG, float, float, float *, BLASLONG); | |||||
int cimatcopy_k_rtc(BLASLONG, BLASLONG, float, float, float *, BLASLONG); | |||||
int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | int zomatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | ||||
int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | int zomatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | ||||
int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | int zomatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | ||||
int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | int zomatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | ||||
int zimatcopy_k_cn(BLASLONG, BLASLONG, double, double, double *, BLASLONG); | |||||
int zimatcopy_k_rn(BLASLONG, BLASLONG, double, double, double *, BLASLONG); | |||||
int zimatcopy_k_ct(BLASLONG, BLASLONG, double, double, double *, BLASLONG); | |||||
int zimatcopy_k_rt(BLASLONG, BLASLONG, double, double, double *, BLASLONG); | |||||
int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | int zomatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | ||||
int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | int zomatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | ||||
int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | int zomatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | ||||
int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | int zomatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG, double *, BLASLONG); | ||||
int zimatcopy_k_cnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG); | |||||
int zimatcopy_k_rnc(BLASLONG, BLASLONG, double, double, double *, BLASLONG); | |||||
int zimatcopy_k_ctc(BLASLONG, BLASLONG, double, double, double *, BLASLONG); | |||||
int zimatcopy_k_rtc(BLASLONG, BLASLONG, double, double, double *, BLASLONG); | |||||
int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG); | int sgeadd_k(BLASLONG, BLASLONG, float, float*, BLASLONG, float, float *, BLASLONG); | ||||
int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG); | int dgeadd_k(BLASLONG, BLASLONG, double, double*, BLASLONG, double, double *, BLASLONG); | ||||
@@ -634,6 +634,11 @@ | |||||
#define OMATCOPY_K_RN DOMATCOPY_K_RN | #define OMATCOPY_K_RN DOMATCOPY_K_RN | ||||
#define OMATCOPY_K_CT DOMATCOPY_K_CT | #define OMATCOPY_K_CT DOMATCOPY_K_CT | ||||
#define OMATCOPY_K_RT DOMATCOPY_K_RT | #define OMATCOPY_K_RT DOMATCOPY_K_RT | ||||
#define IMATCOPY_K_CN DIMATCOPY_K_CN | |||||
#define IMATCOPY_K_RN DIMATCOPY_K_RN | |||||
#define IMATCOPY_K_CT DIMATCOPY_K_CT | |||||
#define IMATCOPY_K_RT DIMATCOPY_K_RT | |||||
#define GEADD_K DGEADD_K | #define GEADD_K DGEADD_K | ||||
#else | #else | ||||
@@ -931,6 +936,10 @@ | |||||
#define OMATCOPY_K_RN SOMATCOPY_K_RN | #define OMATCOPY_K_RN SOMATCOPY_K_RN | ||||
#define OMATCOPY_K_CT SOMATCOPY_K_CT | #define OMATCOPY_K_CT SOMATCOPY_K_CT | ||||
#define OMATCOPY_K_RT SOMATCOPY_K_RT | #define OMATCOPY_K_RT SOMATCOPY_K_RT | ||||
#define IMATCOPY_K_CN SIMATCOPY_K_CN | |||||
#define IMATCOPY_K_RN SIMATCOPY_K_RN | |||||
#define IMATCOPY_K_CT SIMATCOPY_K_CT | |||||
#define IMATCOPY_K_RT SIMATCOPY_K_RT | |||||
#define GEADD_K SGEADD_K | #define GEADD_K SGEADD_K | ||||
#endif | #endif | ||||
@@ -1747,6 +1756,15 @@ | |||||
#define OMATCOPY_K_RNC ZOMATCOPY_K_RNC | #define OMATCOPY_K_RNC ZOMATCOPY_K_RNC | ||||
#define OMATCOPY_K_CTC ZOMATCOPY_K_CTC | #define OMATCOPY_K_CTC ZOMATCOPY_K_CTC | ||||
#define OMATCOPY_K_RTC ZOMATCOPY_K_RTC | #define OMATCOPY_K_RTC ZOMATCOPY_K_RTC | ||||
#define IMATCOPY_K_CN ZIMATCOPY_K_CN | |||||
#define IMATCOPY_K_RN ZIMATCOPY_K_RN | |||||
#define IMATCOPY_K_CT ZIMATCOPY_K_CT | |||||
#define IMATCOPY_K_RT ZIMATCOPY_K_RT | |||||
#define IMATCOPY_K_CNC ZIMATCOPY_K_CNC | |||||
#define IMATCOPY_K_RNC ZIMATCOPY_K_RNC | |||||
#define IMATCOPY_K_CTC ZIMATCOPY_K_CTC | |||||
#define IMATCOPY_K_RTC ZIMATCOPY_K_RTC | |||||
#define GEADD_K ZGEADD_K | #define GEADD_K ZGEADD_K | ||||
#else | #else | ||||
@@ -2160,6 +2178,14 @@ | |||||
#define OMATCOPY_K_RNC COMATCOPY_K_RNC | #define OMATCOPY_K_RNC COMATCOPY_K_RNC | ||||
#define OMATCOPY_K_CTC COMATCOPY_K_CTC | #define OMATCOPY_K_CTC COMATCOPY_K_CTC | ||||
#define OMATCOPY_K_RTC COMATCOPY_K_RTC | #define OMATCOPY_K_RTC COMATCOPY_K_RTC | ||||
#define IMATCOPY_K_CN CIMATCOPY_K_CN | |||||
#define IMATCOPY_K_RN CIMATCOPY_K_RN | |||||
#define IMATCOPY_K_CT CIMATCOPY_K_CT | |||||
#define IMATCOPY_K_RT CIMATCOPY_K_RT | |||||
#define IMATCOPY_K_CNC CIMATCOPY_K_CNC | |||||
#define IMATCOPY_K_RNC CIMATCOPY_K_RNC | |||||
#define IMATCOPY_K_CTC CIMATCOPY_K_CTC | |||||
#define IMATCOPY_K_RTC CIMATCOPY_K_RTC | |||||
#define GEADD_K CGEADD_K | #define GEADD_K CGEADD_K | ||||
@@ -98,6 +98,7 @@ static void INLINE blas_lock(volatile unsigned long *address){ | |||||
} while (ret); | } while (ret); | ||||
} | } | ||||
#define BLAS_LOCK_DEFINED | |||||
static inline unsigned int rpcc(void){ | static inline unsigned int rpcc(void){ | ||||
unsigned long ret; | unsigned long ret; | ||||
@@ -118,6 +119,7 @@ static inline unsigned int rpcc(void){ | |||||
#endif | #endif | ||||
return ret; | return ret; | ||||
} | } | ||||
#define RPCC_DEFINED | |||||
#if defined(LOONGSON3A) || defined(LOONGSON3B) | #if defined(LOONGSON3A) || defined(LOONGSON3B) | ||||
#ifndef NO_AFFINITY | #ifndef NO_AFFINITY | ||||
@@ -855,6 +855,36 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG); | |||||
int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | int (*zomatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG, double*, BLASLONG); | ||||
int (*simatcopy_k_cn) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
int (*simatcopy_k_ct) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
int (*simatcopy_k_rn) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
int (*simatcopy_k_rt) (BLASLONG, BLASLONG, float, float*, BLASLONG); | |||||
int (*dimatcopy_k_cn) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
int (*dimatcopy_k_ct) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
int (*dimatcopy_k_rn) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
int (*dimatcopy_k_rt) (BLASLONG, BLASLONG, double, double*, BLASLONG); | |||||
int (*cimatcopy_k_cn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
int (*cimatcopy_k_ct) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
int (*cimatcopy_k_rn) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
int (*cimatcopy_k_rt) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
int (*cimatcopy_k_cnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
int (*cimatcopy_k_ctc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
int (*cimatcopy_k_rnc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
int (*cimatcopy_k_rtc) (BLASLONG, BLASLONG, float, float, float*, BLASLONG); | |||||
int (*zimatcopy_k_cn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
int (*zimatcopy_k_ct) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
int (*zimatcopy_k_rn) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
int (*zimatcopy_k_rt) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
int (*zimatcopy_k_cnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
int (*zimatcopy_k_ctc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
int (*zimatcopy_k_rnc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
int (*zimatcopy_k_rtc) (BLASLONG, BLASLONG, double, double, double*, BLASLONG); | |||||
int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); | int (*sgeadd_k) (BLASLONG, BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); | ||||
int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); | int (*dgeadd_k) (BLASLONG, BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG); | ||||
int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); | int (*cgeadd_k) (BLASLONG, BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG); | ||||
@@ -87,6 +87,7 @@ static void INLINE blas_lock(volatile unsigned long *address){ | |||||
#endif | #endif | ||||
} while (ret); | } while (ret); | ||||
} | } | ||||
#define BLAS_LOCK_DEFINED | |||||
static inline unsigned long rpcc(void){ | static inline unsigned long rpcc(void){ | ||||
unsigned long ret; | unsigned long ret; | ||||
@@ -103,6 +104,7 @@ static inline unsigned long rpcc(void){ | |||||
#endif | #endif | ||||
} | } | ||||
#define RPCC_DEFINED | |||||
#ifdef __64BIT__ | #ifdef __64BIT__ | ||||
#define RPCC64BIT | #define RPCC64BIT | ||||
@@ -495,6 +497,15 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
REALNAME: | REALNAME: | ||||
#define EPILOGUE .size REALNAME, .-REALNAME | #define EPILOGUE .size REALNAME, .-REALNAME | ||||
#else | #else | ||||
#if _CALL_ELF == 2 | |||||
#define PROLOGUE \ | |||||
.section .text;\ | |||||
.align 6;\ | |||||
.globl REALNAME;\ | |||||
.type REALNAME, @function;\ | |||||
REALNAME: | |||||
#define EPILOGUE .size REALNAME, .-REALNAME | |||||
#else | |||||
#define PROLOGUE \ | #define PROLOGUE \ | ||||
.section .text;\ | .section .text;\ | ||||
.align 5;\ | .align 5;\ | ||||
@@ -514,6 +525,7 @@ REALNAME:;\ | |||||
.size .REALNAME, .-.REALNAME; \ | .size .REALNAME, .-.REALNAME; \ | ||||
.section .note.GNU-stack,"",@progbits | .section .note.GNU-stack,"",@progbits | ||||
#endif | #endif | ||||
#endif | |||||
#ifdef PROFILE | #ifdef PROFILE | ||||
#ifndef __64BIT__ | #ifndef __64BIT__ | ||||
@@ -792,4 +804,25 @@ Lmcount$lazy_ptr: | |||||
#ifndef MAP_ANONYMOUS | #ifndef MAP_ANONYMOUS | ||||
#define MAP_ANONYMOUS MAP_ANON | #define MAP_ANONYMOUS MAP_ANON | ||||
#endif | #endif | ||||
#ifdef OS_LINUX | |||||
#ifndef __64BIT__ | |||||
#define FRAMESLOT(X) (((X) * 4) + 8) | |||||
#else | |||||
#if _CALL_ELF == 2 | |||||
#define FRAMESLOT(X) (((X) * 8) + 96) | |||||
#else | |||||
#define FRAMESLOT(X) (((X) * 8) + 112) | |||||
#endif | |||||
#endif | |||||
#endif | |||||
#if defined(OS_AIX) || defined(OS_DARWIN) | |||||
#ifndef __64BIT__ | |||||
#define FRAMESLOT(X) (((X) * 4) + 56) | |||||
#else | |||||
#define FRAMESLOT(X) (((X) * 8) + 112) | |||||
#endif | |||||
#endif | |||||
#endif | #endif |
@@ -152,6 +152,10 @@ | |||||
#define SOMATCOPY_K_RN somatcopy_k_rn | #define SOMATCOPY_K_RN somatcopy_k_rn | ||||
#define SOMATCOPY_K_CT somatcopy_k_ct | #define SOMATCOPY_K_CT somatcopy_k_ct | ||||
#define SOMATCOPY_K_RT somatcopy_k_rt | #define SOMATCOPY_K_RT somatcopy_k_rt | ||||
#define SIMATCOPY_K_CN simatcopy_k_cn | |||||
#define SIMATCOPY_K_RN simatcopy_k_rn | |||||
#define SIMATCOPY_K_CT simatcopy_k_ct | |||||
#define SIMATCOPY_K_RT simatcopy_k_rt | |||||
#define SGEADD_K sgeadd_k | #define SGEADD_K sgeadd_k | ||||
@@ -274,6 +278,10 @@ | |||||
#define SOMATCOPY_K_RN gotoblas -> somatcopy_k_rn | #define SOMATCOPY_K_RN gotoblas -> somatcopy_k_rn | ||||
#define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct | #define SOMATCOPY_K_CT gotoblas -> somatcopy_k_ct | ||||
#define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt | #define SOMATCOPY_K_RT gotoblas -> somatcopy_k_rt | ||||
#define SIMATCOPY_K_CN gotoblas -> simatcopy_k_cn | |||||
#define SIMATCOPY_K_RN gotoblas -> simatcopy_k_rn | |||||
#define SIMATCOPY_K_CT gotoblas -> simatcopy_k_ct | |||||
#define SIMATCOPY_K_RT gotoblas -> simatcopy_k_rt | |||||
#define SGEADD_K gotoblas -> sgeadd_k | #define SGEADD_K gotoblas -> sgeadd_k | ||||
@@ -58,6 +58,7 @@ static void __inline blas_lock(volatile unsigned long *address){ | |||||
: "memory"); | : "memory"); | ||||
} while (ret); | } while (ret); | ||||
} | } | ||||
#define BLAS_LOCK_DEFINED | |||||
static __inline unsigned long rpcc(void){ | static __inline unsigned long rpcc(void){ | ||||
unsigned long clocks; | unsigned long clocks; | ||||
@@ -66,6 +67,7 @@ static __inline unsigned long rpcc(void){ | |||||
return clocks; | return clocks; | ||||
}; | }; | ||||
#define RPCC_DEFINED | |||||
#ifdef __64BIT__ | #ifdef __64BIT__ | ||||
#define RPCC64BIT | #define RPCC64BIT | ||||
@@ -56,41 +56,67 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||||
do { | do { | ||||
while (*address) {YIELDING;}; | while (*address) {YIELDING;}; | ||||
#if defined(_MSC_VER) && !defined(__clang__) | |||||
// use intrinsic instead of inline assembly | |||||
ret = _InterlockedExchange(address, 1); | |||||
// inline assembly | |||||
/*__asm { | |||||
mov eax, address | |||||
mov ebx, 1 | |||||
xchg [eax], ebx | |||||
mov ret, ebx | |||||
}*/ | |||||
#else | |||||
__asm__ __volatile__( | __asm__ __volatile__( | ||||
"xchgl %0, %1\n" | "xchgl %0, %1\n" | ||||
: "=r"(ret), "=m"(*address) | : "=r"(ret), "=m"(*address) | ||||
: "0"(1), "m"(*address) | : "0"(1), "m"(*address) | ||||
: "memory"); | : "memory"); | ||||
#endif | |||||
} while (ret); | } while (ret); | ||||
} | } | ||||
#define BLAS_LOCK_DEFINED | |||||
static __inline unsigned long long rpcc(void){ | static __inline unsigned long long rpcc(void){ | ||||
#if defined(_MSC_VER) && !defined(__clang__) | |||||
return __rdtsc(); // use MSVC intrinsic | |||||
#else | |||||
unsigned int a, d; | unsigned int a, d; | ||||
__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); | __asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d)); | ||||
return ((unsigned long long)a + ((unsigned long long)d << 32)); | return ((unsigned long long)a + ((unsigned long long)d << 32)); | ||||
#endif | |||||
}; | }; | ||||
#define RPCC_DEFINED | |||||
/*
 * Return an address that lies in the caller-visible part of the current
 * stack (used as an approximation of the stack pointer).
 *
 * GCC-style compilers read %esp directly.  MSVC (non-clang) has no inline
 * register access here, so the address of the return-address slot is used
 * instead: it is a location on the stack right at the top of this frame.
 */
static __inline unsigned long getstackaddr(void){
#if defined(_MSC_VER) && !defined(__clang__)
  /* _ReturnAddress() would yield the caller's *code* address, which is not
     a stack location at all; _AddressOfReturnAddress() points into the
     stack, which is what callers of getstackaddr() expect. */
  return (unsigned long)_AddressOfReturnAddress();
#else
  unsigned long addr;

  __asm__ __volatile__ ("mov %%esp, %0"
			 : "=r"(addr) : : "memory");

  return addr;
#endif
}
static __inline long double sqrt_long(long double val) { | static __inline long double sqrt_long(long double val) { | ||||
#if defined(_MSC_VER) && !defined(__clang__) | |||||
return sqrt(val); // not sure if this will use fsqrt | |||||
#else | |||||
long double result; | long double result; | ||||
__asm__ __volatile__ ("fldt %1\n" | __asm__ __volatile__ ("fldt %1\n" | ||||
"fsqrt\n" | "fsqrt\n" | ||||
"fstpt %0\n" : "=m" (result) : "m"(val)); | "fstpt %0\n" : "=m" (result) : "m"(val)); | ||||
return result; | return result; | ||||
#endif | |||||
} | } | ||||
#define SQRT(a) sqrt_long(a) | #define SQRT(a) sqrt_long(a) | ||||
@@ -100,7 +126,7 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); | |||||
#define WHEREAMI | #define WHEREAMI | ||||
static inline int WhereAmI(void){ | |||||
static __inline int WhereAmI(void){ | |||||
int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
int apicid; | int apicid; | ||||
@@ -146,9 +172,14 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||||
y = blas_quick_divide_table[y]; | y = blas_quick_divide_table[y]; | ||||
#if defined(_MSC_VER) && !defined(__clang__) | |||||
(void*)result; | |||||
return x*y; | |||||
#else | |||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); | __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); | ||||
return result; | return result; | ||||
#endif | |||||
} | } | ||||
#endif | #endif | ||||
@@ -171,7 +202,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||||
#define MMXSTORE movd | #define MMXSTORE movd | ||||
#endif | #endif | ||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) | |||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
//Enable some optimization for barcelona. | //Enable some optimization for barcelona. | ||||
#define BARCELONA_OPTIMIZATION | #define BARCELONA_OPTIMIZATION | ||||
#endif | #endif | ||||
@@ -284,8 +315,12 @@ REALNAME: | |||||
#define PROFCODE | #define PROFCODE | ||||
#ifdef __clang__ | |||||
#define EPILOGUE .end | |||||
#else | |||||
#define EPILOGUE .end REALNAME | #define EPILOGUE .end REALNAME | ||||
#endif | #endif | ||||
#endif | |||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) | #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) | ||||
#define PROLOGUE \ | #define PROLOGUE \ | ||||
@@ -41,6 +41,10 @@ | |||||
#ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
#ifdef C_MSVC | |||||
#include <intrin.h> | |||||
#endif | |||||
#ifdef C_SUN | #ifdef C_SUN | ||||
#define __asm__ __asm | #define __asm__ __asm | ||||
#define __volatile__ | #define __volatile__ | ||||
@@ -61,30 +65,45 @@ | |||||
static void __inline blas_lock(volatile BLASULONG *address){ | static void __inline blas_lock(volatile BLASULONG *address){ | ||||
#ifndef C_MSVC | |||||
int ret; | int ret; | ||||
#else | |||||
BLASULONG ret; | |||||
#endif | |||||
do { | do { | ||||
while (*address) {YIELDING;}; | while (*address) {YIELDING;}; | ||||
#ifndef C_MSVC | |||||
__asm__ __volatile__( | __asm__ __volatile__( | ||||
"xchgl %0, %1\n" | "xchgl %0, %1\n" | ||||
: "=r"(ret), "=m"(*address) | : "=r"(ret), "=m"(*address) | ||||
: "0"(1), "m"(*address) | : "0"(1), "m"(*address) | ||||
: "memory"); | : "memory"); | ||||
#else | |||||
ret=InterlockedExchange64((volatile LONG64 *)(address), 1); | |||||
#endif | |||||
} while (ret); | } while (ret); | ||||
} | } | ||||
#define BLAS_LOCK_DEFINED | |||||
/*
 * Read the processor time-stamp counter and return it as one value:
 * low half from EAX, high half (shifted left by 32) from EDX.
 * MSVC builds use the __rdtsc() intrinsic instead of inline assembly.
 */
static __inline BLASULONG rpcc(void){
#ifndef C_MSVC
  BLASULONG lo, hi;

  __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));

  return lo + (hi << 32);
#else
  return __rdtsc();
#endif
}
#define RPCC_DEFINED | |||||
#define RPCC64BIT | #define RPCC64BIT | ||||
#ifndef C_MSVC | |||||
static __inline BLASULONG getstackaddr(void){ | static __inline BLASULONG getstackaddr(void){ | ||||
BLASULONG addr; | BLASULONG addr; | ||||
@@ -93,22 +112,32 @@ static __inline BLASULONG getstackaddr(void){ | |||||
return addr; | return addr; | ||||
} | } | ||||
#endif | |||||
/*
 * Execute CPUID for leaf `op` and store the resulting EAX, EBX, ECX and EDX
 * register values through the four output pointers.
 */
static __inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#ifndef C_MSVC
  /* GCC-style inline assembly: `op` goes in through EAX ("0" ties it to
     the first output operand). */
  __asm__ __volatile__("cpuid"
		       : "=a" (*eax),
			 "=b" (*ebx),
			 "=c" (*ecx),
			 "=d" (*edx)
		       : "0" (op));
#else
  /* MSVC: the __cpuid intrinsic fills a 4-int array in EAX..EDX order. */
  int regs[4];

  __cpuid(regs, op);

  *eax = regs[0];
  *ebx = regs[1];
  *ecx = regs[2];
  *edx = regs[3];
#endif
}
/* | /* | ||||
#define WHEREAMI | #define WHEREAMI | ||||
*/ | */ | ||||
static inline int WhereAmI(void){ | |||||
static __inline int WhereAmI(void){ | |||||
int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
int apicid; | int apicid; | ||||
@@ -150,10 +179,14 @@ static inline int WhereAmI(void){ | |||||
#define GET_IMAGE_CANCEL | #define GET_IMAGE_CANCEL | ||||
#ifdef SMP | #ifdef SMP | ||||
#ifdef USE64BITINT | |||||
#if defined(USE64BITINT) | |||||
static __inline blasint blas_quickdivide(blasint x, blasint y){ | static __inline blasint blas_quickdivide(blasint x, blasint y){ | ||||
return x / y; | return x / y; | ||||
} | } | ||||
#elif defined (C_MSVC) | |||||
static __inline BLASLONG blas_quickdivide(BLASLONG x, BLASLONG y){ | |||||
return x / y; | |||||
} | |||||
#else | #else | ||||
extern unsigned int blas_quick_divide_table[]; | extern unsigned int blas_quick_divide_table[]; | ||||
@@ -226,7 +259,7 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||||
#ifdef ASSEMBLER | #ifdef ASSEMBLER | ||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) | |||||
#if defined(PILEDRIVER) || defined(BULLDOZER) || defined(STEAMROLLER) || defined(EXCAVATOR) | |||||
//Enable some optimization for barcelona. | //Enable some optimization for barcelona. | ||||
#define BARCELONA_OPTIMIZATION | #define BARCELONA_OPTIMIZATION | ||||
#endif | #endif | ||||
@@ -220,6 +220,15 @@ | |||||
#define ZOMATCOPY_K_CTC zomatcopy_k_ctc | #define ZOMATCOPY_K_CTC zomatcopy_k_ctc | ||||
#define ZOMATCOPY_K_RTC zomatcopy_k_rtc | #define ZOMATCOPY_K_RTC zomatcopy_k_rtc | ||||
#define ZIMATCOPY_K_CN zimatcopy_k_cn | |||||
#define ZIMATCOPY_K_RN zimatcopy_k_rn | |||||
#define ZIMATCOPY_K_CT zimatcopy_k_ct | |||||
#define ZIMATCOPY_K_RT zimatcopy_k_rt | |||||
#define ZIMATCOPY_K_CNC zimatcopy_k_cnc | |||||
#define ZIMATCOPY_K_RNC zimatcopy_k_rnc | |||||
#define ZIMATCOPY_K_CTC zimatcopy_k_ctc | |||||
#define ZIMATCOPY_K_RTC zimatcopy_k_rtc | |||||
#define ZGEADD_K zgeadd_k | #define ZGEADD_K zgeadd_k | ||||
#else | #else | ||||
@@ -404,6 +413,15 @@ | |||||
#define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc | #define ZOMATCOPY_K_CTC gotoblas -> zomatcopy_k_ctc | ||||
#define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc | #define ZOMATCOPY_K_RTC gotoblas -> zomatcopy_k_rtc | ||||
#define ZIMATCOPY_K_CN gotoblas -> zimatcopy_k_cn | |||||
#define ZIMATCOPY_K_RN gotoblas -> zimatcopy_k_rn | |||||
#define ZIMATCOPY_K_CT gotoblas -> zimatcopy_k_ct | |||||
#define ZIMATCOPY_K_RT gotoblas -> zimatcopy_k_rt | |||||
#define ZIMATCOPY_K_CNC gotoblas -> zimatcopy_k_cnc | |||||
#define ZIMATCOPY_K_RNC gotoblas -> zimatcopy_k_rnc | |||||
#define ZIMATCOPY_K_CTC gotoblas -> zimatcopy_k_ctc | |||||
#define ZIMATCOPY_K_RTC gotoblas -> zimatcopy_k_rtc | |||||
#define ZGEADD_K gotoblas -> zgeadd_k | #define ZGEADD_K gotoblas -> zgeadd_k | ||||
#endif | #endif | ||||
@@ -39,6 +39,10 @@ | |||||
#ifndef CPUID_H | #ifndef CPUID_H | ||||
#define CPUID_H | #define CPUID_H | ||||
#if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64) | |||||
#define INTEL_AMD | |||||
#endif | |||||
#define VENDOR_INTEL 1 | #define VENDOR_INTEL 1 | ||||
#define VENDOR_UMC 2 | #define VENDOR_UMC 2 | ||||
#define VENDOR_AMD 3 | #define VENDOR_AMD 3 | ||||
@@ -59,7 +63,7 @@ | |||||
#define FAMILY_PM 7 | #define FAMILY_PM 7 | ||||
#define FAMILY_IA64 8 | #define FAMILY_IA64 8 | ||||
#if defined(__i386__) || defined(__x86_64__) | |||||
#ifdef INTEL_AMD | |||||
#define GET_EXFAMILY 1 | #define GET_EXFAMILY 1 | ||||
#define GET_EXMODEL 2 | #define GET_EXMODEL 2 | ||||
#define GET_TYPE 3 | #define GET_TYPE 3 | ||||
@@ -109,6 +113,7 @@ | |||||
#define CORE_PILEDRIVER 23 | #define CORE_PILEDRIVER 23 | ||||
#define CORE_HASWELL 24 | #define CORE_HASWELL 24 | ||||
#define CORE_STEAMROLLER 25 | #define CORE_STEAMROLLER 25 | ||||
#define CORE_EXCAVATOR 26 | |||||
#define HAVE_SSE (1 << 0) | #define HAVE_SSE (1 << 0) | ||||
#define HAVE_SSE2 (1 << 1) | #define HAVE_SSE2 (1 << 1) | ||||
@@ -203,5 +208,6 @@ typedef struct { | |||||
#define CPUTYPE_PILEDRIVER 47 | #define CPUTYPE_PILEDRIVER 47 | ||||
#define CPUTYPE_HASWELL 48 | #define CPUTYPE_HASWELL 48 | ||||
#define CPUTYPE_STEAMROLLER 49 | #define CPUTYPE_STEAMROLLER 49 | ||||
#define CPUTYPE_EXCAVATOR 50 | |||||
#endif | #endif |
@@ -192,6 +192,7 @@ void get_cpuconfig(void) | |||||
{ | { | ||||
case CPU_CORTEXA9: | case CPU_CORTEXA9: | ||||
printf("#define CORTEXA9\n"); | printf("#define CORTEXA9\n"); | ||||
printf("#define ARMV7\n"); | |||||
printf("#define HAVE_VFP\n"); | printf("#define HAVE_VFP\n"); | ||||
printf("#define HAVE_VFPV3\n"); | printf("#define HAVE_VFPV3\n"); | ||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n"); | if ( get_feature("neon")) printf("#define HAVE_NEON\n"); | ||||
@@ -207,6 +208,7 @@ void get_cpuconfig(void) | |||||
case CPU_CORTEXA15: | case CPU_CORTEXA15: | ||||
printf("#define CORTEXA15\n"); | printf("#define CORTEXA15\n"); | ||||
printf("#define ARMV7\n"); | |||||
printf("#define HAVE_VFP\n"); | printf("#define HAVE_VFP\n"); | ||||
printf("#define HAVE_VFPV3\n"); | printf("#define HAVE_VFPV3\n"); | ||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n"); | if ( get_feature("neon")) printf("#define HAVE_NEON\n"); | ||||
@@ -115,6 +115,7 @@ int detect(void){ | |||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; | if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; | ||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | ||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | ||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6; | |||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | ||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | ||||
@@ -40,6 +40,12 @@ | |||||
#include <string.h> | #include <string.h> | ||||
#include "cpuid.h" | #include "cpuid.h" | ||||
#if defined(_MSC_VER) && !defined(__clang__) | |||||
#define C_INLINE __inline | |||||
#else | |||||
#define C_INLINE inline | |||||
#endif | |||||
/* | /* | ||||
#ifdef NO_AVX | #ifdef NO_AVX | ||||
#define CPUTYPE_HASWELL CPUTYPE_NEHALEM | #define CPUTYPE_HASWELL CPUTYPE_NEHALEM | ||||
@@ -53,12 +59,26 @@ | |||||
#endif | #endif | ||||
*/ | */ | ||||
#if defined(_MSC_VER) && !defined(__clang__) | |||||
/*
 * MSVC (non-clang) implementation of cpuid(): runs the __cpuid intrinsic for
 * leaf `op` and copies the four returned register values to the caller.
 */
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
{
  /* Same initial contents as "{-1}": first element -1, the rest zero. */
  int regs[4] = {-1, 0, 0, 0};

  __cpuid(regs, op);

  *eax = regs[0];
  *ebx = regs[1];
  *ecx = regs[2];
  *edx = regs[3];
}
#else | |||||
#ifndef CPUIDEMU | #ifndef CPUIDEMU | ||||
#if defined(__APPLE__) && defined(__i386__) | #if defined(__APPLE__) && defined(__i386__) | ||||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); | void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); | ||||
#else | #else | ||||
static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ | |||||
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ | |||||
#if defined(__i386__) && defined(__PIC__) | #if defined(__i386__) && defined(__PIC__) | ||||
__asm__ __volatile__ | __asm__ __volatile__ | ||||
("mov %%ebx, %%edi;" | ("mov %%ebx, %%edi;" | ||||
@@ -115,14 +135,16 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int * | |||||
#endif | #endif | ||||
static inline int have_cpuid(void){ | |||||
#endif // _MSC_VER | |||||
static C_INLINE int have_cpuid(void){ | |||||
int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
cpuid(0, &eax, &ebx, &ecx, &edx); | cpuid(0, &eax, &ebx, &ecx, &edx); | ||||
return eax; | return eax; | ||||
} | } | ||||
static inline int have_excpuid(void){ | |||||
static C_INLINE int have_excpuid(void){ | |||||
int eax, ebx, ecx, edx; | int eax, ebx, ecx, edx; | ||||
cpuid(0x80000000, &eax, &ebx, &ecx, &edx); | cpuid(0x80000000, &eax, &ebx, &ecx, &edx); | ||||
@@ -130,10 +152,14 @@ static inline int have_excpuid(void){ | |||||
} | } | ||||
#ifndef NO_AVX | #ifndef NO_AVX | ||||
static inline void xgetbv(int op, int * eax, int * edx){ | |||||
static C_INLINE void xgetbv(int op, int * eax, int * edx){ | |||||
//Use binary code for xgetbv | //Use binary code for xgetbv | ||||
#if defined(_MSC_VER) && !defined(__clang__) | |||||
*eax = __xgetbv(op); | |||||
#else | |||||
__asm__ __volatile__ | __asm__ __volatile__ | ||||
(".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); | (".byte 0x0f, 0x01, 0xd0": "=a" (*eax), "=d" (*edx) : "c" (op) : "cc"); | ||||
#endif | |||||
} | } | ||||
#endif | #endif | ||||
@@ -1098,6 +1124,16 @@ int get_cpuname(void){ | |||||
return CPUTYPE_HASWELL; | return CPUTYPE_HASWELL; | ||||
#else | #else | ||||
return CPUTYPE_SANDYBRIDGE; | return CPUTYPE_SANDYBRIDGE; | ||||
#endif | |||||
else | |||||
return CPUTYPE_NEHALEM; | |||||
case 13: | |||||
//Broadwell | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CPUTYPE_HASWELL; | |||||
#else | |||||
return CPUTYPE_SANDYBRIDGE; | |||||
#endif | #endif | ||||
else | else | ||||
return CPUTYPE_NEHALEM; | return CPUTYPE_NEHALEM; | ||||
@@ -1112,11 +1148,57 @@ int get_cpuname(void){ | |||||
return CPUTYPE_HASWELL; | return CPUTYPE_HASWELL; | ||||
#else | #else | ||||
return CPUTYPE_SANDYBRIDGE; | return CPUTYPE_SANDYBRIDGE; | ||||
#endif | |||||
else | |||||
return CPUTYPE_NEHALEM; | |||||
case 7: | |||||
case 15: | |||||
//Broadwell | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CPUTYPE_HASWELL; | |||||
#else | |||||
return CPUTYPE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CPUTYPE_NEHALEM; | |||||
case 14: | |||||
//Skylake | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CPUTYPE_HASWELL; | |||||
#else | |||||
return CPUTYPE_SANDYBRIDGE; | |||||
#endif | #endif | ||||
else | else | ||||
return CPUTYPE_NEHALEM; | return CPUTYPE_NEHALEM; | ||||
} | } | ||||
break; | break; | ||||
case 5: | |||||
switch (model) { | |||||
case 6: | |||||
//Broadwell | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CPUTYPE_HASWELL; | |||||
#else | |||||
return CPUTYPE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CPUTYPE_NEHALEM; | |||||
case 5: | |||||
case 14: | |||||
// Skylake | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CPUTYPE_HASWELL; | |||||
#else | |||||
return CPUTYPE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CPUTYPE_NEHALEM; | |||||
} | |||||
break; | |||||
} | } | ||||
break; | break; | ||||
case 0x7: | case 0x7: | ||||
@@ -1163,11 +1245,20 @@ int get_cpuname(void){ | |||||
else | else | ||||
return CPUTYPE_BARCELONA; //OS don't support AVX. | return CPUTYPE_BARCELONA; //OS don't support AVX. | ||||
case 0: | case 0: | ||||
if(support_avx()) | |||||
return CPUTYPE_STEAMROLLER; | |||||
else | |||||
return CPUTYPE_BARCELONA; //OS don't support AVX. | |||||
switch(exmodel){ | |||||
case 3: | |||||
if(support_avx()) | |||||
return CPUTYPE_STEAMROLLER; | |||||
else | |||||
return CPUTYPE_BARCELONA; //OS don't support AVX. | |||||
case 6: | |||||
if(support_avx()) | |||||
return CPUTYPE_EXCAVATOR; | |||||
else | |||||
return CPUTYPE_BARCELONA; //OS don't support AVX. | |||||
} | |||||
break; | |||||
} | } | ||||
break; | break; | ||||
case 5: | case 5: | ||||
@@ -1297,6 +1388,7 @@ static char *cpuname[] = { | |||||
"PILEDRIVER", | "PILEDRIVER", | ||||
"HASWELL", | "HASWELL", | ||||
"STEAMROLLER", | "STEAMROLLER", | ||||
"EXCAVATOR", | |||||
}; | }; | ||||
static char *lowercpuname[] = { | static char *lowercpuname[] = { | ||||
@@ -1349,6 +1441,7 @@ static char *lowercpuname[] = { | |||||
"piledriver", | "piledriver", | ||||
"haswell", | "haswell", | ||||
"steamroller", | "steamroller", | ||||
"excavator", | |||||
}; | }; | ||||
static char *corename[] = { | static char *corename[] = { | ||||
@@ -1378,6 +1471,7 @@ static char *corename[] = { | |||||
"PILEDRIVER", | "PILEDRIVER", | ||||
"HASWELL", | "HASWELL", | ||||
"STEAMROLLER", | "STEAMROLLER", | ||||
"EXCAVATOR", | |||||
}; | }; | ||||
static char *corename_lower[] = { | static char *corename_lower[] = { | ||||
@@ -1407,6 +1501,7 @@ static char *corename_lower[] = { | |||||
"piledriver", | "piledriver", | ||||
"haswell", | "haswell", | ||||
"steamroller", | "steamroller", | ||||
"excavator", | |||||
}; | }; | ||||
@@ -1525,6 +1620,16 @@ int get_coretype(void){ | |||||
return CORE_HASWELL; | return CORE_HASWELL; | ||||
#else | #else | ||||
return CORE_SANDYBRIDGE; | return CORE_SANDYBRIDGE; | ||||
#endif | |||||
else | |||||
return CORE_NEHALEM; | |||||
case 13: | |||||
//broadwell | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CORE_HASWELL; | |||||
#else | |||||
return CORE_SANDYBRIDGE; | |||||
#endif | #endif | ||||
else | else | ||||
return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
@@ -1539,11 +1644,57 @@ int get_coretype(void){ | |||||
return CORE_HASWELL; | return CORE_HASWELL; | ||||
#else | #else | ||||
return CORE_SANDYBRIDGE; | return CORE_SANDYBRIDGE; | ||||
#endif | |||||
else | |||||
return CORE_NEHALEM; | |||||
case 7: | |||||
case 15: | |||||
//broadwell | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CORE_HASWELL; | |||||
#else | |||||
return CORE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CORE_NEHALEM; | |||||
case 14: | |||||
//Skylake | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CORE_HASWELL; | |||||
#else | |||||
return CORE_SANDYBRIDGE; | |||||
#endif | #endif | ||||
else | else | ||||
return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
} | } | ||||
break; | break; | ||||
case 5: | |||||
switch (model) { | |||||
case 6: | |||||
//broadwell | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CORE_HASWELL; | |||||
#else | |||||
return CORE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CORE_NEHALEM; | |||||
case 5: | |||||
case 14: | |||||
// Skylake | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CORE_HASWELL; | |||||
#else | |||||
return CORE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CORE_NEHALEM; | |||||
} | |||||
break; | |||||
} | } | ||||
break; | break; | ||||
@@ -1574,10 +1725,20 @@ int get_coretype(void){ | |||||
return CORE_BARCELONA; //OS don't support AVX. | return CORE_BARCELONA; //OS don't support AVX. | ||||
case 0: | case 0: | ||||
if(support_avx()) | |||||
return CORE_STEAMROLLER; | |||||
else | |||||
return CORE_BARCELONA; //OS don't support AVX. | |||||
switch(exmodel){ | |||||
case 3: | |||||
if(support_avx()) | |||||
return CORE_STEAMROLLER; | |||||
else | |||||
return CORE_BARCELONA; //OS don't support AVX. | |||||
case 6: | |||||
if(support_avx()) | |||||
return CORE_EXCAVATOR; | |||||
else | |||||
return CORE_BARCELONA; //OS don't support AVX. | |||||
} | |||||
break; | |||||
} | } | ||||
@@ -44,6 +44,10 @@ COMPILER_DEC | |||||
COMPILER_GNU | COMPILER_GNU | ||||
#endif | #endif | ||||
#if defined(__ANDROID__) | |||||
OS_ANDROID | |||||
#endif | |||||
#if defined(__linux__) | #if defined(__linux__) | ||||
OS_LINUX | OS_LINUX | ||||
#endif | #endif | ||||
@@ -0,0 +1,46 @@ | |||||
include_directories(${CMAKE_SOURCE_DIR}) | |||||
enable_language(Fortran) | |||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -DADD${BU} -DCBLAS") | |||||
FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh | |||||
"$1 < $2\n" | |||||
) | |||||
foreach(float_type ${FLOAT_TYPES}) | |||||
string(SUBSTRING ${float_type} 0 1 float_char_upper) | |||||
string(TOLOWER ${float_char_upper} float_char) | |||||
#level1 | |||||
add_executable(x${float_char}cblat1 | |||||
c_${float_char}blat1.f | |||||
c_${float_char}blas1.c) | |||||
target_link_libraries(x${float_char}cblat1 ${OpenBLAS_LIBNAME}_static) | |||||
add_test(NAME "x${float_char}cblat1" | |||||
COMMAND "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat1") | |||||
#level2 | |||||
add_executable(x${float_char}cblat2 | |||||
c_${float_char}blat2.f | |||||
c_${float_char}blas2.c | |||||
c_${float_char}2chke.c | |||||
auxiliary.c | |||||
c_xerbla.c | |||||
constant.c) | |||||
target_link_libraries(x${float_char}cblat2 ${OpenBLAS_LIBNAME}_static) | |||||
add_test(NAME "x${float_char}cblat2" | |||||
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat2" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in2") | |||||
#level3 | |||||
add_executable(x${float_char}cblat3 | |||||
c_${float_char}blat3.f | |||||
c_${float_char}blas3.c | |||||
c_${float_char}3chke.c | |||||
auxiliary.c | |||||
c_xerbla.c | |||||
constant.c) | |||||
target_link_libraries(x${float_char}cblat3 ${OpenBLAS_LIBNAME}_static) | |||||
add_test(NAME "x${float_char}cblat3" | |||||
COMMAND sh "${CMAKE_CURRENT_BINARY_DIR}/test_cblas_helper.sh" "${CMAKE_CURRENT_BINARY_DIR}/x${float_char}cblat3" "${PROJECT_SOURCE_DIR}/ctest/${float_char}in3") | |||||
endforeach() |
@@ -27,12 +27,18 @@ ctestl2o = c_cblas2.o c_c2chke.o auxiliary.o c_xerbla.o constant.o | |||||
ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o constant.o | ctestl3o = c_cblas3.o c_c3chke.o auxiliary.o c_xerbla.o constant.o | ||||
ctestl3o_3m = c_cblas3_3m.o c_c3chke_3m.o auxiliary.o c_xerbla.o constant.o | |||||
ztestl1o = c_zblas1.o | ztestl1o = c_zblas1.o | ||||
ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o constant.o | ztestl2o = c_zblas2.o c_z2chke.o auxiliary.o c_xerbla.o constant.o | ||||
ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o | ztestl3o = c_zblas3.o c_z3chke.o auxiliary.o c_xerbla.o constant.o | ||||
ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o | |||||
all :: all1 all2 all3 | all :: all1 all2 all3 | ||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1 | all1: xscblat1 xdcblat1 xccblat1 xzcblat1 | ||||
@@ -115,8 +121,8 @@ xccblat2: $(ctestl2o) c_cblat2.o $(TOPDIR)/$(LIBNAME) | |||||
xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME) | xccblat3: $(ctestl3o) c_cblat3.o $(TOPDIR)/$(LIBNAME) | ||||
$(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | $(FC) $(FLDFLAGS) -o xccblat3 c_cblat3.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | ||||
xccblat3_3m: $(ctestl3o) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME) | |||||
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||||
xccblat3_3m: $(ctestl3o_3m) c_cblat3_3m.o $(TOPDIR)/$(LIBNAME) | |||||
$(FC) $(FLDFLAGS) -o xccblat3_3m c_cblat3_3m.o $(ctestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||||
# Double complex | # Double complex | ||||
xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME) | xzcblat1: $(ztestl1o) c_zblat1.o $(TOPDIR)/$(LIBNAME) | ||||
@@ -127,8 +133,8 @@ xzcblat3: $(ztestl3o) c_zblat3.o $(TOPDIR)/$(LIBNAME) | |||||
$(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | $(FC) $(FLDFLAGS) -o xzcblat3 c_zblat3.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | ||||
xzcblat3_3m: $(ztestl3o) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME) | |||||
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||||
xzcblat3_3m: $(ztestl3o_3m) c_zblat3_3m.o $(TOPDIR)/$(LIBNAME) | |||||
$(FC) $(FLDFLAGS) -o xzcblat3_3m c_zblat3_3m.o $(ztestl3o_3m) $(LIB) $(EXTRALIB) $(CEXTRALIB) | |||||
include $(TOPDIR)/Makefile.tail | include $(TOPDIR)/Makefile.tail |
@@ -46,235 +46,7 @@ void F77_c3chke(char * rout) { | |||||
} | } | ||||
if (strncmp( sf,"cblas_cgemm3m" ,13)==0) { | |||||
cblas_rout = "cblas_cgemm3" ; | |||||
cblas_info = 1; | |||||
cblas_cgemm3m( INVALID, CblasNoTrans, CblasNoTrans, 0, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 1; | |||||
cblas_cgemm3m( INVALID, CblasNoTrans, CblasTrans, 0, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 1; | |||||
cblas_cgemm3m( INVALID, CblasTrans, CblasNoTrans, 0, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 1; | |||||
cblas_cgemm3m( INVALID, CblasTrans, CblasTrans, 0, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 2; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, INVALID, CblasNoTrans, 0, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 2; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, INVALID, CblasTrans, 0, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 3; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, INVALID, 0, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 3; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, INVALID, 0, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 4; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 4; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 4; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 4; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, INVALID, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 5; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 5; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 5; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 5; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, INVALID, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 6; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 6; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 6; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 6; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, INVALID, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 9; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 2 ); | |||||
chkxer(); | |||||
cblas_info = 9; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 2 ); | |||||
chkxer(); | |||||
cblas_info = 9; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2, | |||||
ALPHA, A, 1, B, 2, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 9; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 0, 2, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 11; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 11; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 0, 0, 2, | |||||
ALPHA, A, 2, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 11; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 0, 2, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 11; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 0, 2, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 14; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasNoTrans, 2, 0, 0, | |||||
ALPHA, A, 2, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 14; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasNoTrans, CblasTrans, 2, 0, 0, | |||||
ALPHA, A, 2, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 14; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasNoTrans, 2, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 14; RowMajorStrg = FALSE; | |||||
cblas_cgemm3m( CblasColMajor, CblasTrans, CblasTrans, 2, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 4; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, INVALID, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 4; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, INVALID, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 4; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, INVALID, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 4; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, INVALID, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 5; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, INVALID, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 5; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, INVALID, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 5; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, INVALID, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 5; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, INVALID, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 6; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, INVALID, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 6; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, INVALID, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 6; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 0, INVALID, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 6; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, INVALID, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 9; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 0, 2, | |||||
ALPHA, A, 1, B, 1, BETA, C, 2 ); | |||||
chkxer(); | |||||
cblas_info = 9; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2, | |||||
ALPHA, A, 1, B, 2, BETA, C, 2 ); | |||||
chkxer(); | |||||
cblas_info = 9; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 2, 0, 0, | |||||
ALPHA, A, 1, B, 2, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 9; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 2, 0, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 11; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 11; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0, | |||||
ALPHA, A, 2, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 11; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 0, 2, | |||||
ALPHA, A, 2, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 11; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 0, 2, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 14; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasNoTrans, 0, 2, 0, | |||||
ALPHA, A, 1, B, 2, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 14; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasNoTrans, CblasTrans, 0, 2, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 14; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasNoTrans, 0, 2, 0, | |||||
ALPHA, A, 1, B, 2, BETA, C, 1 ); | |||||
chkxer(); | |||||
cblas_info = 14; RowMajorStrg = TRUE; | |||||
cblas_cgemm3m( CblasRowMajor, CblasTrans, CblasTrans, 0, 2, 0, | |||||
ALPHA, A, 1, B, 1, BETA, C, 1 ); | |||||
chkxer(); | |||||
} else if (strncmp( sf,"cblas_cgemm" ,11)==0) { | |||||
if (strncmp( sf,"cblas_cgemm" ,11)==0) { | |||||
cblas_rout = "cblas_cgemm" ; | cblas_rout = "cblas_cgemm" ; | ||||
@@ -567,81 +567,3 @@ void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn, | |||||
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n, | |||||
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, | |||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, | |||||
CBLAS_TEST_COMPLEX *c, int *ldc ) { | |||||
CBLAS_TEST_COMPLEX *A, *B, *C; | |||||
int i,j,LDA, LDB, LDC; | |||||
enum CBLAS_TRANSPOSE transa, transb; | |||||
get_transpose_type(transpa, &transa); | |||||
get_transpose_type(transpb, &transb); | |||||
if (*order == TEST_ROW_MJR) { | |||||
if (transa == CblasNoTrans) { | |||||
LDA = *k+1; | |||||
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( i=0; i<*m; i++ ) | |||||
for( j=0; j<*k; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
else { | |||||
LDA = *m+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( i=0; i<*k; i++ ) | |||||
for( j=0; j<*m; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
if (transb == CblasNoTrans) { | |||||
LDB = *n+1; | |||||
B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) ); | |||||
for( i=0; i<*k; i++ ) | |||||
for( j=0; j<*n; j++ ) { | |||||
B[i*LDB+j].real=b[j*(*ldb)+i].real; | |||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag; | |||||
} | |||||
} | |||||
else { | |||||
LDB = *k+1; | |||||
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*k; j++ ) { | |||||
B[i*LDB+j].real=b[j*(*ldb)+i].real; | |||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag; | |||||
} | |||||
} | |||||
LDC = *n+1; | |||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*m; i++ ) { | |||||
C[i*LDC+j].real=c[j*(*ldc)+i].real; | |||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag; | |||||
} | |||||
cblas_cgemm3m( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA, | |||||
B, LDB, beta, C, LDC ); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*m; i++ ) { | |||||
c[j*(*ldc)+i].real=C[i*LDC+j].real; | |||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag; | |||||
} | |||||
free(A); | |||||
free(B); | |||||
free(C); | |||||
} | |||||
else if (*order == TEST_COL_MJR) | |||||
cblas_cgemm3m( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda, | |||||
b, *ldb, beta, c, *ldc ); | |||||
else | |||||
cblas_cgemm3m( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda, | |||||
b, *ldb, beta, c, *ldc ); | |||||
} | |||||
@@ -0,0 +1,647 @@ | |||||
/* | |||||
* Written by D.P. Manley, Digital Equipment Corporation. | |||||
* Prefixed "C_" to BLAS routines and their declarations. | |||||
* | |||||
* Modified by T. H. Do, 4/15/98, SGI/CRAY Research. | |||||
*/ | |||||
#include <stdlib.h> | |||||
#include "common.h" | |||||
#include "cblas_test.h" | |||||
#define TEST_COL_MJR 0 | |||||
#define TEST_ROW_MJR 1 | |||||
#define UNDEFINED -1 | |||||
void F77_cgemm(int *order, char *transpa, char *transpb, int *m, int *n, | |||||
int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, | |||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, | |||||
CBLAS_TEST_COMPLEX *c, int *ldc ) { | |||||
CBLAS_TEST_COMPLEX *A, *B, *C; | |||||
int i,j,LDA, LDB, LDC; | |||||
enum CBLAS_TRANSPOSE transa, transb; | |||||
get_transpose_type(transpa, &transa); | |||||
get_transpose_type(transpb, &transb); | |||||
if (*order == TEST_ROW_MJR) { | |||||
if (transa == CblasNoTrans) { | |||||
LDA = *k+1; | |||||
A=(CBLAS_TEST_COMPLEX*)malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( i=0; i<*m; i++ ) | |||||
for( j=0; j<*k; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
else { | |||||
LDA = *m+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc(LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( i=0; i<*k; i++ ) | |||||
for( j=0; j<*m; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
if (transb == CblasNoTrans) { | |||||
LDB = *n+1; | |||||
B=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDB*sizeof(CBLAS_TEST_COMPLEX) ); | |||||
for( i=0; i<*k; i++ ) | |||||
for( j=0; j<*n; j++ ) { | |||||
B[i*LDB+j].real=b[j*(*ldb)+i].real; | |||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag; | |||||
} | |||||
} | |||||
else { | |||||
LDB = *k+1; | |||||
B=(CBLAS_TEST_COMPLEX* )malloc(LDB*(*n)*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*k; j++ ) { | |||||
B[i*LDB+j].real=b[j*(*ldb)+i].real; | |||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag; | |||||
} | |||||
} | |||||
LDC = *n+1; | |||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*m; i++ ) { | |||||
C[i*LDC+j].real=c[j*(*ldc)+i].real; | |||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag; | |||||
} | |||||
cblas_cgemm( CblasRowMajor, transa, transb, *m, *n, *k, alpha, A, LDA, | |||||
B, LDB, beta, C, LDC ); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*m; i++ ) { | |||||
c[j*(*ldc)+i].real=C[i*LDC+j].real; | |||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag; | |||||
} | |||||
free(A); | |||||
free(B); | |||||
free(C); | |||||
} | |||||
else if (*order == TEST_COL_MJR) | |||||
cblas_cgemm( CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda, | |||||
b, *ldb, beta, c, *ldc ); | |||||
else | |||||
cblas_cgemm( UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda, | |||||
b, *ldb, beta, c, *ldc ); | |||||
} | |||||
void F77_chemm(int *order, char *rtlf, char *uplow, int *m, int *n, | |||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, | |||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, | |||||
CBLAS_TEST_COMPLEX *c, int *ldc ) { | |||||
CBLAS_TEST_COMPLEX *A, *B, *C; | |||||
int i,j,LDA, LDB, LDC; | |||||
enum CBLAS_UPLO uplo; | |||||
enum CBLAS_SIDE side; | |||||
get_uplo_type(uplow,&uplo); | |||||
get_side_type(rtlf,&side); | |||||
if (*order == TEST_ROW_MJR) { | |||||
if (side == CblasLeft) { | |||||
LDA = *m+1; | |||||
A= (CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( i=0; i<*m; i++ ) | |||||
for( j=0; j<*m; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
else{ | |||||
LDA = *n+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*n; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
LDB = *n+1; | |||||
B=(CBLAS_TEST_COMPLEX* )malloc( (*m)*LDB*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*m; i++ ) | |||||
for( j=0; j<*n; j++ ) { | |||||
B[i*LDB+j].real=b[j*(*ldb)+i].real; | |||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag; | |||||
} | |||||
LDC = *n+1; | |||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*m; i++ ) { | |||||
C[i*LDC+j].real=c[j*(*ldc)+i].real; | |||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag; | |||||
} | |||||
cblas_chemm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB, | |||||
beta, C, LDC ); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*m; i++ ) { | |||||
c[j*(*ldc)+i].real=C[i*LDC+j].real; | |||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag; | |||||
} | |||||
free(A); | |||||
free(B); | |||||
free(C); | |||||
} | |||||
else if (*order == TEST_COL_MJR) | |||||
cblas_chemm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb, | |||||
beta, c, *ldc ); | |||||
else | |||||
cblas_chemm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb, | |||||
beta, c, *ldc ); | |||||
} | |||||
void F77_csymm(int *order, char *rtlf, char *uplow, int *m, int *n, | |||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, | |||||
CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta, | |||||
CBLAS_TEST_COMPLEX *c, int *ldc ) { | |||||
CBLAS_TEST_COMPLEX *A, *B, *C; | |||||
int i,j,LDA, LDB, LDC; | |||||
enum CBLAS_UPLO uplo; | |||||
enum CBLAS_SIDE side; | |||||
get_uplo_type(uplow,&uplo); | |||||
get_side_type(rtlf,&side); | |||||
if (*order == TEST_ROW_MJR) { | |||||
if (side == CblasLeft) { | |||||
LDA = *m+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDA*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( i=0; i<*m; i++ ) | |||||
for( j=0; j<*m; j++ ) | |||||
A[i*LDA+j]=a[j*(*lda)+i]; | |||||
} | |||||
else{ | |||||
LDA = *n+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*n; j++ ) | |||||
A[i*LDA+j]=a[j*(*lda)+i]; | |||||
} | |||||
LDB = *n+1; | |||||
B=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDB*sizeof(CBLAS_TEST_COMPLEX )); | |||||
for( i=0; i<*m; i++ ) | |||||
for( j=0; j<*n; j++ ) | |||||
B[i*LDB+j]=b[j*(*ldb)+i]; | |||||
LDC = *n+1; | |||||
C=(CBLAS_TEST_COMPLEX* )malloc((*m)*LDC*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*m; i++ ) | |||||
C[i*LDC+j]=c[j*(*ldc)+i]; | |||||
cblas_csymm( CblasRowMajor, side, uplo, *m, *n, alpha, A, LDA, B, LDB, | |||||
beta, C, LDC ); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*m; i++ ) | |||||
c[j*(*ldc)+i]=C[i*LDC+j]; | |||||
free(A); | |||||
free(B); | |||||
free(C); | |||||
} | |||||
else if (*order == TEST_COL_MJR) | |||||
cblas_csymm( CblasColMajor, side, uplo, *m, *n, alpha, a, *lda, b, *ldb, | |||||
beta, c, *ldc ); | |||||
else | |||||
cblas_csymm( UNDEFINED, side, uplo, *m, *n, alpha, a, *lda, b, *ldb, | |||||
beta, c, *ldc ); | |||||
} | |||||
void F77_cherk(int *order, char *uplow, char *transp, int *n, int *k, | |||||
float *alpha, CBLAS_TEST_COMPLEX *a, int *lda, | |||||
float *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) { | |||||
int i,j,LDA,LDC; | |||||
CBLAS_TEST_COMPLEX *A, *C; | |||||
enum CBLAS_UPLO uplo; | |||||
enum CBLAS_TRANSPOSE trans; | |||||
get_uplo_type(uplow,&uplo); | |||||
get_transpose_type(transp,&trans); | |||||
if (*order == TEST_ROW_MJR) { | |||||
if (trans == CblasNoTrans) { | |||||
LDA = *k+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*k; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
else{ | |||||
LDA = *n+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*k; i++ ) | |||||
for( j=0; j<*n; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
LDC = *n+1; | |||||
C=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*n; j++ ) { | |||||
C[i*LDC+j].real=c[j*(*ldc)+i].real; | |||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag; | |||||
} | |||||
cblas_cherk(CblasRowMajor, uplo, trans, *n, *k, *alpha, A, LDA, *beta, | |||||
C, LDC ); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*n; i++ ) { | |||||
c[j*(*ldc)+i].real=C[i*LDC+j].real; | |||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag; | |||||
} | |||||
free(A); | |||||
free(C); | |||||
} | |||||
else if (*order == TEST_COL_MJR) | |||||
cblas_cherk(CblasColMajor, uplo, trans, *n, *k, *alpha, a, *lda, *beta, | |||||
c, *ldc ); | |||||
else | |||||
cblas_cherk(UNDEFINED, uplo, trans, *n, *k, *alpha, a, *lda, *beta, | |||||
c, *ldc ); | |||||
} | |||||
void F77_csyrk(int *order, char *uplow, char *transp, int *n, int *k, | |||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, | |||||
CBLAS_TEST_COMPLEX *beta, CBLAS_TEST_COMPLEX *c, int *ldc ) { | |||||
int i,j,LDA,LDC; | |||||
CBLAS_TEST_COMPLEX *A, *C; | |||||
enum CBLAS_UPLO uplo; | |||||
enum CBLAS_TRANSPOSE trans; | |||||
get_uplo_type(uplow,&uplo); | |||||
get_transpose_type(transp,&trans); | |||||
if (*order == TEST_ROW_MJR) { | |||||
if (trans == CblasNoTrans) { | |||||
LDA = *k+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX)); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*k; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
else{ | |||||
LDA = *n+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc((*k)*LDA*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*k; i++ ) | |||||
for( j=0; j<*n; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
} | |||||
} | |||||
LDC = *n+1; | |||||
C=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDC*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*n; j++ ) { | |||||
C[i*LDC+j].real=c[j*(*ldc)+i].real; | |||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag; | |||||
} | |||||
cblas_csyrk(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA, beta, | |||||
C, LDC ); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*n; i++ ) { | |||||
c[j*(*ldc)+i].real=C[i*LDC+j].real; | |||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag; | |||||
} | |||||
free(A); | |||||
free(C); | |||||
} | |||||
else if (*order == TEST_COL_MJR) | |||||
cblas_csyrk(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda, beta, | |||||
c, *ldc ); | |||||
else | |||||
cblas_csyrk(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, beta, | |||||
c, *ldc ); | |||||
} | |||||
void F77_cher2k(int *order, char *uplow, char *transp, int *n, int *k, | |||||
CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda, | |||||
CBLAS_TEST_COMPLEX *b, int *ldb, float *beta, | |||||
CBLAS_TEST_COMPLEX *c, int *ldc ) { | |||||
int i,j,LDA,LDB,LDC; | |||||
CBLAS_TEST_COMPLEX *A, *B, *C; | |||||
enum CBLAS_UPLO uplo; | |||||
enum CBLAS_TRANSPOSE trans; | |||||
get_uplo_type(uplow,&uplo); | |||||
get_transpose_type(transp,&trans); | |||||
if (*order == TEST_ROW_MJR) { | |||||
if (trans == CblasNoTrans) { | |||||
LDA = *k+1; | |||||
LDB = *k+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDA*sizeof(CBLAS_TEST_COMPLEX )); | |||||
B=(CBLAS_TEST_COMPLEX* )malloc((*n)*LDB*sizeof(CBLAS_TEST_COMPLEX )); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*k; j++ ) { | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
B[i*LDB+j].real=b[j*(*ldb)+i].real; | |||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag; | |||||
} | |||||
} | |||||
else { | |||||
LDA = *n+1; | |||||
LDB = *n+1; | |||||
A=(CBLAS_TEST_COMPLEX* )malloc( LDA*(*k)*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
B=(CBLAS_TEST_COMPLEX* )malloc( LDB*(*k)*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*k; i++ ) | |||||
for( j=0; j<*n; j++ ){ | |||||
A[i*LDA+j].real=a[j*(*lda)+i].real; | |||||
A[i*LDA+j].imag=a[j*(*lda)+i].imag; | |||||
B[i*LDB+j].real=b[j*(*ldb)+i].real; | |||||
B[i*LDB+j].imag=b[j*(*ldb)+i].imag; | |||||
} | |||||
} | |||||
LDC = *n+1; | |||||
C=(CBLAS_TEST_COMPLEX* )malloc( (*n)*LDC*sizeof(CBLAS_TEST_COMPLEX ) ); | |||||
for( i=0; i<*n; i++ ) | |||||
for( j=0; j<*n; j++ ) { | |||||
C[i*LDC+j].real=c[j*(*ldc)+i].real; | |||||
C[i*LDC+j].imag=c[j*(*ldc)+i].imag; | |||||
} | |||||
cblas_cher2k(CblasRowMajor, uplo, trans, *n, *k, alpha, A, LDA, | |||||
B, LDB, *beta, C, LDC ); | |||||
for( j=0; j<*n; j++ ) | |||||
for( i=0; i<*n; i++ ) { | |||||
c[j*(*ldc)+i].real=C[i*LDC+j].real; | |||||
c[j*(*ldc)+i].imag=C[i*LDC+j].imag; | |||||
} | |||||
free(A); | |||||
free(B); | |||||
free(C); | |||||
} | |||||
else if (*order == TEST_COL_MJR) | |||||
cblas_cher2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda, | |||||
b, *ldb, *beta, c, *ldc ); | |||||
else | |||||
cblas_cher2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda, | |||||
b, *ldb, *beta, c, *ldc ); | |||||
} | |||||
/*
 * Test wrapper around cblas_csyr2k; every argument arrives by pointer.
 *
 * *order selects how the underlying routine is invoked:
 *   TEST_ROW_MJR  - a, b and c are copied (indices swapped) into freshly
 *                   allocated buffers whose row stride is one element longer
 *                   than the row length, cblas_csyr2k is called with
 *                   CblasRowMajor, and the n-by-n result is copied back to c.
 *   TEST_COL_MJR  - the caller's arrays and leading dimensions are forwarded
 *                   unchanged with CblasColMajor.
 *   anything else - forwarded unchanged with UNDEFINED as the order argument.
 *
 * uplow and transp are translated by get_uplo_type / get_transpose_type.
 */
void F77_csyr2k(int *order, char *uplow, char *transp, int *n, int *k,
                CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
                CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
                CBLAS_TEST_COMPLEX *c, int *ldc ) {
   enum CBLAS_UPLO uplo;
   enum CBLAS_TRANSPOSE trans;
   CBLAS_TEST_COMPLEX *rowA, *rowB, *rowC;
   int r, q, nrows, ncols, strideA, strideB, strideC;

   get_uplo_type(uplow, &uplo);
   get_transpose_type(transp, &trans);

   /* Pass-through cases: no repacking is needed. */
   if (*order != TEST_ROW_MJR) {
      if (*order == TEST_COL_MJR)
         cblas_csyr2k(CblasColMajor, uplo, trans, *n, *k, alpha, a, *lda,
                      b, *ldb, beta, c, *ldc);
      else
         cblas_csyr2k(UNDEFINED, uplo, trans, *n, *k, alpha, a, *lda,
                      b, *ldb, beta, c, *ldc);
      return;
   }

   /* Extents of the repacked A and B: n-by-k when not transposed, else k-by-n. */
   if (trans == CblasNoTrans) {
      nrows = *n;
      ncols = *k;
   }
   else {
      nrows = *k;
      ncols = *n;
   }
   strideA = ncols + 1;
   strideB = ncols + 1;
   rowA = (CBLAS_TEST_COMPLEX *)malloc(nrows*strideA*sizeof(CBLAS_TEST_COMPLEX));
   rowB = (CBLAS_TEST_COMPLEX *)malloc(nrows*strideB*sizeof(CBLAS_TEST_COMPLEX));
   for (r = 0; r < nrows; r++) {
      for (q = 0; q < ncols; q++) {
         rowA[r*strideA + q].real = a[q*(*lda) + r].real;
         rowA[r*strideA + q].imag = a[q*(*lda) + r].imag;
         rowB[r*strideB + q].real = b[q*(*ldb) + r].real;
         rowB[r*strideB + q].imag = b[q*(*ldb) + r].imag;
      }
   }

   /* Repack the n-by-n C operand the same way. */
   strideC = *n + 1;
   rowC = (CBLAS_TEST_COMPLEX *)malloc((*n)*strideC*sizeof(CBLAS_TEST_COMPLEX));
   for (r = 0; r < *n; r++) {
      for (q = 0; q < *n; q++) {
         rowC[r*strideC + q].real = c[q*(*ldc) + r].real;
         rowC[r*strideC + q].imag = c[q*(*ldc) + r].imag;
      }
   }

   cblas_csyr2k(CblasRowMajor, uplo, trans, *n, *k, alpha, rowA, strideA,
                rowB, strideB, beta, rowC, strideC);

   /* Write the result back into the caller's c array. */
   for (q = 0; q < *n; q++) {
      for (r = 0; r < *n; r++) {
         c[q*(*ldc) + r].real = rowC[r*strideC + q].real;
         c[q*(*ldc) + r].imag = rowC[r*strideC + q].imag;
      }
   }

   free(rowA);
   free(rowB);
   free(rowC);
}
/*
 * Test wrapper around cblas_ctrmm; every argument arrives by pointer.
 *
 * *order selects how the underlying routine is invoked:
 *   TEST_ROW_MJR  - a and b are copied (indices swapped) into freshly
 *                   allocated buffers whose row stride is one element longer
 *                   than the row length, cblas_ctrmm is called with
 *                   CblasRowMajor, and the m-by-n result is copied back to b.
 *   TEST_COL_MJR  - the caller's arrays and leading dimensions are forwarded
 *                   unchanged with CblasColMajor.
 *   anything else - forwarded unchanged with UNDEFINED as the order argument.
 *
 * rtlf, uplow, transp and diagn are translated by get_side_type,
 * get_uplo_type, get_transpose_type and get_diag_type.
 */
void F77_ctrmm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
               int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
               int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
   enum CBLAS_SIDE side;
   enum CBLAS_DIAG diag;
   enum CBLAS_UPLO uplo;
   enum CBLAS_TRANSPOSE trans;
   CBLAS_TEST_COMPLEX *rowA, *rowB;
   int r, q, dimA, strideA, strideB;

   get_uplo_type(uplow, &uplo);
   get_transpose_type(transp, &trans);
   get_diag_type(diagn, &diag);
   get_side_type(rtlf, &side);

   /* Pass-through cases: no repacking is needed. */
   if (*order != TEST_ROW_MJR) {
      if (*order == TEST_COL_MJR)
         cblas_ctrmm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
                     a, *lda, b, *ldb);
      else
         cblas_ctrmm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
                     a, *lda, b, *ldb);
      return;
   }

   /* A is square: m-by-m when applied from the left, n-by-n otherwise. */
   dimA = (side == CblasLeft) ? *m : *n;
   strideA = dimA + 1;
   rowA = (CBLAS_TEST_COMPLEX *)malloc(dimA*strideA*sizeof(CBLAS_TEST_COMPLEX));
   for (r = 0; r < dimA; r++) {
      for (q = 0; q < dimA; q++) {
         rowA[r*strideA + q].real = a[q*(*lda) + r].real;
         rowA[r*strideA + q].imag = a[q*(*lda) + r].imag;
      }
   }

   /* Repack the m-by-n B operand. */
   strideB = *n + 1;
   rowB = (CBLAS_TEST_COMPLEX *)malloc((*m)*strideB*sizeof(CBLAS_TEST_COMPLEX));
   for (r = 0; r < *m; r++) {
      for (q = 0; q < *n; q++) {
         rowB[r*strideB + q].real = b[q*(*ldb) + r].real;
         rowB[r*strideB + q].imag = b[q*(*ldb) + r].imag;
      }
   }

   cblas_ctrmm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
               rowA, strideA, rowB, strideB);

   /* Write the result back into the caller's b array. */
   for (q = 0; q < *n; q++) {
      for (r = 0; r < *m; r++) {
         b[q*(*ldb) + r].real = rowB[r*strideB + q].real;
         b[q*(*ldb) + r].imag = rowB[r*strideB + q].imag;
      }
   }

   free(rowA);
   free(rowB);
}
/*
 * Test wrapper around cblas_ctrsm; every argument arrives by pointer.
 *
 * *order selects how the underlying routine is invoked:
 *   TEST_ROW_MJR  - a and b are copied (indices swapped) into freshly
 *                   allocated buffers whose row stride is one element longer
 *                   than the row length, cblas_ctrsm is called with
 *                   CblasRowMajor, and the m-by-n result is copied back to b.
 *   TEST_COL_MJR  - the caller's arrays and leading dimensions are forwarded
 *                   unchanged with CblasColMajor.
 *   anything else - forwarded unchanged with UNDEFINED as the order argument.
 *
 * rtlf, uplow, transp and diagn are translated by get_side_type,
 * get_uplo_type, get_transpose_type and get_diag_type.
 */
void F77_ctrsm(int *order, char *rtlf, char *uplow, char *transp, char *diagn,
               int *m, int *n, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a,
               int *lda, CBLAS_TEST_COMPLEX *b, int *ldb) {
   enum CBLAS_SIDE side;
   enum CBLAS_DIAG diag;
   enum CBLAS_UPLO uplo;
   enum CBLAS_TRANSPOSE trans;
   CBLAS_TEST_COMPLEX *packedA, *packedB;
   int row, col, orderA, pitchA, pitchB;

   get_uplo_type(uplow, &uplo);
   get_transpose_type(transp, &trans);
   get_diag_type(diagn, &diag);
   get_side_type(rtlf, &side);

   /* Pass-through cases: no repacking is needed. */
   if (*order != TEST_ROW_MJR) {
      if (*order == TEST_COL_MJR)
         cblas_ctrsm(CblasColMajor, side, uplo, trans, diag, *m, *n, alpha,
                     a, *lda, b, *ldb);
      else
         cblas_ctrsm(UNDEFINED, side, uplo, trans, diag, *m, *n, alpha,
                     a, *lda, b, *ldb);
      return;
   }

   /* A is square: m-by-m when applied from the left, n-by-n otherwise. */
   if (side == CblasLeft)
      orderA = *m;
   else
      orderA = *n;
   pitchA = orderA + 1;
   packedA = (CBLAS_TEST_COMPLEX *)malloc(orderA*pitchA*sizeof(CBLAS_TEST_COMPLEX));
   for (row = 0; row < orderA; row++) {
      for (col = 0; col < orderA; col++) {
         packedA[row*pitchA + col].real = a[col*(*lda) + row].real;
         packedA[row*pitchA + col].imag = a[col*(*lda) + row].imag;
      }
   }

   /* Repack the m-by-n B operand. */
   pitchB = *n + 1;
   packedB = (CBLAS_TEST_COMPLEX *)malloc((*m)*pitchB*sizeof(CBLAS_TEST_COMPLEX));
   for (row = 0; row < *m; row++) {
      for (col = 0; col < *n; col++) {
         packedB[row*pitchB + col].real = b[col*(*ldb) + row].real;
         packedB[row*pitchB + col].imag = b[col*(*ldb) + row].imag;
      }
   }

   cblas_ctrsm(CblasRowMajor, side, uplo, trans, diag, *m, *n, alpha,
               packedA, pitchA, packedB, pitchB);

   /* Write the result back into the caller's b array. */
   for (col = 0; col < *n; col++) {
      for (row = 0; row < *m; row++) {
         b[col*(*ldb) + row].real = packedB[row*pitchB + col].real;
         b[col*(*ldb) + row].imag = packedB[row*pitchB + col].imag;
      }
   }

   free(packedA);
   free(packedB);
}
/*
 * Test wrapper around cblas_cgemm3m; every argument arrives by pointer.
 *
 * *order selects how the underlying routine is invoked:
 *   TEST_ROW_MJR  - a, b and c are copied (indices swapped) into freshly
 *                   allocated buffers whose row stride is one element longer
 *                   than the row length, cblas_cgemm3m is called with
 *                   CblasRowMajor, and the m-by-n result is copied back to c.
 *   TEST_COL_MJR  - the caller's arrays and leading dimensions are forwarded
 *                   unchanged with CblasColMajor.
 *   anything else - forwarded unchanged with UNDEFINED as the order argument.
 *
 * transpa and transpb are translated by get_transpose_type.
 */
void F77_cgemm3m(int *order, char *transpa, char *transpb, int *m, int *n,
                 int *k, CBLAS_TEST_COMPLEX *alpha, CBLAS_TEST_COMPLEX *a, int *lda,
                 CBLAS_TEST_COMPLEX *b, int *ldb, CBLAS_TEST_COMPLEX *beta,
                 CBLAS_TEST_COMPLEX *c, int *ldc ) {
   enum CBLAS_TRANSPOSE transa, transb;
   CBLAS_TEST_COMPLEX *rowA, *rowB, *rowC;
   int r, q, rowsA, colsA, rowsB, colsB, strideA, strideB, strideC;

   get_transpose_type(transpa, &transa);
   get_transpose_type(transpb, &transb);

   /* Pass-through cases: no repacking is needed. */
   if (*order != TEST_ROW_MJR) {
      if (*order == TEST_COL_MJR)
         cblas_cgemm3m(CblasColMajor, transa, transb, *m, *n, *k, alpha, a, *lda,
                       b, *ldb, beta, c, *ldc);
      else
         cblas_cgemm3m(UNDEFINED, transa, transb, *m, *n, *k, alpha, a, *lda,
                       b, *ldb, beta, c, *ldc);
      return;
   }

   /* Repacked A is m-by-k when not transposed, k-by-m otherwise. */
   if (transa == CblasNoTrans) {
      rowsA = *m;
      colsA = *k;
   }
   else {
      rowsA = *k;
      colsA = *m;
   }
   strideA = colsA + 1;
   rowA = (CBLAS_TEST_COMPLEX *)malloc(rowsA*strideA*sizeof(CBLAS_TEST_COMPLEX));
   for (r = 0; r < rowsA; r++) {
      for (q = 0; q < colsA; q++) {
         rowA[r*strideA + q].real = a[q*(*lda) + r].real;
         rowA[r*strideA + q].imag = a[q*(*lda) + r].imag;
      }
   }

   /* Repacked B is k-by-n when not transposed, n-by-k otherwise. */
   if (transb == CblasNoTrans) {
      rowsB = *k;
      colsB = *n;
   }
   else {
      rowsB = *n;
      colsB = *k;
   }
   strideB = colsB + 1;
   rowB = (CBLAS_TEST_COMPLEX *)malloc(rowsB*strideB*sizeof(CBLAS_TEST_COMPLEX));
   for (r = 0; r < rowsB; r++) {
      for (q = 0; q < colsB; q++) {
         rowB[r*strideB + q].real = b[q*(*ldb) + r].real;
         rowB[r*strideB + q].imag = b[q*(*ldb) + r].imag;
      }
   }

   /* Repack the m-by-n C operand. */
   strideC = *n + 1;
   rowC = (CBLAS_TEST_COMPLEX *)malloc((*m)*strideC*sizeof(CBLAS_TEST_COMPLEX));
   for (q = 0; q < *n; q++) {
      for (r = 0; r < *m; r++) {
         rowC[r*strideC + q].real = c[q*(*ldc) + r].real;
         rowC[r*strideC + q].imag = c[q*(*ldc) + r].imag;
      }
   }

   cblas_cgemm3m(CblasRowMajor, transa, transb, *m, *n, *k, alpha, rowA, strideA,
                 rowB, strideB, beta, rowC, strideC);

   /* Write the result back into the caller's c array. */
   for (q = 0; q < *n; q++) {
      for (r = 0; r < *m; r++) {
         c[q*(*ldc) + r].real = rowC[r*strideC + q].real;
         c[q*(*ldc) + r].imag = rowC[r*strideC + q].imag;
      }
   }

   free(rowA);
   free(rowB);
   free(rowC);
}