@@ -14,6 +14,21 @@ lapack-3.4.2.tgz | |||||
lapack-netlib/make.inc | lapack-netlib/make.inc | ||||
lapack-netlib/lapacke/include/lapacke_mangling.h | lapack-netlib/lapacke/include/lapacke_mangling.h | ||||
lapack-netlib/TESTING/testing_results.txt | lapack-netlib/TESTING/testing_results.txt | ||||
lapack-netlib/INSTALL/test* | |||||
lapack-netlib/TESTING/xeigtstc | |||||
lapack-netlib/TESTING/xeigtstd | |||||
lapack-netlib/TESTING/xeigtsts | |||||
lapack-netlib/TESTING/xeigtstz | |||||
lapack-netlib/TESTING/xlintstc | |||||
lapack-netlib/TESTING/xlintstd | |||||
lapack-netlib/TESTING/xlintstds | |||||
lapack-netlib/TESTING/xlintstrfc | |||||
lapack-netlib/TESTING/xlintstrfd | |||||
lapack-netlib/TESTING/xlintstrfs | |||||
lapack-netlib/TESTING/xlintstrfz | |||||
lapack-netlib/TESTING/xlintsts | |||||
lapack-netlib/TESTING/xlintstz | |||||
lapack-netlib/TESTING/xlintstzc | |||||
*.so | *.so | ||||
*.so.* | *.so.* | ||||
*.a | *.a | ||||
@@ -69,3 +84,6 @@ test/zblat3 | |||||
build | build | ||||
build.* | build.* | ||||
*.swp | *.swp | ||||
benchmark/*.goto | |||||
benchmark/smallscaling | |||||
@@ -24,7 +24,12 @@ before_install: | |||||
- if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi | - if [[ "$TARGET_BOX" == "WIN64" ]]; then sudo apt-get install -qq binutils-mingw-w64-x86-64 gcc-mingw-w64-x86-64 gfortran-mingw-w64-x86-64; fi | ||||
- if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi | - if [[ "$TARGET_BOX" == "LINUX32" ]]; then sudo apt-get install -qq gcc-multilib gfortran-multilib; fi | ||||
script: make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE | |||||
script: | |||||
- set -e | |||||
- make QUIET_MAKE=1 DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE | |||||
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C test DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi | |||||
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C ctest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi | |||||
- if [ "$TARGET_BOX" == "LINUX32" ] || [ "$TARGET_BOX" == "LINUX64" ]; then make -C utest DYNAMIC_ARCH=1 TARGET=NEHALEM NUM_THREADS=32 $BTYPE; fi | |||||
# whitelist | # whitelist | ||||
branches: | branches: | ||||
@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.4) | |||||
project(OpenBLAS) | project(OpenBLAS) | ||||
set(OpenBLAS_MAJOR_VERSION 0) | set(OpenBLAS_MAJOR_VERSION 0) | ||||
set(OpenBLAS_MINOR_VERSION 2) | set(OpenBLAS_MINOR_VERSION 2) | ||||
set(OpenBLAS_PATCH_VERSION 16.dev) | |||||
set(OpenBLAS_PATCH_VERSION 20.dev) | |||||
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | ||||
enable_language(ASM) | enable_language(ASM) | ||||
@@ -30,10 +30,20 @@ set(NO_LAPACK 1) | |||||
set(NO_LAPACKE 1) | set(NO_LAPACKE 1) | ||||
endif() | endif() | ||||
if(BUILD_DEBUG) | |||||
set(CMAKE_BUILD_TYPE Debug) | |||||
if(CMAKE_CONFIGURATION_TYPES) # multiconfig generator? | |||||
set(CMAKE_CONFIGURATION_TYPES "Debug;Release" CACHE STRING "" FORCE) | |||||
set(CMAKE_BUILD_TYPE | |||||
Debug Debug | |||||
Release Release | |||||
) | |||||
else() | else() | ||||
set(CMAKE_BUILD_TYPE Release) | |||||
if( NOT CMAKE_BUILD_TYPE ) | |||||
if(BUILD_DEBUG) | |||||
set(CMAKE_BUILD_TYPE Debug) | |||||
else() | |||||
set(CMAKE_BUILD_TYPE Release) | |||||
endif() | |||||
endif() | |||||
endif() | endif() | ||||
if(BUILD_WITHOUT_CBLAS) | if(BUILD_WITHOUT_CBLAS) | ||||
@@ -45,8 +55,8 @@ endif() | |||||
message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.") | message(WARNING "CMake support is experimental. This will not produce the same Makefiles that OpenBLAS ships with. Only x86 support is currently available.") | ||||
include("${CMAKE_SOURCE_DIR}/cmake/utils.cmake") | |||||
include("${CMAKE_SOURCE_DIR}/cmake/system.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/utils.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/system.cmake") | |||||
set(BLASDIRS interface driver/level2 driver/level3 driver/others) | set(BLASDIRS interface driver/level2 driver/level3 driver/others) | ||||
@@ -54,10 +64,6 @@ if (NOT DYNAMIC_ARCH) | |||||
list(APPEND BLASDIRS kernel) | list(APPEND BLASDIRS kernel) | ||||
endif () | endif () | ||||
if (DEFINED UTEST_CHECK) | |||||
set(SANITY_CHECK 1) | |||||
endif () | |||||
if (DEFINED SANITY_CHECK) | if (DEFINED SANITY_CHECK) | ||||
list(APPEND BLASDIRS reference) | list(APPEND BLASDIRS reference) | ||||
endif () | endif () | ||||
@@ -110,6 +116,10 @@ if (${NO_STATIC} AND ${NO_SHARED}) | |||||
message(FATAL_ERROR "Neither static nor shared are enabled.") | message(FATAL_ERROR "Neither static nor shared are enabled.") | ||||
endif () | endif () | ||||
#Set default output directory | |||||
set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib ) | |||||
set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib ) | |||||
# get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) | # get obj vars into format that add_library likes: $<TARGET_OBJS:objlib> (see http://www.cmake.org/cmake/help/v3.0/command/add_library.html) | ||||
set(TARGET_OBJS "") | set(TARGET_OBJS "") | ||||
foreach (SUBDIR ${SUBDIRS}) | foreach (SUBDIR ${SUBDIRS}) | ||||
@@ -123,9 +133,9 @@ endforeach () | |||||
# Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. | # Can't just use lapack-netlib's CMake files, since they are set up to search for BLAS, build and install a binary. We just want to build a couple of lib files out of lapack and lapacke. | ||||
# Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. | # Not using add_subdirectory here because lapack-netlib already has its own CMakeLists.txt. Instead include a cmake script with the sources we want. | ||||
if (NOT NOFORTRAN AND NOT NO_LAPACK) | if (NOT NOFORTRAN AND NOT NO_LAPACK) | ||||
include("${CMAKE_SOURCE_DIR}/cmake/lapack.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/lapack.cmake") | |||||
if (NOT NO_LAPACKE) | if (NOT NO_LAPACKE) | ||||
include("${CMAKE_SOURCE_DIR}/cmake/lapacke.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/lapacke.cmake") | |||||
endif () | endif () | ||||
endif () | endif () | ||||
@@ -137,22 +147,36 @@ endif() | |||||
# add objects to the openblas lib | # add objects to the openblas lib | ||||
add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) | add_library(${OpenBLAS_LIBNAME} SHARED ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS} ${OpenBLAS_DEF_FILE}) | ||||
include("${CMAKE_SOURCE_DIR}/cmake/export.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/export.cmake") | |||||
# Set output for libopenblas | |||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib) | |||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_NAME_DEBUG "${OpenBLAS_LIBNAME}_d") | |||||
foreach (OUTPUTCONFIG ${CMAKE_CONFIGURATION_TYPES}) | |||||
string( TOUPPER ${OUTPUTCONFIG} OUTPUTCONFIG ) | |||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib) | |||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES LIBRARY_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib) | |||||
set_target_properties( ${OpenBLAS_LIBNAME} PROPERTIES ARCHIVE_OUTPUT_DIRECTORY_${OUTPUTCONFIG} ${PROJECT_BINARY_DIR}/lib) | |||||
endforeach() | |||||
enable_testing() | |||||
add_subdirectory(utest) | |||||
if(NOT MSVC) | if(NOT MSVC) | ||||
#only build shared library for MSVC | |||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) | |||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME}) | |||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1) | |||||
if(SMP) | |||||
target_link_libraries(${OpenBLAS_LIBNAME} pthread) | |||||
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread) | |||||
#only build shared library for MSVC | |||||
add_library(${OpenBLAS_LIBNAME}_static STATIC ${LA_SOURCES} ${LAPACKE_SOURCES} ${TARGET_OBJS}) | |||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES OUTPUT_NAME ${OpenBLAS_LIBNAME}) | |||||
set_target_properties(${OpenBLAS_LIBNAME}_static PROPERTIES CLEAN_DIRECT_OUTPUT 1) | |||||
if(SMP) | |||||
target_link_libraries(${OpenBLAS_LIBNAME} pthread) | |||||
target_link_libraries(${OpenBLAS_LIBNAME}_static pthread) | |||||
endif() | endif() | ||||
#build test and ctest | #build test and ctest | ||||
enable_testing() | |||||
add_subdirectory(test) | add_subdirectory(test) | ||||
if(NOT NO_CBLAS) | if(NOT NO_CBLAS) | ||||
add_subdirectory(ctest) | add_subdirectory(ctest) | ||||
@@ -188,3 +212,27 @@ set_target_properties(${OpenBLAS_LIBNAME} PROPERTIES | |||||
#endif | #endif | ||||
# @touch lib.grd | # @touch lib.grd | ||||
# Install project | |||||
# Install libraries | |||||
install(TARGETS ${OpenBLAS_LIBNAME} | |||||
RUNTIME DESTINATION bin | |||||
ARCHIVE DESTINATION lib | |||||
LIBRARY DESTINATION lib ) | |||||
# Install include files | |||||
FILE(GLOB_RECURSE INCLUDE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/*.h") | |||||
install (FILES ${INCLUDE_FILES} DESTINATION include) | |||||
if(NOT MSVC) | |||||
install (TARGETS ${OpenBLAS_LIBNAME}_static DESTINATION lib) | |||||
endif() | |||||
include(FindPkgConfig QUIET) | |||||
if(PKG_CONFIG_FOUND) | |||||
set(prefix ${CMAKE_INSTALL_PREFIX}) | |||||
set(libdir ${CMAKE_INSTALL_PREFIX}/lib) | |||||
set(includedir ${CMAKE_INSTALL_PREFIX}/include) | |||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/openblas.pc.in ${PROJECT_BINARY_DIR}/openblas.pc @ONLY) | |||||
install (FILES ${PROJECT_BINARY_DIR}/openblas.pc DESTINATION lib/pkgconfig/) | |||||
endif() |
@@ -121,6 +121,17 @@ In chronological order: | |||||
* [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). | * [2014-10-10] trmm and sgemm kernels (optimized for APM's X-Gene 1). | ||||
ARMv8 support. | ARMv8 support. | ||||
* Jerome Robert <jeromerobert@gmx.com> | |||||
* [2015-01-01] Speed-up small `ger` and `gemv` using stack allocation (bug #478) | |||||
* [2015-12-23] `stack_check` in `gemv.c` (bug #722) | |||||
* [2015-12-28] Allow forcing the number of parallel make jobs | |||||
* [2015-12-28] Fix detection of AMD E2-3200 | |||||
* [2015-12-31] Let `make MAX_STACK_ALLOC=0` do what is expected | |||||
* [2016-01-19] Disable multi-threading in `ger` and `swap` for small matrices (bug #731) | |||||
* [2016-01-24] Use `GEMM_MULTITHREAD_THRESHOLD` as a number of ops (bug #742) | |||||
* [2016-01-26] Let `openblas_get_num_threads` return the number of active threads (bug #760) | |||||
* [2016-01-30] Speed-up small `zger`, `zgemv`, `ztrmv` using stack allocation (bug #727) | |||||
* Dan Kortschak | * Dan Kortschak | ||||
* [2015-01-07] Added test for drotmg bug #484. | * [2015-01-07] Added test for drotmg bug #484. | ||||
@@ -130,5 +141,29 @@ In chronological order: | |||||
* Martin Koehler <https://github.com/grisuthedragon/> | * Martin Koehler <https://github.com/grisuthedragon/> | ||||
* [2015-09-07] Improved imatcopy | * [2015-09-07] Improved imatcopy | ||||
* [Your name or handle] <[email or website]> | |||||
* [Date] [Brief summary of your changes] | |||||
* Ashwin Sekhar T K <https://github.com/ashwinyes/> | |||||
* [2015-11-09] Assembly kernels for Cortex-A57 (ARMv8) | |||||
* [2015-11-20] lapack-test fixes for Cortex-A57 | |||||
* [2016-03-14] Additional functional Assembly Kernels for Cortex-A57 | |||||
* [2016-03-14] Optimize Dgemm 4x4 for Cortex-A57 | |||||
* theoractice <https://github.com/theoractice/> | |||||
* [2016-03-20] Fix compiler error in VisualStudio with CMake | |||||
* [2016-03-22] Fix access violation on Windows while static linking | |||||
* Paul Mustière <https://github.com/buffer51/> | |||||
* [2016-02-04] Fix Android build on ARMV7 | |||||
* [2016-04-26] Android build with LAPACK for ARMV7 & ARMV8 | |||||
* Shivraj Patil <https://github.com/sva-img/> | |||||
* [2016-05-03] DGEMM optimization for MIPS P5600 and I6400 using MSA | |||||
* Kaustubh Raste <https://github.com/ksraste/> | |||||
* [2016-05-09] DTRSM optimization for MIPS P5600 and I6400 using MSA | |||||
* [2016-05-20] STRSM optimization for MIPS P5600 and I6400 using MSA | |||||
* Abdelrauf <https://github.com/quickwritereader> | |||||
* [2017-01-01] dgemm and dtrmm kernels for IBM z13 | |||||
* [2017-02-26] ztrmm kernel for IBM z13 | |||||
@@ -1,4 +1,99 @@ | |||||
OpenBLAS ChangeLog | OpenBLAS ChangeLog | ||||
==================================================================== | |||||
Version 0.2.19 | |||||
1-Sep-2016 | |||||
common: | |||||
* Improved cross compiling. | |||||
* Fix the bug on musl libc. | |||||
POWER: | |||||
* Optimize BLAS on Power8 | |||||
* Fixed Julia+OpenBLAS bugs on Power8 | |||||
MIPS: | |||||
* Optimize BLAS on MIPS P5600 and I6400 (Thanks, Shivraj Patil, Kaustubh Raste) | |||||
ARM: | |||||
* Improved on ARM Cortex-A57. (Thanks, Ashwin Sekhar T K) | |||||
==================================================================== | |||||
Version 0.2.18 | |||||
12-Apr-2016 | |||||
common: | |||||
* If you set the MAKE_NB_JOBS flag to a value less than or equal to zero, | |||||
make will run without -j. | |||||
x86/x86_64: | |||||
* Support building Visual Studio static library. (#813, Thanks, theoractice) | |||||
* Fix bugs to pass buildbot CI tests (http://build.openblas.net) | |||||
ARM: | |||||
* Provide DGEMM 8x4 kernel for Cortex-A57 (Thanks, Ashwin Sekhar T K) | |||||
POWER: | |||||
* Optimize S and C BLAS3 on Power8 | |||||
* Optimize BLAS2/1 on Power8 | |||||
==================================================================== | |||||
Version 0.2.17 | |||||
20-Mar-2016 | |||||
common: | |||||
* Enable BUILD_LAPACK_DEPRECATED=1 by default. | |||||
==================================================================== | |||||
Version 0.2.16 | |||||
15-Mar-2016 | |||||
common: | |||||
* Avoid potential getenv segfault. (#716) | |||||
* Import LAPACK svn bugfix #142-#147,#150-#155 | |||||
x86/x86_64: | |||||
* Optimize c/zgemv for AMD Bulldozer, Piledriver, Steamroller | |||||
* Fix bug with scipy linalg test. | |||||
ARM: | |||||
* Improve DGEMM for ARM Cortex-A57. (Thanks, Ashwin Sekhar T K) | |||||
POWER: | |||||
* Optimize D and Z BLAS3 functions for Power8. | |||||
==================================================================== | |||||
Version 0.2.16.rc1 | |||||
23-Feb-2016 | |||||
common: | |||||
* Upgrade LAPACK to 3.6.0 version. | |||||
Add BUILD_LAPACK_DEPRECATED option in Makefile.rule to build | |||||
LAPACK deprecated functions. | |||||
* Add MAKE_NB_JOBS option in Makefile. | |||||
Force number of make jobs. This is particularly | |||||
useful when using distcc. (#735. Thanks, Jerome Robert.) | |||||
* Redesign unit test. Run unit/regression test at every build (Travis-CI and Appveyor). | |||||
* Disable multi-threading for small size swap and ger. (#744. Thanks, Jerome Robert) | |||||
* Improve small zger, zgemv, ztrmv using stack allocation (#727. Thanks, Jerome Robert) | |||||
* Let openblas_get_num_threads return the number of active threads. | |||||
(#760. Thanks, Jerome Robert) | |||||
* Support illumos(OmniOS). (#749. Thanks, Lauri Tirkkonen) | |||||
* Fix LAPACK Dormbr, Dormlq bug. (#711, #713. Thanks, Brendan Tracey) | |||||
* Update scipy benchmark script. (#745. Thanks, John Kirkham) | |||||
x86/x86_64: | |||||
* Optimize trsm kernels for AMD Bulldozer, Piledriver, Steamroller. | |||||
* Detect Intel Avoton. | |||||
* Detect AMD Trinity, Richland, E2-3200. | |||||
* Fix gemv performance bug on Mac OSX Intel Haswell. | |||||
* Fix some bugs with CMake and Visual Studio | |||||
ARM: | |||||
* Support and optimize Cortex-A57 AArch64. | |||||
(#686. Thanks, Ashwin Sekhar TK) | |||||
* Fix Android build on ARMV7 (#778. Thanks, Paul Mustiere) | |||||
* Update ARMV6 kernels. | |||||
POWER: | |||||
* Fix detection of POWER architecture | |||||
(#684. Thanks, Sebastien Villemot) | |||||
==================================================================== | ==================================================================== | ||||
Version 0.2.15 | Version 0.2.15 | ||||
27-Oct-2015 | 27-Oct-2015 | ||||
@@ -7,10 +7,6 @@ ifneq ($(DYNAMIC_ARCH), 1) | |||||
BLASDIRS += kernel | BLASDIRS += kernel | ||||
endif | endif | ||||
ifdef UTEST_CHECK | |||||
SANITY_CHECK = 1 | |||||
endif | |||||
ifdef SANITY_CHECK | ifdef SANITY_CHECK | ||||
BLASDIRS += reference | BLASDIRS += reference | ||||
endif | endif | ||||
@@ -85,22 +81,22 @@ endif | |||||
shared : | shared : | ||||
ifndef NO_SHARED | ifndef NO_SHARED | ||||
ifeq ($(OSNAME), Linux) | |||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS)) | |||||
@$(MAKE) -C exports so | @$(MAKE) -C exports so | ||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | |||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | |||||
endif | endif | ||||
ifeq ($(OSNAME), FreeBSD) | ifeq ($(OSNAME), FreeBSD) | ||||
@$(MAKE) -C exports so | @$(MAKE) -C exports so | ||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
endif | endif | ||||
ifeq ($(OSNAME), NetBSD) | ifeq ($(OSNAME), NetBSD) | ||||
@$(MAKE) -C exports so | @$(MAKE) -C exports so | ||||
@-ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
@ln -fs $(LIBSONAME) $(LIBPREFIX).so | |||||
endif | endif | ||||
ifeq ($(OSNAME), Darwin) | ifeq ($(OSNAME), Darwin) | ||||
@$(MAKE) -C exports dyn | @$(MAKE) -C exports dyn | ||||
@-ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib | |||||
@ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib | |||||
endif | endif | ||||
ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
@$(MAKE) -C exports dll | @$(MAKE) -C exports dll | ||||
@@ -112,21 +108,15 @@ endif | |||||
tests : | tests : | ||||
ifndef NOFORTRAN | ifndef NOFORTRAN | ||||
ifndef TARGET | |||||
ifndef CROSS | |||||
touch $(LIBNAME) | touch $(LIBNAME) | ||||
ifndef NO_FBLAS | ifndef NO_FBLAS | ||||
$(MAKE) -C test all | $(MAKE) -C test all | ||||
ifdef UTEST_CHECK | |||||
$(MAKE) -C utest all | $(MAKE) -C utest all | ||||
endif | endif | ||||
endif | |||||
ifndef NO_CBLAS | ifndef NO_CBLAS | ||||
$(MAKE) -C ctest all | $(MAKE) -C ctest all | ||||
endif | endif | ||||
endif | endif | ||||
endif | |||||
endif | |||||
libs : | libs : | ||||
ifeq ($(CORE), UNKOWN) | ifeq ($(CORE), UNKOWN) | ||||
@@ -249,16 +239,23 @@ ifndef NOFORTRAN | |||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
ifeq ($(FC), gfortran) | |||||
ifeq ($(F_COMPILER), GFORTRAN) | |||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
ifdef SMP | ifdef SMP | ||||
ifeq ($(OSNAME), WINNT) | |||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
else | |||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
endif | |||||
else | else | ||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
endif | endif | ||||
else | else | ||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
endif | |||||
ifeq ($(BUILD_LAPACK_DEPRECATED), 1) | |||||
-@echo "BUILD_DEPRECATED = 1" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
endif | endif | ||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
endif | endif | ||||
@@ -281,18 +278,28 @@ lapack-timing : large.tgz timing.tgz | |||||
ifndef NOFORTRAN | ifndef NOFORTRAN | ||||
(cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING) | (cd $(NETLIB_LAPACK_DIR); $(TAR) zxf ../timing.tgz TIMING) | ||||
(cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz ) | (cd $(NETLIB_LAPACK_DIR)/TIMING; $(TAR) zxf ../../large.tgz ) | ||||
make -C $(NETLIB_LAPACK_DIR)/TIMING | |||||
$(MAKE) -C $(NETLIB_LAPACK_DIR)/TIMING | |||||
endif | endif | ||||
lapack-test : | lapack-test : | ||||
(cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) | (cd $(NETLIB_LAPACK_DIR)/TESTING && rm -f x* *.out) | ||||
make -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc | |||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR)/TESTING xeigtstc xeigtstd xeigtsts xeigtstz xlintstc xlintstd xlintstds xlintstrfd xlintstrfz xlintsts xlintstz xlintstzc xlintstrfs xlintstrfc | |||||
ifneq ($(CROSS), 1) | |||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ | |||||
./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | (cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | ||||
endif | |||||
lapack-runtest: | |||||
( cd $(NETLIB_LAPACK_DIR)/INSTALL; ./testlsame; ./testslamch; ./testdlamch; \ | |||||
./testsecond; ./testdsecnd; ./testieee; ./testversion ) | |||||
(cd $(NETLIB_LAPACK_DIR); ./lapack_testing.py -r ) | |||||
blas-test: | blas-test: | ||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out) | (cd $(NETLIB_LAPACK_DIR)/BLAS && rm -f x* *.out) | ||||
make -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing | |||||
$(MAKE) -j 1 -C $(NETLIB_LAPACK_DIR) blas_testing | |||||
(cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out) | (cd $(NETLIB_LAPACK_DIR)/BLAS && cat *.out) | ||||
@@ -1,4 +1,4 @@ | |||||
# ifeq logical or | |||||
ifeq logical or | |||||
ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15)) | ifeq ($(CORE), $(filter $(CORE),CORTEXA9 CORTEXA15)) | ||||
ifeq ($(OSNAME), Android) | ifeq ($(OSNAME), Android) | ||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a | CCOMMON_OPT += -mfpu=neon -march=armv7-a | ||||
@@ -11,9 +11,14 @@ endif | |||||
ifeq ($(CORE), ARMV7) | ifeq ($(CORE), ARMV7) | ||||
ifeq ($(OSNAME), Android) | ifeq ($(OSNAME), Android) | ||||
ifeq ($(ARM_SOFTFP), 1) | |||||
CCOMMON_OPT += -mfpu=neon -march=armv7-a | CCOMMON_OPT += -mfpu=neon -march=armv7-a | ||||
FCOMMON_OPT += -mfpu=neon -march=armv7-a | FCOMMON_OPT += -mfpu=neon -march=armv7-a | ||||
else | else | ||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch | |||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch | |||||
endif | |||||
else | |||||
CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | CCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | ||||
FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | FCOMMON_OPT += -mfpu=vfpv3 -march=armv7-a | ||||
endif | endif | ||||
@@ -29,5 +34,3 @@ ifeq ($(CORE), ARMV5) | |||||
CCOMMON_OPT += -marm -march=armv5 | CCOMMON_OPT += -marm -march=armv5 | ||||
FCOMMON_OPT += -marm -march=armv5 | FCOMMON_OPT += -marm -march=armv5 | ||||
endif | endif | ||||
@@ -9,3 +9,17 @@ CCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | |||||
FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | FCOMMON_OPT += -march=armv8-a+crc+crypto+fp+simd -mtune=cortex-a57 | ||||
endif | endif | ||||
ifeq ($(CORE), VULCAN) | |||||
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||||
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||||
endif | |||||
ifeq ($(CORE), THUNDERX) | |||||
CCOMMON_OPT += -mtune=thunderx -mcpu=thunderx | |||||
FCOMMON_OPT += -mtune=thunderx -mcpu=thunderx | |||||
endif | |||||
ifeq ($(CORE), THUNDERX2T99) | |||||
CCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||||
FCOMMON_OPT += -mtune=vulcan -mcpu=vulcan | |||||
endif |
@@ -12,6 +12,7 @@ OPENBLAS_BUILD_DIR := $(CURDIR) | |||||
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas | OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas | ||||
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake | OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake | ||||
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake | OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake | ||||
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig | |||||
.PHONY : install | .PHONY : install | ||||
.NOTPARALLEL : install | .NOTPARALLEL : install | ||||
@@ -20,110 +21,122 @@ lib.grd : | |||||
$(error OpenBLAS: Please run "make" firstly) | $(error OpenBLAS: Please run "make" firstly) | ||||
install : lib.grd | install : lib.grd | ||||
@-mkdir -p $(DESTDIR)$(PREFIX) | |||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | |||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||||
@-mkdir -p $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | |||||
@-mkdir -p "$(DESTDIR)$(PREFIX)" | |||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)" | |||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_BINARY_DIR)" | |||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)" | |||||
@-mkdir -p "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" | |||||
@echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | @echo Generating openblas_config.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | ||||
#for inc | #for inc | ||||
@echo \#ifndef OPENBLAS_CONFIG_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
@echo \#define OPENBLAS_CONFIG_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
@awk 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
@cat openblas_config_template.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h | |||||
@echo \#ifndef OPENBLAS_CONFIG_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||||
@echo \#define OPENBLAS_CONFIG_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||||
@$(AWK) 'NF {print $$1, "OPENBLAS_"$$2, $$3}' config_last.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||||
@echo \#define OPENBLAS_VERSION \" OpenBLAS $(VERSION) \" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||||
@cat openblas_config_template.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||||
@echo \#endif \/\* OPENBLAS_CONFIG_H \*\/ >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/openblas_config.h" | |||||
@echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | @echo Generating f77blas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | ||||
@echo \#ifndef OPENBLAS_F77BLAS_H > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h | |||||
@echo \#define OPENBLAS_F77BLAS_H >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h | |||||
@echo \#include \"openblas_config.h\" >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h | |||||
@cat common_interface.h >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h | |||||
@echo \#endif >> $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h | |||||
@echo \#ifndef OPENBLAS_F77BLAS_H > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||||
@echo \#define OPENBLAS_F77BLAS_H >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||||
@echo \#include \"openblas_config.h\" >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||||
@cat common_interface.h >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||||
@echo \#endif >> "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/f77blas.h" | |||||
ifndef NO_CBLAS | ifndef NO_CBLAS | ||||
@echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | @echo Generating cblas.h in $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | ||||
@sed 's/common/openblas_config/g' cblas.h > $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h | |||||
@sed 's/common/openblas_config/g' cblas.h > "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/cblas.h" | |||||
endif | endif | ||||
ifndef NO_LAPACKE | ifndef NO_LAPACKE | ||||
@echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | @echo Copying LAPACKE header files to $(DESTDIR)$(OPENBLAS_INCLUDE_DIR) | ||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h | |||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_config.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h | |||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_mangling_with_flags.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h | |||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/lapacke/include/lapacke_utils.h $(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h | |||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke.h" | |||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_config.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_config.h" | |||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_mangling_with_flags.h.in "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_mangling.h" | |||||
@-install -pm644 $(NETLIB_LAPACK_DIR)/LAPACKE/include/lapacke_utils.h "$(DESTDIR)$(OPENBLAS_INCLUDE_DIR)/lapacke_utils.h" | |||||
endif | endif | ||||
#for install static library | #for install static library | ||||
ifndef NO_STATIC | ifndef NO_STATIC | ||||
@echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | @echo Copying the static library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | ||||
@install -pm644 $(LIBNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ | |||||
@install -pm644 $(LIBNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||||
ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | ln -fs $(LIBNAME) $(LIBPREFIX).$(LIBSUFFIX) | ||||
endif | endif | ||||
#for install shared library | #for install shared library | ||||
ifndef NO_SHARED | ifndef NO_SHARED | ||||
@echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | @echo Copying the shared library to $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | ||||
ifeq ($(OSNAME), Linux) | |||||
@install -pm755 $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ | |||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux SunOS)) | |||||
@install -pm755 $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ | ln -fs $(LIBSONAME) $(LIBPREFIX).so ; \ | ||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | ln -fs $(LIBSONAME) $(LIBPREFIX).so.$(MAJOR_VERSION) | ||||
endif | endif | ||||
ifeq ($(OSNAME), FreeBSD) | ifeq ($(OSNAME), FreeBSD) | ||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ | |||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so | ln -fs $(LIBSONAME) $(LIBPREFIX).so | ||||
endif | endif | ||||
ifeq ($(OSNAME), NetBSD) | ifeq ($(OSNAME), NetBSD) | ||||
@cp $(LIBSONAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ | |||||
@cp $(LIBSONAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||||
ln -fs $(LIBSONAME) $(LIBPREFIX).so | ln -fs $(LIBSONAME) $(LIBPREFIX).so | ||||
endif | endif | ||||
ifeq ($(OSNAME), Darwin) | ifeq ($(OSNAME), Darwin) | ||||
@-cp $(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
@-install_name_tool -id $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) $(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME) | |||||
@cd $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) ; \ | |||||
@-cp $(LIBDYNNAME) "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||||
@-install_name_tool -id "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)/$(LIBDYNNAME)" | |||||
@cd "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" ; \ | |||||
ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib | ln -fs $(LIBDYNNAME) $(LIBPREFIX).dylib | ||||
endif | endif | ||||
ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
@-cp $(LIBDLLNAME) $(DESTDIR)$(OPENBLAS_BINARY_DIR) | |||||
@-cp $(LIBDLLNAME).a $(DESTDIR)$(OPENBLAS_LIBRARY_DIR) | |||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)" | |||||
@-cp $(LIBDLLNAME).a "$(DESTDIR)$(OPENBLAS_LIBRARY_DIR)" | |||||
endif | endif | ||||
ifeq ($(OSNAME), CYGWIN_NT) | ifeq ($(OSNAME), CYGWIN_NT) | ||||
@-cp $(LIBDLLNAME) $(OPENBLAS_BINARY_DIR) | |||||
@-cp $(LIBDLLNAME) "$(DESTDIR)$(OPENBLAS_BINARY_DIR)" | |||||
endif | endif | ||||
endif | endif | ||||
#Generating openblas.pc | |||||
@echo Generating openblas.pc in $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR) | |||||
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc | |||||
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc | |||||
@echo 'version='$(VERSION) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc | |||||
@echo 'extralib='$(EXTRALIB) >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc | |||||
@cat openblas.pc.in >> $(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc | |||||
#Generating OpenBLASConfig.cmake | #Generating OpenBLASConfig.cmake | ||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | @echo Generating $(OPENBLAS_CMAKE_CONFIG) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | ||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||||
@echo "SET(OpenBLAS_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||||
@echo "SET(OpenBLAS_INCLUDE_DIRS ${OPENBLAS_INCLUDE_DIR})" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||||
ifndef NO_SHARED | ifndef NO_SHARED | ||||
#ifeq logical or | #ifeq logical or | ||||
ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD)) | ifeq ($(OSNAME), $(filter $(OSNAME),Linux FreeBSD NetBSD)) | ||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).so)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||||
endif | endif | ||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT)) | ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT)) | ||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_BINARY_DIR}/$(LIBDLLNAME))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||||
endif | endif | ||||
ifeq ($(OSNAME), Darwin) | ifeq ($(OSNAME), Darwin) | ||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).dylib)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||||
endif | endif | ||||
else | else | ||||
#only static | #only static | ||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG) | |||||
@echo "SET(OpenBLAS_LIBRARIES ${OPENBLAS_LIBRARY_DIR}/$(LIBPREFIX).$(LIBSUFFIX))" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG)" | |||||
endif | endif | ||||
#Generating OpenBLASConfigVersion.cmake | #Generating OpenBLASConfigVersion.cmake | ||||
@echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | @echo Generating $(OPENBLAS_CMAKE_CONFIG_VERSION) in $(DESTDIR)$(OPENBLAS_CMAKE_DIR) | ||||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo "else ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo " endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo "endif ()" >> $(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION) | |||||
@echo "set (PACKAGE_VERSION \"${VERSION}\")" > "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||||
@echo "if (PACKAGE_VERSION VERSION_LESS PACKAGE_FIND_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||||
@echo " set (PACKAGE_VERSION_COMPATIBLE FALSE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||||
@echo "else ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||||
@echo " set (PACKAGE_VERSION_COMPATIBLE TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||||
@echo " if (PACKAGE_FIND_VERSION STREQUAL PACKAGE_VERSION)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||||
@echo " set (PACKAGE_VERSION_EXACT TRUE)" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||||
@echo " endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||||
@echo "endif ()" >> "$(DESTDIR)$(OPENBLAS_CMAKE_DIR)/$(OPENBLAS_CMAKE_CONFIG_VERSION)" | |||||
@echo Install OK! | @echo Install OK! | ||||
@@ -0,0 +1,3 @@ | |||||
ifdef BINARY64 | |||||
else | |||||
endif |
@@ -1,4 +1,26 @@ | |||||
# CCOMMON_OPT += -DALLOC_SHM | |||||
ifdef USE_THREAD | |||||
ifeq ($(USE_THREAD), 0) | |||||
USE_OPENMP = 0 | |||||
else | |||||
USE_OPENMP = 1 | |||||
endif | |||||
else | |||||
USE_OPENMP = 1 | |||||
endif | |||||
ifeq ($(CORE), POWER8) | |||||
ifeq ($(USE_OPENMP), 1) | |||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | |||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | |||||
else | |||||
COMMON_OPT += -Ofast -mcpu=power8 -mtune=power8 -mvsx -malign-power -fno-fast-math | |||||
FCOMMON_OPT += -O2 -frecursive -mcpu=power8 -mtune=power8 -malign-power -fno-fast-math | |||||
endif | |||||
endif | |||||
FLAMEPATH = $(HOME)/flame/lib | FLAMEPATH = $(HOME)/flame/lib | ||||
@@ -16,6 +38,16 @@ else | |||||
endif | endif | ||||
endif | endif | ||||
#Either uncomment below line or run make with `USE_MASS=1` to enable support of MASS library | |||||
#USE_MASS = 1 | |||||
ifeq ($(USE_MASS), 1) | |||||
# Path to MASS libs, change it if the libs are installed at any other location | |||||
MASSPATH = /opt/ibm/xlmass/8.1.3/lib | |||||
COMMON_OPT += -mveclibabi=mass -ftree-vectorize -funsafe-math-optimizations -DUSE_MASS | |||||
EXTRALIB += -L$(MASSPATH) -lmass -lmassvp8 -lmass_simdp8 | |||||
endif | |||||
ifdef BINARY64 | ifdef BINARY64 | ||||
@@ -17,14 +17,26 @@ ifdef CPUIDEMU | |||||
EXFLAGS = -DCPUIDEMU -DVENDOR=99 | EXFLAGS = -DCPUIDEMU -DVENDOR=99 | ||||
endif | endif | ||||
ifeq ($(TARGET), P5600) | |||||
TARGET_FLAGS = -mips32r5 | |||||
endif | |||||
ifeq ($(TARGET), I6400) | |||||
TARGET_FLAGS = -mips64r6 | |||||
endif | |||||
ifeq ($(TARGET), P6600) | |||||
TARGET_FLAGS = -mips64r6 | |||||
endif | |||||
all: getarch_2nd | all: getarch_2nd | ||||
./getarch_2nd 0 >> $(TARGET_MAKE) | ./getarch_2nd 0 >> $(TARGET_MAKE) | ||||
./getarch_2nd 1 >> $(TARGET_CONF) | ./getarch_2nd 1 >> $(TARGET_CONF) | ||||
config.h : c_check f_check getarch | config.h : c_check f_check getarch | ||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) | |||||
perl ./c_check $(TARGET_MAKE) $(TARGET_CONF) $(CC) $(TARGET_FLAGS) | |||||
ifneq ($(ONLY_CBLAS), 1) | ifneq ($(ONLY_CBLAS), 1) | ||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) | |||||
perl ./f_check $(TARGET_MAKE) $(TARGET_CONF) $(FC) $(TARGET_FLAGS) | |||||
else | else | ||||
#When we only build CBLAS, we set NOFORTRAN=2 | #When we only build CBLAS, we set NOFORTRAN=2 | ||||
echo "NOFORTRAN=2" >> $(TARGET_MAKE) | echo "NOFORTRAN=2" >> $(TARGET_MAKE) | ||||
@@ -3,7 +3,7 @@ | |||||
# | # | ||||
# This library's version | # This library's version | ||||
VERSION = 0.2.16.dev | |||||
VERSION = 0.2.20.dev | |||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | ||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | ||||
@@ -52,6 +52,7 @@ VERSION = 0.2.16.dev | |||||
# USE_THREAD = 0 | # USE_THREAD = 0 | ||||
# If you're going to use this library with OpenMP, please comment it in. | # If you're going to use this library with OpenMP, please comment it in. | ||||
# This flag is always set for POWER8. Don't modify the flag | |||||
# USE_OPENMP = 1 | # USE_OPENMP = 1 | ||||
# You can define maximum number of threads. Basically it should be | # You can define maximum number of threads. Basically it should be | ||||
@@ -79,6 +80,9 @@ VERSION = 0.2.16.dev | |||||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in. | # If you don't need LAPACKE (C Interface to LAPACK), please comment it in. | ||||
# NO_LAPACKE = 1 | # NO_LAPACKE = 1 | ||||
# Build LAPACK Deprecated functions since LAPACK 3.6.0 | |||||
BUILD_LAPACK_DEPRECATED = 1 | |||||
# If you want to use legacy threaded Level 3 implementation. | # If you want to use legacy threaded Level 3 implementation. | ||||
# USE_SIMPLE_THREADED_LEVEL3 = 1 | # USE_SIMPLE_THREADED_LEVEL3 = 1 | ||||
@@ -108,6 +112,13 @@ NO_AFFINITY = 1 | |||||
# Don't use parallel make. | # Don't use parallel make. | ||||
# NO_PARALLEL_MAKE = 1 | # NO_PARALLEL_MAKE = 1 | ||||
# Force number of make jobs. The default is the number of logical CPU of the host. | |||||
# This is particularly useful when using distcc. | |||||
# A negative value disables adding a -j flag to make, so that the -j value of a | |||||
# parent make is used instead. This is useful when OpenBLAS make is called from | |||||
# another project's makefile. | |||||
# MAKE_NB_JOBS = 2 | |||||
# If you would like to know minute performance report of GotoBLAS. | # If you would like to know minute performance report of GotoBLAS. | ||||
# FUNCTION_PROFILE = 1 | # FUNCTION_PROFILE = 1 | ||||
@@ -138,19 +149,17 @@ NO_AFFINITY = 1 | |||||
# slow (Not implemented yet). | # slow (Not implemented yet). | ||||
# SANITY_CHECK = 1 | # SANITY_CHECK = 1 | ||||
# Run testcases in utest/ . When you enable UTEST_CHECK, it would enable | |||||
# SANITY_CHECK to compare the result with reference BLAS. | |||||
# UTEST_CHECK = 1 | |||||
# The installation directory. | # The installation directory. | ||||
# PREFIX = /opt/OpenBLAS | # PREFIX = /opt/OpenBLAS | ||||
# Common Optimization Flag; | # Common Optimization Flag; | ||||
# The default -O2 is enough. | # The default -O2 is enough. | ||||
# Flags for POWER8 are defined in Makefile.power. Don't modify COMMON_OPT | |||||
# COMMON_OPT = -O2 | # COMMON_OPT = -O2 | ||||
# gfortran option for LAPACK | # gfortran option for LAPACK | ||||
# enable this flag only on 64bit Linux and if you need a thread safe lapack library | # enable this flag only on 64bit Linux and if you need a thread safe lapack library | ||||
# Flags for POWER8 are defined in Makefile.power. Don't modify FCOMMON_OPT | |||||
# FCOMMON_OPT = -frecursive | # FCOMMON_OPT = -frecursive | ||||
# Profiling flags | # Profiling flags | ||||
@@ -159,10 +168,11 @@ COMMON_PROF = -pg | |||||
# Build Debug version | # Build Debug version | ||||
# DEBUG = 1 | # DEBUG = 1 | ||||
# Improve GEMV and GER for small matrices by stack allocation. | |||||
# For details, https://github.com/xianyi/OpenBLAS/pull/482 | |||||
# Set maximum stack allocation. | |||||
# The default value is 2048. A value of 0 disables stack allocation and may reduce | |||||
# GER and GEMV performance. For details, see https://github.com/xianyi/OpenBLAS/pull/482 | |||||
# | # | ||||
MAX_STACK_ALLOC=2048 | |||||
# MAX_STACK_ALLOC = 0 | |||||
# Add a prefix or suffix to all exported symbol names in the shared library. | # Add a prefix or suffix to all exported symbol names in the shared library. | ||||
# Avoid conflicts with other BLAS libraries, especially when using | # Avoid conflicts with other BLAS libraries, especially when using | ||||
@@ -139,6 +139,10 @@ NO_PARALLEL_MAKE=0 | |||||
endif | endif | ||||
GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE) | GETARCH_FLAGS += -DNO_PARALLEL_MAKE=$(NO_PARALLEL_MAKE) | ||||
ifdef MAKE_NB_JOBS | |||||
GETARCH_FLAGS += -DMAKE_NB_JOBS=$(MAKE_NB_JOBS) | |||||
endif | |||||
ifeq ($(HOSTCC), loongcc) | ifeq ($(HOSTCC), loongcc) | ||||
GETARCH_FLAGS += -static | GETARCH_FLAGS += -static | ||||
endif | endif | ||||
@@ -155,7 +159,7 @@ ifndef GOTOBLAS_MAKEFILE | |||||
export GOTOBLAS_MAKEFILE = 1 | export GOTOBLAS_MAKEFILE = 1 | ||||
# Generating Makefile.conf and config.h | # Generating Makefile.conf and config.h | ||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all) | |||||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) TARGET=$(TARGET) all) | |||||
ifndef TARGET_CORE | ifndef TARGET_CORE | ||||
include $(TOPDIR)/Makefile.conf | include $(TOPDIR)/Makefile.conf | ||||
@@ -213,7 +217,9 @@ endif | |||||
# | # | ||||
ifeq ($(OSNAME), Darwin) | ifeq ($(OSNAME), Darwin) | ||||
ifndef MACOSX_DEPLOYMENT_TARGET | |||||
export MACOSX_DEPLOYMENT_TARGET=10.6 | export MACOSX_DEPLOYMENT_TARGET=10.6 | ||||
endif | |||||
MD5SUM = md5 -r | MD5SUM = md5 -r | ||||
endif | endif | ||||
@@ -292,12 +298,14 @@ endif | |||||
ifneq ($(OSNAME), WINNT) | ifneq ($(OSNAME), WINNT) | ||||
ifneq ($(OSNAME), CYGWIN_NT) | ifneq ($(OSNAME), CYGWIN_NT) | ||||
ifneq ($(OSNAME), Interix) | ifneq ($(OSNAME), Interix) | ||||
ifneq ($(OSNAME), Android) | |||||
ifdef SMP | ifdef SMP | ||||
EXTRALIB += -lpthread | EXTRALIB += -lpthread | ||||
endif | endif | ||||
endif | endif | ||||
endif | endif | ||||
endif | endif | ||||
endif | |||||
# ifeq logical or | # ifeq logical or | ||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix)) | ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix)) | ||||
@@ -324,7 +332,8 @@ ifdef SANITY_CHECK | |||||
CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU) | CCOMMON_OPT += -DSANITY_CHECK -DREFNAME=$(*F)f$(BU) | ||||
endif | endif | ||||
ifdef MAX_STACK_ALLOC | |||||
MAX_STACK_ALLOC ?= 2048 | |||||
ifneq ($(MAX_STACK_ALLOC), 0) | |||||
CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC) | CCOMMON_OPT += -DMAX_STACK_ALLOC=$(MAX_STACK_ALLOC) | ||||
endif | endif | ||||
@@ -374,7 +383,7 @@ FCOMMON_OPT += -m128bit-long-double | |||||
endif | endif | ||||
ifeq ($(C_COMPILER), CLANG) | ifeq ($(C_COMPILER), CLANG) | ||||
EXPRECISION = 1 | EXPRECISION = 1 | ||||
CCOMMON_OPT += -DEXPRECISION | |||||
CCOMMON_OPT += -DEXPRECISION | |||||
FCOMMON_OPT += -m128bit-long-double | FCOMMON_OPT += -m128bit-long-double | ||||
endif | endif | ||||
endif | endif | ||||
@@ -388,7 +397,7 @@ endif | |||||
ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
#check | |||||
#check | |||||
ifeq ($(USE_THREAD), 0) | ifeq ($(USE_THREAD), 0) | ||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.) | $(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.) | ||||
endif | endif | ||||
@@ -455,7 +464,7 @@ endif | |||||
endif | endif | ||||
endif | endif | ||||
ifeq ($(ARCH), mips64) | |||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips)) | |||||
NO_BINARY_MODE = 1 | NO_BINARY_MODE = 1 | ||||
endif | endif | ||||
@@ -506,13 +515,16 @@ endif | |||||
ifdef NO_BINARY_MODE | ifdef NO_BINARY_MODE | ||||
ifeq ($(ARCH), mips64) | |||||
ifeq ($(ARCH), $(filter $(ARCH),mips64)) | |||||
ifdef BINARY64 | ifdef BINARY64 | ||||
CCOMMON_OPT += -mabi=64 | CCOMMON_OPT += -mabi=64 | ||||
else | else | ||||
CCOMMON_OPT += -mabi=n32 | CCOMMON_OPT += -mabi=n32 | ||||
endif | endif | ||||
BINARY_DEFINED = 1 | BINARY_DEFINED = 1 | ||||
else ifeq ($(ARCH), $(filter $(ARCH),mips)) | |||||
CCOMMON_OPT += -mabi=32 | |||||
BINARY_DEFINED = 1 | |||||
endif | endif | ||||
ifeq ($(CORE), LOONGSON3A) | ifeq ($(CORE), LOONGSON3A) | ||||
@@ -525,6 +537,21 @@ CCOMMON_OPT += -march=mips64 | |||||
FCOMMON_OPT += -march=mips64 | FCOMMON_OPT += -march=mips64 | ||||
endif | endif | ||||
ifeq ($(CORE), P5600) | |||||
CCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS) | |||||
FCOMMON_OPT += -mips32r5 -mnan=2008 -mtune=p5600 $(MSA_FLAGS) | |||||
endif | |||||
ifeq ($(CORE), I6400) | |||||
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS) | |||||
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=i6400 $(MSA_FLAGS) | |||||
endif | |||||
ifeq ($(CORE), P6600) | |||||
CCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS) | |||||
FCOMMON_OPT += -mips64r6 -mnan=2008 -mtune=p6600 $(MSA_FLAGS) | |||||
endif | |||||
ifeq ($(OSNAME), AIX) | ifeq ($(OSNAME), AIX) | ||||
BINARY_DEFINED = 1 | BINARY_DEFINED = 1 | ||||
endif | endif | ||||
@@ -593,12 +620,14 @@ ifneq ($(NO_LAPACK), 1) | |||||
EXTRALIB += -lgfortran | EXTRALIB += -lgfortran | ||||
endif | endif | ||||
ifdef NO_BINARY_MODE | ifdef NO_BINARY_MODE | ||||
ifeq ($(ARCH), mips64) | |||||
ifeq ($(ARCH), $(filter $(ARCH),mips64)) | |||||
ifdef BINARY64 | ifdef BINARY64 | ||||
FCOMMON_OPT += -mabi=64 | FCOMMON_OPT += -mabi=64 | ||||
else | else | ||||
FCOMMON_OPT += -mabi=n32 | FCOMMON_OPT += -mabi=n32 | ||||
endif | endif | ||||
else ifeq ($(ARCH), $(filter $(ARCH),mips)) | |||||
FCOMMON_OPT += -mabi=32 | |||||
endif | endif | ||||
else | else | ||||
ifdef BINARY64 | ifdef BINARY64 | ||||
@@ -681,21 +710,7 @@ FCOMMON_OPT += -i8 | |||||
endif | endif | ||||
endif | endif | ||||
endif | endif | ||||
ifneq ($(ARCH), mips64) | |||||
ifndef BINARY64 | |||||
FCOMMON_OPT += -m32 | |||||
else | |||||
FCOMMON_OPT += -m64 | |||||
endif | |||||
else | |||||
ifdef BINARY64 | |||||
FCOMMON_OPT += -mabi=64 | |||||
else | |||||
FCOMMON_OPT += -mabi=n32 | |||||
endif | |||||
endif | |||||
ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
FCOMMON_OPT += -mp | FCOMMON_OPT += -mp | ||||
endif | endif | ||||
@@ -711,7 +726,7 @@ endif | |||||
endif | endif | ||||
endif | endif | ||||
ifeq ($(ARCH), mips64) | |||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips)) | |||||
ifndef BINARY64 | ifndef BINARY64 | ||||
FCOMMON_OPT += -n32 | FCOMMON_OPT += -n32 | ||||
else | else | ||||
@@ -741,7 +756,7 @@ endif | |||||
ifeq ($(C_COMPILER), OPEN64) | ifeq ($(C_COMPILER), OPEN64) | ||||
ifeq ($(ARCH), mips64) | |||||
ifeq ($(ARCH), $(filter $(ARCH),mips64 mips)) | |||||
ifndef BINARY64 | ifndef BINARY64 | ||||
CCOMMON_OPT += -n32 | CCOMMON_OPT += -n32 | ||||
else | else | ||||
@@ -963,17 +978,18 @@ ifeq ($(OSNAME), SunOS) | |||||
TAR = gtar | TAR = gtar | ||||
PATCH = gpatch | PATCH = gpatch | ||||
GREP = ggrep | GREP = ggrep | ||||
AWK = nawk | |||||
else | else | ||||
TAR = tar | TAR = tar | ||||
PATCH = patch | PATCH = patch | ||||
GREP = grep | GREP = grep | ||||
AWK = awk | |||||
endif | endif | ||||
ifndef MD5SUM | ifndef MD5SUM | ||||
MD5SUM = md5sum | MD5SUM = md5sum | ||||
endif | endif | ||||
AWK = awk | |||||
REVISION = -r$(VERSION) | REVISION = -r$(VERSION) | ||||
MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION))) | MAJOR_VERSION = $(word 1,$(subst ., ,$(VERSION))) | ||||
@@ -982,16 +998,25 @@ ifeq ($(DEBUG), 1) | |||||
COMMON_OPT += -g | COMMON_OPT += -g | ||||
endif | endif | ||||
ifeq ($(DEBUG), 1) | |||||
FCOMMON_OPT += -g | |||||
endif | |||||
ifndef COMMON_OPT | ifndef COMMON_OPT | ||||
COMMON_OPT = -O2 | COMMON_OPT = -O2 | ||||
endif | endif | ||||
ifndef FCOMMON_OPT | |||||
FCOMMON_OPT = -O2 -frecursive | |||||
endif | |||||
override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) | override CFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) | ||||
override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF) | override PFLAGS += $(COMMON_OPT) $(CCOMMON_OPT) -I$(TOPDIR) -DPROFILE $(COMMON_PROF) | ||||
override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) | override FFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) | ||||
override FPFLAGS += $(COMMON_OPT) $(FCOMMON_OPT) $(COMMON_PROF) | |||||
override FPFLAGS += $(FCOMMON_OPT) $(COMMON_PROF) | |||||
#MAKEOVERRIDES = | #MAKEOVERRIDES = | ||||
#For LAPACK Fortran codes. | #For LAPACK Fortran codes. | ||||
@@ -1120,6 +1145,8 @@ export HAVE_VFP | |||||
export HAVE_VFPV3 | export HAVE_VFPV3 | ||||
export HAVE_VFPV4 | export HAVE_VFPV4 | ||||
export HAVE_NEON | export HAVE_NEON | ||||
export HAVE_MSA | |||||
export MSA_FLAGS | |||||
export KERNELDIR | export KERNELDIR | ||||
export FUNCTION_PROFILE | export FUNCTION_PROFILE | ||||
export TARGET_CORE | export TARGET_CORE | ||||
@@ -1181,4 +1208,3 @@ SUNPATH = /opt/sunstudio12.1 | |||||
else | else | ||||
SUNPATH = /opt/SUNWspro | SUNPATH = /opt/SUNWspro | ||||
endif | endif | ||||
@@ -0,0 +1,6 @@ | |||||
ifeq ($(CORE), Z13) | |||||
CCOMMON_OPT += -march=z13 -mzvector | |||||
FCOMMON_OPT += -march=z13 -mzvector | |||||
endif | |||||
@@ -43,6 +43,35 @@ On X86 box, compile this library for loongson3a CPU with loongcc (based on Open6 | |||||
make DEBUG=1 | make DEBUG=1 | ||||
### Compile with MASS Support on Power CPU (Optional dependency) | |||||
[IBM MASS](http://www-01.ibm.com/software/awdtools/mass/linux/mass-linux.html) library consists of a set of mathematical functions for C, C++, and | |||||
Fortran-language applications that are tuned for optimum performance on POWER architectures. OpenBLAS with MASS requires 64-bit, little-endian OS on POWER. | |||||
The library can be installed as below - | |||||
* On Ubuntu: | |||||
wget -q http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/public.gpg -O- | sudo apt-key add - | |||||
echo "deb http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/ubuntu/ trusty main" | sudo tee /etc/apt/sources.list.d/ibm-xl-compiler-eval.list | |||||
sudo apt-get update | |||||
sudo apt-get install libxlmass-devel.8.1.3 | |||||
* On RHEL/CentOS: | |||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/repodata/repomd.xml.key | |||||
sudo rpm --import repomd.xml.key | |||||
wget http://public.dhe.ibm.com/software/server/POWER/Linux/xl-compiler/eval/ppc64le/rhel7/ibm-xl-compiler-eval.repo | |||||
sudo cp ibm-xl-compiler-eval.repo /etc/yum.repos.d/ | |||||
sudo yum install libxlmass-devel.8.1.3 | |||||
After installing MASS library, compile openblas with USE_MASS=1. | |||||
Example: | |||||
Compiling on Power8 with MASS support - | |||||
make USE_MASS=1 TARGET=POWER8 | |||||
### Install to the directory (optional) | ### Install to the directory (optional) | ||||
Example: | Example: | ||||
@@ -75,12 +104,18 @@ Please read GotoBLAS_01Readme.txt | |||||
#### ARM64: | #### ARM64: | ||||
- **ARMV8**: Experimental | - **ARMV8**: Experimental | ||||
- **ARM Cortex-A57**: Experimental | |||||
#### IBM zEnterprise System: | |||||
- **Z13**: blas3 for double | |||||
### Support OS: | ### Support OS: | ||||
- **GNU/Linux** | - **GNU/Linux** | ||||
- **MingWin/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>. | |||||
- **MingWin or Visual Studio(CMake)/Windows**: Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-use-OpenBLAS-in-Microsoft-Visual-Studio>. | |||||
- **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X. | - **Darwin/Mac OS X**: Experimental. Although GotoBLAS2 supports Darwin, we are the beginner on Mac OS X. | ||||
- **FreeBSD**: Supported by community. We didn't test the library on this OS. | - **FreeBSD**: Supported by community. We didn't test the library on this OS. | ||||
- **Android**: Supported by community. Please read <https://github.com/xianyi/OpenBLAS/wiki/How-to-build-OpenBLAS-for-Android>. | |||||
## Usages | ## Usages | ||||
Link with libopenblas.a or -lopenblas for shared library. | Link with libopenblas.a or -lopenblas for shared library. | ||||
@@ -53,26 +53,34 @@ PPC440 | |||||
PPC440FP2 | PPC440FP2 | ||||
CELL | CELL | ||||
3.MIPS64 CPU: | |||||
3.MIPS CPU: | |||||
P5600 | |||||
4.MIPS64 CPU: | |||||
SICORTEX | SICORTEX | ||||
LOONGSON3A | LOONGSON3A | ||||
LOONGSON3B | LOONGSON3B | ||||
I6400 | |||||
P6600 | |||||
4.IA64 CPU: | |||||
5.IA64 CPU: | |||||
ITANIUM2 | ITANIUM2 | ||||
5.SPARC CPU: | |||||
6.SPARC CPU: | |||||
SPARC | SPARC | ||||
SPARCV7 | SPARCV7 | ||||
6.ARM CPU: | |||||
7.ARM CPU: | |||||
CORTEXA15 | CORTEXA15 | ||||
CORTEXA9 | CORTEXA9 | ||||
ARMV7 | ARMV7 | ||||
ARMV6 | ARMV6 | ||||
ARMV5 | ARMV5 | ||||
7.ARM 64-bit CPU: | |||||
8.ARM 64-bit CPU: | |||||
ARMV8 | ARMV8 | ||||
CORTEXA57 | CORTEXA57 | ||||
VULCAN | |||||
THUNDERX | |||||
THUNDERX2T99 | |||||
@@ -0,0 +1,199 @@ | |||||
# Notes on OpenBLAS usage | |||||
## Usage | |||||
#### Program is Terminated. Because you tried to allocate too many memory regions | |||||
In OpenBLAS, we manage a pool of memory buffers, and the number of buffers is | |||||
defined as follows. | |||||
``` | |||||
#define NUM_BUFFERS (MAX_CPU_NUMBER * 2) | |||||
``` | |||||
This error indicates that the program exceeded the number of buffers. | |||||
Please build OpenBLAS with a larger `NUM_THREADS`. For example, `make | |||||
NUM_THREADS=32` or `make NUM_THREADS=64`. In `Makefile.system`, we will set | |||||
`MAX_CPU_NUMBER=NUM_THREADS`. | |||||
#### How can I use OpenBLAS in multi-threaded applications? | |||||
If your application is already multi-threaded, it will conflict with OpenBLAS | |||||
multi-threading. Thus, you must set OpenBLAS to use a single thread in any of the | |||||
following ways: | |||||
* Set `export OPENBLAS_NUM_THREADS=1` in the environment. | |||||
* Call `openblas_set_num_threads(1)` in the application at runtime. | |||||
* Build the single-threaded version of OpenBLAS, e.g. `make USE_THREAD=0`. | |||||
If the application is parallelized by OpenMP, please use OpenBLAS built with | |||||
`USE_OPENMP=1` | |||||
#### How to choose TARGET manually at runtime when compiled with DYNAMIC_ARCH | |||||
The environment variable that controls the kernel selection is | |||||
`OPENBLAS_CORETYPE` (see `driver/others/dynamic.c`), e.g. `export | |||||
OPENBLAS_CORETYPE=Haswell`. The function `char* openblas_get_corename()` | |||||
returns the target in use. | |||||
#### How could I disable OpenBLAS threading affinity at runtime? | |||||
You can define the `OPENBLAS_MAIN_FREE` or `GOTOBLAS_MAIN_FREE` environment | |||||
variable to disable threading affinity at runtime. For example, before | |||||
running your program: | |||||
``` | |||||
export OPENBLAS_MAIN_FREE=1 | |||||
``` | |||||
Alternatively, you can disable the affinity feature by enabling `NO_AFFINITY=1` | |||||
in `Makefile.rule`. | |||||
## Linking with the library | |||||
* Link with shared library | |||||
`gcc -o test test.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas` | |||||
If the library is multithreaded, please add `-lpthread`. If the library | |||||
contains LAPACK functions, please add `-lgfortran` or other Fortran libs. | |||||
* Link with static library | |||||
`gcc -o test test.c /your/path/libopenblas.a` | |||||
You can download `test.c` from https://gist.github.com/xianyi/5780018 | |||||
On Linux, if OpenBLAS was compiled with threading support (`USE_THREAD=1` by | |||||
default), custom programs statically linked against `libopenblas.a` should also | |||||
link with the pthread library e.g.: | |||||
``` | |||||
gcc -static -I/opt/OpenBLAS/include -L/opt/OpenBLAS/lib -o my_program my_program.c -lopenblas -lpthread | |||||
``` | |||||
Failing to add the `-lpthread` flag will cause errors such as: | |||||
``` | |||||
/opt/OpenBLAS/libopenblas.a(memory.o): In function `_touch_memory': | |||||
memory.c:(.text+0x15): undefined reference to `pthread_mutex_lock' | |||||
memory.c:(.text+0x41): undefined reference to `pthread_mutex_unlock' | |||||
... | |||||
``` | |||||
## Code examples | |||||
#### Call CBLAS interface | |||||
This example shows calling cblas_dgemm in C. https://gist.github.com/xianyi/6930656 | |||||
``` | |||||
/*
 * Minimal CBLAS example: computes C = 1 * A * B^T + 2 * C with cblas_dgemm,
 * where A and B are 3x2 column-major matrices (leading dimension 3) and C is
 * a 3x3 column-major matrix, then prints the nine entries of C in storage
 * order on a single line.
 */
#include <cblas.h>
#include <stdio.h>

/* Standard C requires main to return int; the exit status reports success. */
int main(void)
{
    int i = 0;
    double A[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
    double B[6] = {1.0,2.0,1.0,-3.0,4.0,-1.0};
    double C[9] = {.5,.5,.5,.5,.5,.5,.5,.5,.5};

    /* M = 3, N = 3, K = 2, alpha = 1, lda = 3, ldb = 3, beta = 2, ldc = 3 */
    cblas_dgemm(CblasColMajor, CblasNoTrans, CblasTrans,3,3,2,1,A, 3, B, 3,2,C,3);

    for (i = 0; i < 9; i++)
        printf("%lf ", C[i]);
    printf("\n");

    return 0;
}
``` | |||||
`gcc -o test_cblas_open test_cblas_dgemm.c -I /your_path/OpenBLAS/include/ -L/your_path/OpenBLAS/lib -lopenblas -lpthread -lgfortran` | |||||
#### Call BLAS Fortran interface | |||||
This example shows calling dgemm Fortran interface in C. https://gist.github.com/xianyi/5780018 | |||||
``` | |||||
/*
 * Times a single call to the Fortran BLAS routine dgemm_ from C.
 *
 * Usage: ./time_dgemm <m> <n> <k>
 *
 * Computes C = alpha * A * B + beta * C for an m x k matrix A, a k x n matrix
 * B and an m x n matrix C, then appends one line with the problem size, the
 * elapsed wall-clock time and the achieved MFLOPS to "timeDGEMM.txt".
 *
 * Returns 0 on success and 1 on bad arguments, allocation failure or failure
 * to open the result file.
 */
#include "stdio.h"
#include "stdlib.h"
#include "sys/time.h"
#include "time.h"

/* Fortran interface: every argument is passed by reference. */
extern void dgemm_(char*, char*, int*, int*,int*, double*, double*, int*, double*, int*, double*, double*, int*);

int main(int argc, char* argv[])
{
    int i;
    printf("test!\n");
    if(argc<4){
        printf("Input Error\n");
        return 1;
    }

    int m = atoi(argv[1]);
    int n = atoi(argv[2]);
    int k = atoi(argv[3]);
    /* atoi yields 0 for non-numeric input; reject sizes that make no sense. */
    if (m <= 0 || n <= 0 || k <= 0) {
        printf("Input Error\n");
        return 1;
    }

    int sizeofa = m * k;
    int sizeofb = k * n;
    int sizeofc = m * n;
    char ta = 'N';
    char tb = 'N';
    double alpha = 1.2;
    double beta = 0.001;

    struct timeval start,finish;
    double duration;

    double* A = (double*)malloc(sizeof(double) * sizeofa);
    double* B = (double*)malloc(sizeof(double) * sizeofb);
    double* C = (double*)malloc(sizeof(double) * sizeofc);
    if (A == NULL || B == NULL || C == NULL) {
        fprintf(stderr, "Out of memory\n");
        free(A);   /* free(NULL) is a no-op, so partial failure is safe */
        free(B);
        free(C);
        return 1;
    }

    srand((unsigned)time(NULL));

    /* Deterministic fill; swap in the commented expression for random data. */
    for (i=0; i<sizeofa; i++)
        A[i] = i%3+1;//(rand()%100)/10.0;

    for (i=0; i<sizeofb; i++)
        B[i] = i%3+1;//(rand()%100)/10.0;

    for (i=0; i<sizeofc; i++)
        C[i] = i%3+1;//(rand()%100)/10.0;

    printf("m=%d,n=%d,k=%d,alpha=%lf,beta=%lf,sizeofc=%d\n",m,n,k,alpha,beta,sizeofc);

    gettimeofday(&start, NULL);
    dgemm_(&ta, &tb, &m, &n, &k, &alpha, A, &m, B, &k, &beta, C, &m);
    gettimeofday(&finish, NULL);

    /* Elapsed wall-clock time in seconds. */
    duration = ((double)(finish.tv_sec-start.tv_sec)*1000000 + (double)(finish.tv_usec-start.tv_usec)) / 1000000;

    /* 2*m*n*k floating-point operations; scaled by 1e-6 this is MFLOPS. */
    double mflops = 2.0 * m *n*k;
    mflops = (duration > 0.0) ? mflops/duration*1.0e-6 : 0.0;

    int status = 0;
    FILE *fp = fopen("timeDGEMM.txt", "a");
    if (fp != NULL) {
        fprintf(fp, "%dx%dx%d\t%lf s\t%lf MFLOPS\n", m, n, k, duration, mflops);
        fclose(fp);
    } else {
        perror("timeDGEMM.txt");
        status = 1;
    }

    free(A);
    free(B);
    free(C);
    return status;
}
``` | |||||
` gcc -o time_dgemm time_dgemm.c /your/path/libopenblas.a` | |||||
` ./time_dgemm <m> <n> <k> ` | |||||
## Troubleshooting | |||||
* Please read the [FAQ](https://github.com/xianyi/OpenBLAS/wiki/Faq) first.
* Please use gcc version 4.6 and above to compile Sandy Bridge AVX kernels on Linux/MingW/BSD. | |||||
* Please use Clang version 3.1 or above to compile the library on the Sandy Bridge microarchitecture. Clang 3.0 generates incorrect AVX binary code.
* The number of CPUs/Cores should be less than or equal to 256. On Linux x86_64 (amd64), there is experimental support for up to 1024 CPUs/Cores and 128 NUMA nodes if you build the library with BIGNUMA=1.
* OpenBLAS does not set processor affinity by default. On Linux, you can enable processor affinity by commenting out the line NO_AFFINITY=1 in Makefile.rule. However, this may cause [a conflict with R parallel](https://stat.ethz.ch/pipermail/r-sig-hpc/2012-April/001348.html).
* On Loongson 3A, `make test` may fail because of a pthread_create error (error code EAGAIN). However, the same test case succeeds when it is run directly from the shell.
## BLAS reference manual | |||||
If you want to understand every BLAS function and definition, please read | |||||
[Intel MKL reference manual](https://software.intel.com/sites/products/documentation/doclib/iss/2013/mkl/mklman/GUID-F7ED9FB8-6663-4F44-A62B-61B63C4F0491.htm) | |||||
or [netlib.org](http://netlib.org/blas/) | |||||
Here are [OpenBLAS extension functions](https://github.com/xianyi/OpenBLAS/wiki/OpenBLAS-Extensions) | |||||
## How to reference OpenBLAS. | |||||
You can reference our [papers](https://github.com/xianyi/OpenBLAS/wiki/publications). | |||||
Alternatively, you can cite the OpenBLAS homepage http://www.openblas.net directly. | |||||
@@ -1,4 +1,4 @@ | |||||
version: 0.2.15.{build} | |||||
version: 0.2.19.{build} | |||||
#environment: | #environment: | ||||
@@ -39,4 +39,6 @@ before_build: | |||||
- cmake -G "Visual Studio 12 Win64" . | - cmake -G "Visual Studio 12 Win64" . | ||||
test_script: | test_script: | ||||
- echo Build OK! | |||||
- echo Running Test | |||||
- cd c:\projects\OpenBLAS\utest | |||||
- openblas_utest |
@@ -33,6 +33,22 @@ LIBMKL = -L$(MKL) -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -lgomp -lpthread | |||||
# Apple vecLib | # Apple vecLib | ||||
LIBVECLIB = -framework Accelerate | LIBVECLIB = -framework Accelerate | ||||
ESSL=/opt/ibm/lib | |||||
#LIBESSL = -lesslsmp $(ESSL)/libxlomp_ser.so.1 $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a | |||||
LIBESSL = -lesslsmp $(ESSL)/libxlf90_r.so.1 $(ESSL)/libxlfmath.so.1 $(ESSL)/libxlsmp.so.1 /opt/ibm/xlC/13.1.3/lib/libxl.a | |||||
ifneq ($(NO_LAPACK), 1) | |||||
GOTO_LAPACK_TARGETS=slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | |||||
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \ | |||||
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \ | |||||
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \ | |||||
csymv.goto zsymv.goto \ | |||||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \ | |||||
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto | |||||
else | |||||
GOTO_LAPACK_TARGETS= | |||||
endif | |||||
ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | ||||
@@ -44,6 +60,7 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | |||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \ | ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \ | ||||
sger.goto dger.goto cger.goto zger.goto \ | sger.goto dger.goto cger.goto zger.goto \ | ||||
sdot.goto ddot.goto \ | sdot.goto ddot.goto \ | ||||
srot.goto drot.goto \ | |||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \ | saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \ | ||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \ | scopy.goto dcopy.goto ccopy.goto zcopy.goto \ | ||||
sswap.goto dswap.goto cswap.goto zswap.goto \ | sswap.goto dswap.goto cswap.goto zswap.goto \ | ||||
@@ -142,31 +159,29 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | |||||
else | else | ||||
goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | |||||
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \ | |||||
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \ | |||||
goto :: sgemm.goto dgemm.goto cgemm.goto zgemm.goto \ | |||||
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \ | strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \ | ||||
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \ | strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \ | ||||
ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \ | ssyrk.goto dsyrk.goto csyrk.goto zsyrk.goto \ | ||||
ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \ | ssyr2k.goto dsyr2k.goto csyr2k.goto zsyr2k.goto \ | ||||
sger.goto dger.goto cger.goto zger.goto \ | sger.goto dger.goto cger.goto zger.goto \ | ||||
sdot.goto ddot.goto cdot.goto zdot.goto \ | sdot.goto ddot.goto cdot.goto zdot.goto \ | ||||
srot.goto drot.goto \ | |||||
saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \ | saxpy.goto daxpy.goto caxpy.goto zaxpy.goto \ | ||||
scopy.goto dcopy.goto ccopy.goto zcopy.goto \ | scopy.goto dcopy.goto ccopy.goto zcopy.goto \ | ||||
sswap.goto dswap.goto cswap.goto zswap.goto \ | sswap.goto dswap.goto cswap.goto zswap.goto \ | ||||
sscal.goto dscal.goto cscal.goto zscal.goto \ | sscal.goto dscal.goto cscal.goto zscal.goto \ | ||||
sasum.goto dasum.goto casum.goto zasum.goto \ | sasum.goto dasum.goto casum.goto zasum.goto \ | ||||
ssymv.goto dsymv.goto csymv.goto zsymv.goto \ | |||||
ssymv.goto dsymv.goto \ | |||||
chemv.goto zhemv.goto \ | chemv.goto zhemv.goto \ | ||||
chemm.goto zhemm.goto \ | chemm.goto zhemm.goto \ | ||||
cherk.goto zherk.goto \ | cherk.goto zherk.goto \ | ||||
cher2k.goto zher2k.goto \ | cher2k.goto zher2k.goto \ | ||||
sgemv.goto dgemv.goto cgemv.goto zgemv.goto \ | sgemv.goto dgemv.goto cgemv.goto zgemv.goto \ | ||||
sgesv.goto dgesv.goto cgesv.goto zgesv.goto \ | |||||
sgeev.goto dgeev.goto cgeev.goto zgeev.goto \ | |||||
sgetri.goto dgetri.goto cgetri.goto zgetri.goto \ | |||||
spotrf.goto dpotrf.goto cpotrf.goto zpotrf.goto \ | |||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto | |||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto \ | |||||
smallscaling \ | |||||
isamax.goto idamax.goto icamax.goto izamax.goto \ | |||||
snrm2.goto dnrm2.goto scnrm2.goto dznrm2.goto $(GOTO_LAPACK_TARGETS) | |||||
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | ||||
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \ | scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \ | ||||
@@ -219,7 +234,9 @@ atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \ | |||||
sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \ | sgesv.atlas dgesv.atlas cgesv.atlas zgesv.atlas \ | ||||
sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \ | sgetri.atlas dgetri.atlas cgetri.atlas zgetri.atlas \ | ||||
spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \ | spotrf.atlas dpotrf.atlas cpotrf.atlas zpotrf.atlas \ | ||||
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas | |||||
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas \
isamax.atlas idamax.atlas icamax.atlas izamax.atlas \
snrm2.atlas dnrm2.atlas scnrm2.atlas dznrm2.atlas
mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | ||||
scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \ | scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \ | ||||
@@ -252,7 +269,11 @@ mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | |||||
endif | endif | ||||
essl :: sgemm.essl strmm.essl dgemm.essl dtrmm.essl \ | |||||
cgemm.essl ctrmm.essl zgemm.essl ztrmm.essl \ | |||||
slinpack.essl clinpack.essl dlinpack.essl zlinpack.essl \ | |||||
scholesky.essl ccholesky.essl dcholesky.essl zcholesky.essl \ | |||||
strsm.essl dtrsm.essl ctrsm.essl ztrsm.essl | |||||
veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \ | veclib :: slinpack.veclib dlinpack.veclib clinpack.veclib zlinpack.veclib \ | ||||
scholesky.veclib dcholesky.veclib ccholesky.veclib zcholesky.veclib \ | scholesky.veclib dcholesky.veclib ccholesky.veclib zcholesky.veclib \ | ||||
@@ -305,6 +326,9 @@ slinpack.mkl : slinpack.$(SUFFIX) | |||||
slinpack.veclib : slinpack.$(SUFFIX) | slinpack.veclib : slinpack.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
slinpack.essl : slinpack.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Dlinpack #################################################### | ##################################### Dlinpack #################################################### | ||||
dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME) | dlinpack.goto : dlinpack.$(SUFFIX) ../$(LIBNAME) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | ||||
@@ -321,6 +345,9 @@ dlinpack.mkl : dlinpack.$(SUFFIX) | |||||
dlinpack.veclib : dlinpack.$(SUFFIX) | dlinpack.veclib : dlinpack.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
dlinpack.essl : dlinpack.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Clinpack #################################################### | ##################################### Clinpack #################################################### | ||||
clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME) | clinpack.goto : clinpack.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -338,6 +365,9 @@ clinpack.mkl : clinpack.$(SUFFIX) | |||||
clinpack.veclib : clinpack.$(SUFFIX) | clinpack.veclib : clinpack.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
clinpack.essl : clinpack.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Zlinpack #################################################### | ##################################### Zlinpack #################################################### | ||||
zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME) | zlinpack.goto : zlinpack.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -355,6 +385,9 @@ zlinpack.mkl : zlinpack.$(SUFFIX) | |||||
zlinpack.veclib : zlinpack.$(SUFFIX) | zlinpack.veclib : zlinpack.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
zlinpack.essl : zlinpack.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Scholesky ################################################### | ##################################### Scholesky ################################################### | ||||
scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME) | scholesky.goto : scholesky.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -372,6 +405,9 @@ scholesky.mkl : scholesky.$(SUFFIX) | |||||
scholesky.veclib : scholesky.$(SUFFIX) | scholesky.veclib : scholesky.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
scholesky.essl : scholesky.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Dcholesky ################################################### | ##################################### Dcholesky ################################################### | ||||
dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME) | dcholesky.goto : dcholesky.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -389,6 +425,9 @@ dcholesky.mkl : dcholesky.$(SUFFIX) | |||||
dcholesky.veclib : dcholesky.$(SUFFIX) | dcholesky.veclib : dcholesky.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
dcholesky.essl : dcholesky.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Ccholesky ################################################### | ##################################### Ccholesky ################################################### | ||||
ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME) | ccholesky.goto : ccholesky.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -406,6 +445,9 @@ ccholesky.mkl : ccholesky.$(SUFFIX) | |||||
ccholesky.veclib : ccholesky.$(SUFFIX) | ccholesky.veclib : ccholesky.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
ccholesky.essl : ccholesky.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Zcholesky ################################################### | ##################################### Zcholesky ################################################### | ||||
@@ -424,6 +466,9 @@ zcholesky.mkl : zcholesky.$(SUFFIX) | |||||
zcholesky.veclib : zcholesky.$(SUFFIX) | zcholesky.veclib : zcholesky.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
zcholesky.essl : zcholesky.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Sgemm #################################################### | ##################################### Sgemm #################################################### | ||||
sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME) | sgemm.goto : sgemm.$(SUFFIX) ../$(LIBNAME) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | ||||
@@ -440,6 +485,9 @@ sgemm.mkl : sgemm.$(SUFFIX) | |||||
sgemm.veclib : sgemm.$(SUFFIX) | sgemm.veclib : sgemm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
sgemm.essl : sgemm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Dgemm #################################################### | ##################################### Dgemm #################################################### | ||||
dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME) | dgemm.goto : dgemm.$(SUFFIX) ../$(LIBNAME) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | ||||
@@ -456,6 +504,9 @@ dgemm.mkl : dgemm.$(SUFFIX) | |||||
dgemm.veclib : dgemm.$(SUFFIX) | dgemm.veclib : dgemm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
dgemm.essl : dgemm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Cgemm #################################################### | ##################################### Cgemm #################################################### | ||||
cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME) | cgemm.goto : cgemm.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -473,6 +524,9 @@ cgemm.mkl : cgemm.$(SUFFIX) | |||||
cgemm.veclib : cgemm.$(SUFFIX) | cgemm.veclib : cgemm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
cgemm.essl : cgemm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Zgemm #################################################### | ##################################### Zgemm #################################################### | ||||
zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME) | zgemm.goto : zgemm.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -490,6 +544,9 @@ zgemm.mkl : zgemm.$(SUFFIX) | |||||
zgemm.veclib : zgemm.$(SUFFIX) | zgemm.veclib : zgemm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
zgemm.essl : zgemm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Ssymm #################################################### | ##################################### Ssymm #################################################### | ||||
ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME) | ssymm.goto : ssymm.$(SUFFIX) ../$(LIBNAME) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | ||||
@@ -572,6 +629,9 @@ strmm.mkl : strmm.$(SUFFIX) | |||||
strmm.veclib : strmm.$(SUFFIX) | strmm.veclib : strmm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
strmm.essl : strmm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Dtrmm #################################################### | ##################################### Dtrmm #################################################### | ||||
dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME) | dtrmm.goto : dtrmm.$(SUFFIX) ../$(LIBNAME) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | ||||
@@ -588,6 +648,9 @@ dtrmm.mkl : dtrmm.$(SUFFIX) | |||||
dtrmm.veclib : dtrmm.$(SUFFIX) | dtrmm.veclib : dtrmm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
dtrmm.essl : dtrmm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Ctrmm #################################################### | ##################################### Ctrmm #################################################### | ||||
ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME) | ctrmm.goto : ctrmm.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -605,6 +668,9 @@ ctrmm.mkl : ctrmm.$(SUFFIX) | |||||
ctrmm.veclib : ctrmm.$(SUFFIX) | ctrmm.veclib : ctrmm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
ctrmm.essl : ctrmm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Ztrmm #################################################### | ##################################### Ztrmm #################################################### | ||||
ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME) | ztrmm.goto : ztrmm.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -622,6 +688,9 @@ ztrmm.mkl : ztrmm.$(SUFFIX) | |||||
ztrmm.veclib : ztrmm.$(SUFFIX) | ztrmm.veclib : ztrmm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
ztrmm.essl : ztrmm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Strsm #################################################### | ##################################### Strsm #################################################### | ||||
strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME) | strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | ||||
@@ -638,6 +707,9 @@ strsm.mkl : strsm.$(SUFFIX) | |||||
strsm.veclib : strsm.$(SUFFIX) | strsm.veclib : strsm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
strsm.essl : strsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Dtrsm #################################################### | ##################################### Dtrsm #################################################### | ||||
dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME) | dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | ||||
@@ -654,6 +726,9 @@ dtrsm.mkl : dtrsm.$(SUFFIX) | |||||
dtrsm.veclib : dtrsm.$(SUFFIX) | dtrsm.veclib : dtrsm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
dtrsm.essl : dtrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Ctrsm #################################################### | ##################################### Ctrsm #################################################### | ||||
ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME) | ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -671,6 +746,9 @@ ctrsm.mkl : ctrsm.$(SUFFIX) | |||||
ctrsm.veclib : ctrsm.$(SUFFIX) | ctrsm.veclib : ctrsm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
ctrsm.essl : ctrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Ztrsm #################################################### | ##################################### Ztrsm #################################################### | ||||
ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME) | ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME) | ||||
@@ -688,6 +766,9 @@ ztrsm.mkl : ztrsm.$(SUFFIX) | |||||
ztrsm.veclib : ztrsm.$(SUFFIX) | ztrsm.veclib : ztrsm.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
ztrsm.essl : ztrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBESSL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Ssyrk #################################################### | ##################################### Ssyrk #################################################### | ||||
ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME) | ssyrk.goto : ssyrk.$(SUFFIX) ../$(LIBNAME) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | ||||
@@ -1412,6 +1493,39 @@ zdot.mkl : zdot-intel.$(SUFFIX) | |||||
zdot.veclib : zdot-intel.$(SUFFIX) | zdot.veclib : zdot-intel.$(SUFFIX) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | $(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
##################################### Srot #################################################### | |||||
srot.goto : srot.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
srot.acml : srot.$(SUFFIX) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
srot.atlas : srot.$(SUFFIX) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
srot.mkl : srot.$(SUFFIX) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
srot.veclib : srot.$(SUFFIX) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Drot #################################################### | |||||
drot.goto : drot.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
drot.acml : drot.$(SUFFIX) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
drot.atlas : drot.$(SUFFIX) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
drot.mkl : drot.$(SUFFIX) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
drot.veclib : drot.$(SUFFIX) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Saxpy #################################################### | ##################################### Saxpy #################################################### | ||||
saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME) | saxpy.goto : saxpy.$(SUFFIX) ../$(LIBNAME) | ||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | $(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | ||||
@@ -1833,6 +1947,63 @@ zgemm3m.mkl : zgemm3m.$(SUFFIX) | |||||
zgemm3m.veclib : zgemm3m.$(SUFFIX) | zgemm3m.veclib : zgemm3m.$(SUFFIX) | ||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBVECLIB) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
############################################## ISAMAX ############################################## | |||||
isamax.goto : isamax.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
isamax.atlas : isamax.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
############################################## IDAMAX ############################################## | |||||
idamax.goto : idamax.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
idamax.atlas : idamax.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
############################################## ICAMAX ############################################## | |||||
icamax.goto : icamax.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
icamax.atlas : icamax.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
############################################## IZAMAX ############################################## | |||||
izamax.goto : izamax.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
izamax.atlas : izamax.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
############################################## SNRM2 ############################################## | |||||
snrm2.goto : snrm2.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
snrm2.atlas : snrm2.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
############################################## DNRM2 ############################################## | |||||
dnrm2.goto : dnrm2.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
dnrm2.atlas : dnrm2.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
############################################## SCNRM2 ##############################################
scnrm2.goto : scnrm2.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
scnrm2.atlas : scnrm2.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
############################################## DZNRM2 ##############################################
dznrm2.goto : dznrm2.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) -lm | |||||
dznrm2.atlas : dznrm2.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
################################################################################################### | ################################################################################################### | ||||
slinpack.$(SUFFIX) : linpack.c | slinpack.$(SUFFIX) : linpack.c | ||||
@@ -2123,6 +2294,13 @@ cgesv.$(SUFFIX) : gesv.c | |||||
zgesv.$(SUFFIX) : gesv.c | zgesv.$(SUFFIX) : gesv.c | ||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | ||||
srot.$(SUFFIX) : rot.c | |||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^ | |||||
drot.$(SUFFIX) : rot.c | |||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^ | |||||
@@ -2133,8 +2311,37 @@ zgemm3m.$(SUFFIX) : gemm3m.c | |||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | ||||
isamax.$(SUFFIX) : iamax.c | |||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^ | |||||
idamax.$(SUFFIX) : iamax.c | |||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^ | |||||
icamax.$(SUFFIX) : iamax.c | |||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^ | |||||
izamax.$(SUFFIX) : iamax.c | |||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | |||||
snrm2.$(SUFFIX) : nrm2.c | |||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^ | |||||
dnrm2.$(SUFFIX) : nrm2.c | |||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^ | |||||
scnrm2.$(SUFFIX) : nrm2.c | |||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^ | |||||
dznrm2.$(SUFFIX) : nrm2.c | |||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | |||||
smallscaling: smallscaling.c ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(EXTRALIB) -fopenmp -lm -lpthread | |||||
clean :: | clean :: | ||||
@rm -f *.goto *.mkl *.acml *.atlas *.veclib | |||||
@rm -f *.goto *.mkl *.acml *.atlas *.veclib *.essl smallscaling | |||||
include $(TOPDIR)/Makefile.tail | include $(TOPDIR)/Makefile.tail | ||||
@@ -183,9 +183,9 @@ int main(int argc, char *argv[]){ | |||||
timeg /= loops; | timeg /= loops; | ||||
#ifdef COMPLEX | #ifdef COMPLEX | ||||
fprintf(stderr, " %10.2f MFlops\n", 4. * (double)m / timeg * 1.e-6); | |||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg); | |||||
#else | #else | ||||
fprintf(stderr, " %10.2f MFlops\n", 2. * (double)m / timeg * 1.e-6); | |||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg); | |||||
#endif | #endif | ||||
} | } | ||||
@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){ | |||||
timeg /= loops; | timeg /= loops; | ||||
fprintf(stderr, | fprintf(stderr, | ||||
" %10.2f MFlops\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6); | |||||
" %10.2f MFlops %10.6f sec\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg); | |||||
} | } | ||||
@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){ | |||||
timeg /= loops; | timeg /= loops; | ||||
fprintf(stderr, | fprintf(stderr, | ||||
" %10.2f MBytes\n", | |||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6); | |||||
" %10.2f MBytes %10.6f sec\n", | |||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||||
} | } | ||||
@@ -184,8 +184,8 @@ int main(int argc, char *argv[]){ | |||||
timeg /= loops; | timeg /= loops; | ||||
fprintf(stderr, | fprintf(stderr, | ||||
" %10.2f MFlops\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6); | |||||
" %10.2f MFlops %10.6f sec\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg); | |||||
} | } | ||||
@@ -221,7 +221,7 @@ int main(int argc, char *argv[]){ | |||||
timeg /= loops; | timeg /= loops; | ||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6); | |||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg); | |||||
} | } | ||||
} | } | ||||
@@ -258,7 +258,7 @@ int main(int argc, char *argv[]){ | |||||
timeg /= loops; | timeg /= loops; | ||||
fprintf(stderr, " %10.2f MFlops\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6); | |||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", COMPSIZE * COMPSIZE * 2. * (double)m * (double)n / timeg * 1.e-6, timeg); | |||||
} | } | ||||
} | } | ||||
@@ -0,0 +1,192 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2016, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
/* Bind IAMAX to the index-of-max-abs routine matching the COMPLEX / DOUBLE
   build flags (isamax, idamax, icamax or izamax). */
#undef IAMAX

#if defined(COMPLEX) && defined(DOUBLE)
#define IAMAX   BLASFUNC(izamax)
#elif defined(COMPLEX)
#define IAMAX   BLASFUNC(icamax)
#elif defined(DOUBLE)
#define IAMAX   BLASFUNC(idamax)
#else
#define IAMAX   BLASFUNC(isamax)
#endif
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset (in microseconds) subtracted below to rebase the file time onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() stand-in for Windows builds.
 *
 *   tv  receives the current time split into seconds and microseconds;
 *       when NULL nothing is written.
 *   tz  ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;  /* time-zone information is not produced by this shim */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* glue the two 32-bit halves into one 64-bit tick count */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;

      /* converting file time to unix epoch */
      tmpres /= 10;  /* convert into microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}

#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * malloc() replacement backed by a SysV shared-memory segment requested with
 * SHM_HUGETLB.  The trailing "&& 0" in the guard above keeps this whole
 * section compiled out.
 *
 *   size  number of bytes wanted; the request passed to shmget() is
 *         (size + HUGE_PAGESIZE) masked down to a HUGE_PAGESIZE multiple.
 *
 * Returns the attached address.  Prints a message and exits with status 1
 * if either shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){
  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* mark the segment for removal right after attaching */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif
int main(int argc, char *argv[]){ | |||||
FLOAT *x; | |||||
blasint m, i; | |||||
blasint inc_x=1; | |||||
int loops = 1; | |||||
int l; | |||||
char *p; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1,timeg; | |||||
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
for(m = from; m <= to; m += step) | |||||
{ | |||||
timeg=0; | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
IAMAX (&m, x, &inc_x); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
timeg /= loops; | |||||
fprintf(stderr, | |||||
" %10.2f MFlops %10.6f sec\n", | |||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -0,0 +1,193 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2016, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
/* Bind NRM2 to the Euclidean-norm routine matching the COMPLEX / DOUBLE
   build flags (snrm2, dnrm2, scnrm2 or dznrm2). */
#undef NRM2

#if defined(COMPLEX) && defined(DOUBLE)
#define NRM2   BLASFUNC(dznrm2)
#elif defined(COMPLEX)
#define NRM2   BLASFUNC(scnrm2)
#elif defined(DOUBLE)
#define NRM2   BLASFUNC(dnrm2)
#else
#define NRM2   BLASFUNC(snrm2)
#endif
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset (in microseconds) subtracted below to rebase the file time onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() stand-in for Windows builds.
 *
 *   tv  receives the current time split into seconds and microseconds;
 *       when NULL nothing is written.
 *   tz  ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;  /* time-zone information is not produced by this shim */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* glue the two 32-bit halves into one 64-bit tick count */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;

      /* converting file time to unix epoch */
      tmpres /= 10;  /* convert into microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}

#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * malloc() replacement backed by a SysV shared-memory segment requested with
 * SHM_HUGETLB.  The trailing "&& 0" in the guard above keeps this whole
 * section compiled out.
 *
 *   size  number of bytes wanted; the request passed to shmget() is
 *         (size + HUGE_PAGESIZE) masked down to a HUGE_PAGESIZE multiple.
 *
 * Returns the attached address.  Prints a message and exits with status 1
 * if either shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){
  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* mark the segment for removal right after attaching */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif
int main(int argc, char *argv[]){ | |||||
FLOAT *x; | |||||
blasint m, i; | |||||
blasint inc_x=1; | |||||
int loops = 1; | |||||
int l; | |||||
char *p; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1,timeg; | |||||
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
for(m = from; m <= to; m += step) | |||||
{ | |||||
timeg=0; | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
NRM2 (&m, x, &inc_x); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
timeg /= loops; | |||||
fprintf(stderr, | |||||
" %10.2f MFlops %10.6f sec\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg); | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -0,0 +1,197 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
/* Bind ROT to the plane-rotation routine matching the DOUBLE build flag.
   The macro being (re)defined is ROT, so that is the name to clear first. */
#undef ROT

#ifdef DOUBLE
#define ROT   BLASFUNC(drot)
#else
#define ROT   BLASFUNC(srot)
#endif
#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
/* Offset (in microseconds) subtracted below to rebase the file time onto the Unix epoch. */
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

/*
 * Minimal gettimeofday() stand-in for Windows builds.
 *
 *   tv  receives the current time split into seconds and microseconds;
 *       when NULL nothing is written.
 *   tz  ignored.
 *
 * Always returns 0.
 */
int gettimeofday(struct timeval *tv, void *tz){

  FILETIME ft;
  unsigned __int64 tmpres = 0;

  (void)tz;  /* time-zone information is not produced by this shim */

  if (NULL != tv)
    {
      GetSystemTimeAsFileTime(&ft);

      /* glue the two 32-bit halves into one 64-bit tick count */
      tmpres |= ft.dwHighDateTime;
      tmpres <<= 32;
      tmpres |= ft.dwLowDateTime;

      /* converting file time to unix epoch */
      tmpres /= 10;  /* convert into microseconds */
      tmpres -= DELTA_EPOCH_IN_MICROSECS;
      tv->tv_sec = (long)(tmpres / 1000000UL);
      tv->tv_usec = (long)(tmpres % 1000000UL);
    }

  return 0;
}

#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

/*
 * malloc() replacement backed by a SysV shared-memory segment requested with
 * SHM_HUGETLB.  The trailing "&& 0" in the guard above keeps this whole
 * section compiled out.
 *
 *   size  number of bytes wanted; the request passed to shmget() is
 *         (size + HUGE_PAGESIZE) masked down to a HUGE_PAGESIZE multiple.
 *
 * Returns the attached address.  Prints a message and exits with status 1
 * if either shmget() or shmat() fails.
 */
static void *huge_malloc(BLASLONG size){
  void *address;
  int shmid;

  shmid = shmget(IPC_PRIVATE,
                 (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                 SHM_HUGETLB | IPC_CREAT |0600);
  if (shmid < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  address = shmat(shmid, NULL, SHM_RND);
  if ((BLASLONG)address == -1){
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* mark the segment for removal right after attaching */
  shmctl(shmid, IPC_RMID, 0);

  return address;
}

#define malloc huge_malloc

#endif
int main(int argc, char *argv[]){ | |||||
FLOAT *x, *y; | |||||
// FLOAT result; | |||||
blasint m, i; | |||||
blasint inc_x=1,inc_y=1; | |||||
FLOAT c[1] = { 2.0 }; | |||||
FLOAT s[1] = { 2.0 }; | |||||
int loops = 1; | |||||
int l; | |||||
char *p; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1,timeg; | |||||
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
if ((p = getenv("OPENBLAS_INCY"))) inc_y = atoi(p); | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Inc_y = %d Loops = %d\n", from, to, step,inc_x,inc_y,loops); | |||||
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
if (( y = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_y) * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
for(m = from; m <= to; m += step) | |||||
{ | |||||
timeg=0; | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for (l=0; l<loops; l++) | |||||
{ | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||||
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
ROT (&m, x, &inc_x, y, &inc_y, c, s); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
timeg += time1; | |||||
} | |||||
timeg /= loops; | |||||
fprintf(stderr, | |||||
" %10.2f MFlops %10.6f sec\n", | |||||
COMPSIZE * COMPSIZE * 6. * (double)m / timeg * 1.e-6, timeg); | |||||
} | |||||
return 0; | |||||
} | |||||
// void main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__"))); |
@@ -189,9 +189,9 @@ int main(int argc, char *argv[]){ | |||||
timeg /= loops; | timeg /= loops; | ||||
#ifdef COMPLEX | #ifdef COMPLEX | ||||
fprintf(stderr, " %10.2f MFlops\n", 6. * (double)m / timeg * 1.e-6); | |||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 6. * (double)m / timeg * 1.e-6, timeg); | |||||
#else | #else | ||||
fprintf(stderr, " %10.2f MFlops\n", 1. * (double)m / timeg * 1.e-6); | |||||
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 1. * (double)m / timeg * 1.e-6, timeg); | |||||
#endif | #endif | ||||
} | } | ||||
@@ -2,61 +2,54 @@ | |||||
argv <- commandArgs(trailingOnly = TRUE) | argv <- commandArgs(trailingOnly = TRUE) | ||||
nfrom = 128 | |||||
nto = 2048 | |||||
nstep = 128 | |||||
loops = 1 | |||||
if ( length(argv) > 0 ) { | |||||
for ( z in 1:length(argv) ) { | |||||
if ( z == 1 ) { | |||||
nfrom <- as.numeric(argv[z]) | |||||
} else if ( z==2 ) { | |||||
nto <- as.numeric(argv[z]) | |||||
} else if ( z==3 ) { | |||||
nstep <- as.numeric(argv[z]) | |||||
} else if ( z==4 ) { | |||||
loops <- as.numeric(argv[z]) | |||||
} | |||||
} | |||||
nfrom <- 128 | |||||
nto <- 2048 | |||||
nstep <- 128 | |||||
loops <- 1 | |||||
if (length(argv) > 0) { | |||||
for (z in 1:length(argv)) { | |||||
if (z == 1) { | |||||
nfrom <- as.numeric(argv[z]) | |||||
} else if (z == 2) { | |||||
nto <- as.numeric(argv[z]) | |||||
} else if (z == 3) { | |||||
nstep <- as.numeric(argv[z]) | |||||
} else if (z == 4) { | |||||
loops <- as.numeric(argv[z]) | |||||
} | |||||
} | |||||
} | } | ||||
p=Sys.getenv("OPENBLAS_LOOPS") | |||||
if ( p != "" ) { | |||||
loops <- as.numeric(p) | |||||
} | |||||
p <- Sys.getenv("OPENBLAS_LOOPS") | |||||
if (p != "") { | |||||
loops <- as.numeric(p) | |||||
} | |||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops)) | |||||
cat(sprintf( | |||||
"From %.0f To %.0f Step=%.0f Loops=%.0f\n", | |||||
nfrom, | |||||
nto, | |||||
nstep, | |||||
loops | |||||
)) | |||||
cat(sprintf(" SIZE Flops Time\n")) | cat(sprintf(" SIZE Flops Time\n")) | ||||
n = nfrom | |||||
while ( n <= nto ) { | |||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) | |||||
l = 1 | |||||
start <- proc.time()[3] | |||||
while ( l <= loops ) { | |||||
n <- nfrom | |||||
while (n <= nto) { | |||||
A <- matrix(rnorm(n * n), ncol = n, nrow = n) | |||||
ev <- 0 | |||||
z <- system.time(for (l in 1:loops) { | |||||
ev <- eigen(A) | |||||
}) | |||||
ev <- eigen(A) | |||||
l = l + 1 | |||||
} | |||||
mflops <- (26.66 * n * n * n) * loops / (z[3] * 1.0e6) | |||||
end <- proc.time()[3] | |||||
timeg = end - start | |||||
mflops = (26.66 *n*n*n ) * loops / ( timeg * 1.0e6 ) | |||||
st <- sprintf("%.0fx%.0f :", n, n) | |||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3])) | |||||
st = sprintf("%.0fx%.0f :",n , n) | |||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg)) | |||||
n = n + nstep | |||||
n <- n + nstep | |||||
} | } | ||||
@@ -2,62 +2,63 @@ | |||||
argv <- commandArgs(trailingOnly = TRUE) | argv <- commandArgs(trailingOnly = TRUE) | ||||
nfrom = 128 | |||||
nto = 2048 | |||||
nstep = 128 | |||||
loops = 1 | |||||
if ( length(argv) > 0 ) { | |||||
for ( z in 1:length(argv) ) { | |||||
if ( z == 1 ) { | |||||
nfrom <- as.numeric(argv[z]) | |||||
} else if ( z==2 ) { | |||||
nto <- as.numeric(argv[z]) | |||||
} else if ( z==3 ) { | |||||
nstep <- as.numeric(argv[z]) | |||||
} else if ( z==4 ) { | |||||
loops <- as.numeric(argv[z]) | |||||
} | |||||
} | |||||
nfrom <- 128 | |||||
nto <- 2048 | |||||
nstep <- 128 | |||||
loops <- 1 | |||||
if (length(argv) > 0) { | |||||
for (z in 1:length(argv)) { | |||||
if (z == 1) { | |||||
nfrom <- as.numeric(argv[z]) | |||||
} else if (z == 2) { | |||||
nto <- as.numeric(argv[z]) | |||||
} else if (z == 3) { | |||||
nstep <- as.numeric(argv[z]) | |||||
} else if (z == 4) { | |||||
loops <- as.numeric(argv[z]) | |||||
} | |||||
} | |||||
} | } | ||||
p=Sys.getenv("OPENBLAS_LOOPS") | |||||
if ( p != "" ) { | |||||
loops <- as.numeric(p) | |||||
} | |||||
p <- Sys.getenv("OPENBLAS_LOOPS") | |||||
if (p != "") { | |||||
loops <- as.numeric(p) | |||||
} | |||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops)) | |||||
cat(sprintf( | |||||
"From %.0f To %.0f Step=%.0f Loops=%.0f\n", | |||||
nfrom, | |||||
nto, | |||||
nstep, | |||||
loops | |||||
)) | |||||
cat(sprintf(" SIZE Flops Time\n")) | cat(sprintf(" SIZE Flops Time\n")) | ||||
n = nfrom | |||||
while ( n <= nto ) { | |||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) | |||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) | |||||
l = 1 | |||||
start <- proc.time()[3] | |||||
n <- nfrom | |||||
while (n <= nto) { | |||||
A <- matrix(runif(n * n), | |||||
ncol = n, | |||||
nrow = n, | |||||
byrow = TRUE) | |||||
B <- matrix(runif(n * n), | |||||
ncol = n, | |||||
nrow = n, | |||||
byrow = TRUE) | |||||
C <- 1 | |||||
while ( l <= loops ) { | |||||
z <- system.time(for (l in 1:loops) { | |||||
C <- A %*% B | |||||
l <- l + 1 | |||||
}) | |||||
C <- A %*% B | |||||
l = l + 1 | |||||
} | |||||
mflops <- (2.0 * n * n * n) * loops / (z[3] * 1.0e6) | |||||
end <- proc.time()[3] | |||||
timeg = end - start | |||||
mflops = ( 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 ) | |||||
st <- sprintf("%.0fx%.0f :", n, n) | |||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3])) | |||||
st = sprintf("%.0fx%.0f :",n , n) | |||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg)) | |||||
n = n + nstep | |||||
n <- n + nstep | |||||
} | } | ||||
@@ -2,62 +2,56 @@ | |||||
argv <- commandArgs(trailingOnly = TRUE) | argv <- commandArgs(trailingOnly = TRUE) | ||||
nfrom = 128 | |||||
nto = 2048 | |||||
nstep = 128 | |||||
loops = 1 | |||||
if ( length(argv) > 0 ) { | |||||
for ( z in 1:length(argv) ) { | |||||
if ( z == 1 ) { | |||||
nfrom <- as.numeric(argv[z]) | |||||
} else if ( z==2 ) { | |||||
nto <- as.numeric(argv[z]) | |||||
} else if ( z==3 ) { | |||||
nstep <- as.numeric(argv[z]) | |||||
} else if ( z==4 ) { | |||||
loops <- as.numeric(argv[z]) | |||||
} | |||||
} | |||||
nfrom <- 128 | |||||
nto <- 2048 | |||||
nstep <- 128 | |||||
loops <- 1 | |||||
if (length(argv) > 0) { | |||||
for (z in 1:length(argv)) { | |||||
if (z == 1) { | |||||
nfrom <- as.numeric(argv[z]) | |||||
} else if (z == 2) { | |||||
nto <- as.numeric(argv[z]) | |||||
} else if (z == 3) { | |||||
nstep <- as.numeric(argv[z]) | |||||
} else if (z == 4) { | |||||
loops <- as.numeric(argv[z]) | |||||
} | |||||
} | |||||
} | } | ||||
p=Sys.getenv("OPENBLAS_LOOPS") | |||||
if ( p != "" ) { | |||||
loops <- as.numeric(p) | |||||
} | |||||
p <- Sys.getenv("OPENBLAS_LOOPS") | |||||
if (p != "") { | |||||
loops <- as.numeric(p) | |||||
} | |||||
cat(sprintf("From %.0f To %.0f Step=%.0f Loops=%.0f\n",nfrom, nto, nstep, loops)) | |||||
cat(sprintf( | |||||
"From %.0f To %.0f Step=%.0f Loops=%.0f\n", | |||||
nfrom, | |||||
nto, | |||||
nstep, | |||||
loops | |||||
)) | |||||
cat(sprintf(" SIZE Flops Time\n")) | cat(sprintf(" SIZE Flops Time\n")) | ||||
n = nfrom | |||||
while ( n <= nto ) { | |||||
A <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) | |||||
B <- matrix(runif(n*n), ncol = n, nrow = n, byrow = TRUE) | |||||
l = 1 | |||||
start <- proc.time()[3] | |||||
n <- nfrom | |||||
while (n <= nto) { | |||||
A <- matrix(rnorm(n * n), ncol = n, nrow = n) | |||||
B <- matrix(rnorm(n * n), ncol = n, nrow = n) | |||||
while ( l <= loops ) { | |||||
z <- system.time(for (l in 1:loops) { | |||||
solve(A, B) | |||||
}) | |||||
solve(A,B) | |||||
l = l + 1 | |||||
} | |||||
mflops <- | |||||
(2.0 / 3.0 * n * n * n + 2.0 * n * n * n) * loops / (z[3] * 1.0e6) | |||||
end <- proc.time()[3] | |||||
timeg = end - start | |||||
mflops = (2.0/3.0 *n*n*n + 2.0 *n*n*n ) * loops / ( timeg * 1.0e6 ) | |||||
st <- sprintf("%.0fx%.0f :", n, n) | |||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, z[3])) | |||||
st = sprintf("%.0fx%.0f :",n , n) | |||||
cat(sprintf("%20s %10.2f MFlops %10.6f sec\n", st, mflops, timeg)) | |||||
n = n + nstep | |||||
n <- n + nstep | |||||
} | } | ||||
@@ -0,0 +1,58 @@ | |||||
#!/usr/bin/env python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy import zeros | |||||
from numpy.random import randn | |||||
from scipy.linalg import blas | |||||
def run_dsyrk(N, l):
    """Time ``l`` consecutive ``blas.dsyrk`` calls on an N x N matrix.

    A is a random float64 matrix and C a zeroed float64 matrix, both in
    Fortran order; C is passed with ``overwrite_c=True``.  One result line
    (size, MFlops, elapsed seconds) is printed to stdout.

    N -- matrix dimension
    l -- number of dsyrk calls inside the timed region

    Returns None.
    """
    A = randn(N, N).astype('float64', order='F')
    C = zeros((N, N), dtype='float64', order='F')

    start = time.time()
    for _ in range(0, l):
        blas.dsyrk(1.0, A, c=C, overwrite_c=True)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = (N * N * N) * l / timediff
        mflops *= 1e-6
    else:
        # The run finished within the clock's resolution; report an
        # unbounded rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults; overridden by positional arguments: from, to, step, loops.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # sys.argv[0] is the script name, so the useful positions start at 1.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    if 'OPENBLAS_LOOPS' in os.environ:
        p = os.environ['OPENBLAS_LOOPS']
        if p:
            LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # NMAX is inclusive.  Stopping at NMAX + 1 (rather than NMAX + NINC)
    # keeps the sweep from running a size larger than NMAX when
    # (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_dsyrk(i, LOOPS)
@@ -0,0 +1,58 @@ | |||||
#!/usr/bin/env python | |||||
import os | |||||
import sys | |||||
import time | |||||
import numpy | |||||
from numpy import zeros | |||||
from numpy.random import randn | |||||
from scipy.linalg import blas | |||||
def run_ssyrk(N, l):
    """Time ``l`` consecutive ``blas.ssyrk`` calls on an N x N matrix.

    A is a random float32 matrix and C a zeroed float32 matrix, both in
    Fortran order; C is passed with ``overwrite_c=True``.  One result line
    (size, MFlops, elapsed seconds) is printed to stdout.

    N -- matrix dimension
    l -- number of ssyrk calls inside the timed region

    Returns None.
    """
    A = randn(N, N).astype('float32', order='F')
    C = zeros((N, N), dtype='float32', order='F')

    start = time.time()
    for _ in range(0, l):
        blas.ssyrk(1.0, A, c=C, overwrite_c=True)
    end = time.time()

    timediff = (end - start)
    if timediff > 0:
        mflops = (N * N * N) * l / timediff
        mflops *= 1e-6
    else:
        # The run finished within the clock's resolution; report an
        # unbounded rate instead of raising ZeroDivisionError.
        mflops = float('inf')

    size = "%dx%d" % (N, N)
    print("%14s :\t%20f MFlops\t%20f sec" % (size, mflops, timediff))
if __name__ == "__main__":
    # Defaults; overridden by positional arguments: from, to, step, loops.
    N = 128
    NMAX = 2048
    NINC = 128
    LOOPS = 1

    # sys.argv[0] is the script name, so the useful positions start at 1.
    # Anything past the fourth argument is ignored.
    for z, arg in enumerate(sys.argv):
        if z == 1:
            N = int(arg)
        elif z == 2:
            NMAX = int(arg)
        elif z == 3:
            NINC = int(arg)
        elif z == 4:
            LOOPS = int(arg)

    # A non-empty OPENBLAS_LOOPS takes precedence over the command line.
    if 'OPENBLAS_LOOPS' in os.environ:
        p = os.environ['OPENBLAS_LOOPS']
        if p:
            LOOPS = int(p)

    print("From: %d To: %d Step=%d Loops=%d" % (N, NMAX, NINC, LOOPS))
    print("\tSIZE\t\t\tFlops\t\t\t\t\tTime")

    # NMAX is inclusive.  Stopping at NMAX + 1 (rather than NMAX + NINC)
    # keeps the sweep from running a size larger than NMAX when
    # (NMAX - N) is not a multiple of NINC.
    for i in range(N, NMAX + 1, NINC):
        run_ssyrk(i, LOOPS)
@@ -0,0 +1,197 @@ | |||||
// run with OPENBLAS_NUM_THREADS=1 and OMP_NUM_THREADS=n | |||||
#include <math.h> | |||||
#include <stdlib.h> | |||||
#include <stdio.h> | |||||
#include <time.h> | |||||
#include <cblas.h> | |||||
#include <omp.h> | |||||
#include <pthread.h> | |||||
#define MIN_SIZE 5 | |||||
#define MAX_SIZE 60 | |||||
#define NB_SIZE 10 | |||||
// Number of loops for a 1x1 matrix. Lower it if the test is
// too slow on your computer.
#define NLOOP 2e7 | |||||
/* Everything one benchmark run needs; handed to a *_bench function. */
typedef struct {
    /* problem dimension read by the *_bench functions */
    int matrix_size;
    /* loop budget; the *_bench functions divide it by matrix_size */
    int n_loop;
    /* benchmark driver; pthread_func_wrapper calls it with the BenchParam pointer */
    void (*bench_func)();
    /* BLAS entry point invoked by the driver; left without a prototype
       because the drivers call it with different argument lists */
    void (*blas_func)();
    /* allocates and fills a buffer of `size` elements */
    void *(*create_matrix)(int size);
} BenchParam;
/*
 * Allocate a single-precision buffer of `size` elements and fill element i
 * with 1e3 * i / size.
 *
 * Returns the buffer (release with free()).  Prints a message and exits
 * with status 1 if the allocation fails.
 */
void * s_create_matrix(int size) {
    /* size the buffer by its real element type (float) */
    float * r = malloc((size_t)size * sizeof(*r));
    int i;

    if (r == NULL && size > 0) {
        fprintf(stderr, "Out of Memory!!\n");
        exit(1);
    }

    for(i = 0; i < size; i++)
        r[i] = 1e3 * i / size;
    return r;
}
/*
 * Allocate a single-precision buffer holding 2 * `size` floats (two per
 * element) and fill entry i with 1e3 * i / size.
 *
 * Returns the buffer (release with free()).  Prints a message and exits
 * with status 1 if the allocation fails.
 */
void * c_create_matrix(int size) {
    /* size the buffer by its real element type (float) */
    float * r = malloc((size_t)size * 2 * sizeof(*r));
    int i;

    if (r == NULL && size > 0) {
        fprintf(stderr, "Out of Memory!!\n");
        exit(1);
    }

    for(i = 0; i < 2 * size; i++)
        r[i] = 1e3 * i / size;
    return r;
}
/*
 * Allocate a double-precision buffer holding 2 * `size` doubles (two per
 * element) and fill entry i with 1e3 * i / size.
 *
 * Returns the buffer (release with free()).  Prints a message and exits
 * with status 1 if the allocation fails.
 */
void * z_create_matrix(int size) {
    double * r = malloc((size_t)size * 2 * sizeof(*r));
    int i;

    if (r == NULL && size > 0) {
        fprintf(stderr, "Out of Memory!!\n");
        exit(1);
    }

    for(i = 0; i < 2 * size; i++)
        r[i] = 1e3 * i / size;
    return r;
}
// Allocate a double-precision buffer of `size` elements and set element i
// to 1e3 * i / size. The caller owns the buffer and must free() it.
// Terminates the program if the allocation fails.
void * d_create_matrix(int size) {
    double * r = malloc((size_t)size * sizeof *r);
    int i;
    // malloc(0) may legitimately return NULL, so only treat NULL as an
    // error when memory was really requested.
    if (r == NULL && size > 0) {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    for(i = 0; i < size; i++)
        r[i] = 1e3 * i / size;
    return r;
}
// Benchmark a triangular matrix-vector product routine.
// Builds a dim x dim matrix and a dim-element vector with
// param->create_matrix, then calls param->blas_func
// (upper, no-transpose, non-unit diagonal) n_loop / matrix_size times.
void trmv_bench(BenchParam * param)
{
    int dim = param->matrix_size;
    int iterations = param->n_loop / dim;
    int unit_stride = 1;
    void * matrix = param->create_matrix(dim * dim);
    void * vec = param->create_matrix(dim);
    int remaining;

    for(remaining = iterations; remaining > 0; remaining--) {
        param->blas_func("U", "N", "N", &dim, matrix, &dim, vec, &unit_stride);
    }

    free(matrix);
    free(vec);
}
// Benchmark a general matrix-vector product routine (y = alpha*A*y + beta*y).
// Builds a size x size matrix and a size-element vector with
// param->create_matrix, then calls param->blas_func
// n_loop / matrix_size times. The same vector is used as input and output.
void gemv_bench(BenchParam * param)
{
    int i, n;
    int size = param->matrix_size;
    n = param->n_loop / size;
    // alpha/beta scalar. Stored as a (real, imaginary) pair so that it is
    // valid for the complex routine (zgemv_) as well as the real one
    // (dgemv_, which only reads the first element). Passing the address of
    // a lone double made the complex routine read past the variable.
    double v[2] = {1.01, 0.0};
    int one = 1;
    void * A = param->create_matrix(size * size);
    void * y = param->create_matrix(size);
    for(i = 0; i < n; i++) {
        param->blas_func("N", &size, &size, v, A, &size, y, &one, v, y, &one);
    }
    free(A);
    free(y);
}
// Benchmark a rank-1 update routine (A = alpha*y*y' + A).
// Builds a size x size matrix and a size-element vector with
// param->create_matrix, then calls param->blas_func
// n_loop / matrix_size times. The same vector is used for both operands.
void ger_bench(BenchParam * param) {
    int i, n;
    int size = param->matrix_size;
    n = param->n_loop / size;
    // alpha scalar. Stored as a (real, imaginary) pair so that it is valid
    // for the complex routine (zgerc_) as well as the real one (dger_, which
    // only reads the first element). Passing the address of a lone double
    // made the complex routine read past the variable.
    double v[2] = {1.01, 0.0};
    int one = 1;
    void * A = param->create_matrix(size * size);
    void * y = param->create_matrix(size);
    for(i = 0; i < n; i++) {
        param->blas_func(&size, &size, v, y, &one, y, &one, A, &size);
    }
    free(A);
    free(y);
}
#ifndef _WIN32 | |||||
void * pthread_func_wrapper(void * param) { | |||||
((BenchParam *)param)->bench_func(param); | |||||
pthread_exit(NULL); | |||||
} | |||||
#endif | |||||
#define NB_TESTS 5 | |||||
void * TESTS[4 * NB_TESTS] = { | |||||
trmv_bench, ztrmv_, z_create_matrix, "ztrmv", | |||||
gemv_bench, dgemv_, d_create_matrix, "dgemv", | |||||
gemv_bench, zgemv_, z_create_matrix, "zgemv", | |||||
ger_bench, dger_, d_create_matrix, "dger", | |||||
ger_bench, zgerc_, z_create_matrix, "zgerc", | |||||
}; | |||||
inline static double delta_time(struct timespec tick) { | |||||
struct timespec tock; | |||||
clock_gettime(CLOCK_MONOTONIC, &tock); | |||||
return (tock.tv_sec - tick.tv_sec) + (tock.tv_nsec - tick.tv_nsec) / 1e9; | |||||
} | |||||
// Run the benchmark on nb_threads POSIX threads and return the elapsed
// wall-clock time in seconds (thread creation and joining included).
// The loop budget is split evenly between the threads, which all share one
// copy of the parameters. Always returns 0 on _WIN32.
double pthread_bench(BenchParam * param, int nb_threads)
{
#ifdef _WIN32
    return 0;
#else
    BenchParam per_thread = *param;
    pthread_t workers[nb_threads];
    struct timespec start;
    int idx;

    per_thread.n_loop /= nb_threads;
    clock_gettime(CLOCK_MONOTONIC, &start);

    for(idx = 0; idx < nb_threads; idx++) {
        int err = pthread_create(&workers[idx], NULL, pthread_func_wrapper, &per_thread);
        if (err != 0) {
            printf("ERROR; return code from pthread_create() is %d\n", err);
            exit(-1);
        }
    }

    for(idx = 0; idx < nb_threads; idx++)
        pthread_join(workers[idx], NULL);

    return delta_time(start);
#endif
}
// Run the benchmark once on the calling thread and return the elapsed
// wall-clock time in seconds.
double seq_bench(BenchParam * param) {
    struct timespec start;

    clock_gettime(CLOCK_MONOTONIC, &start);
    (*param->bench_func)(param);
    return delta_time(start);
}
// Run the benchmark inside an OpenMP parallel loop with one iteration per
// available OpenMP thread, and return the elapsed wall-clock time in
// seconds. The loop budget is split evenly between the iterations, which
// all share one copy of the parameters.
double omp_bench(BenchParam * param) {
    int worker_count = omp_get_max_threads();
    BenchParam per_worker = *param;
    struct timespec start;
    int w;

    per_worker.n_loop /= worker_count;
    clock_gettime(CLOCK_MONOTONIC, &start);

    #pragma omp parallel for
    for(w = 0; w < worker_count; w++) {
        param->bench_func(&per_worker);
    }

    return delta_time(start);
}
int main(int argc, char * argv[]) { | |||||
double inc_factor = exp(log((double)MAX_SIZE / MIN_SIZE) / NB_SIZE); | |||||
BenchParam param; | |||||
int test_id; | |||||
printf ("Running on %d threads\n", omp_get_max_threads()); | |||||
for(test_id = 0; test_id < NB_TESTS; test_id ++) { | |||||
double size = MIN_SIZE; | |||||
param.bench_func = TESTS[test_id * 4]; | |||||
param.blas_func = TESTS[test_id * 4 + 1]; | |||||
param.create_matrix = TESTS[test_id * 4 + 2]; | |||||
printf("\nBenchmark of %s\n", (char*)TESTS[test_id * 4 + 3]); | |||||
param.n_loop = NLOOP; | |||||
while(size <= MAX_SIZE) { | |||||
param.matrix_size = (int)(size + 0.5); | |||||
double seq_time = seq_bench(¶m); | |||||
double omp_time = omp_bench(¶m); | |||||
double pthread_time = pthread_bench(¶m, omp_get_max_threads()); | |||||
printf("matrix size %d, sequential %gs, openmp %gs, speedup %g, " | |||||
"pthread %gs, speedup %g\n", | |||||
param.matrix_size, seq_time, | |||||
omp_time, seq_time / omp_time, | |||||
pthread_time, seq_time / pthread_time); | |||||
size *= inc_factor; | |||||
} | |||||
} | |||||
return(0); | |||||
} |
@@ -190,8 +190,8 @@ int main(int argc, char *argv[]){ | |||||
timeg /= loops; | timeg /= loops; | ||||
fprintf(stderr, | fprintf(stderr, | ||||
" %10.2f MBytes\n", | |||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6); | |||||
" %10.2f MBytes %10.6f sec\n", | |||||
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||||
} | } | ||||
@@ -191,8 +191,8 @@ int main(int argc, char *argv[]){ | |||||
gettimeofday( &start, (struct timezone *)0); | gettimeofday( &start, (struct timezone *)0); | ||||
fprintf(stderr, | fprintf(stderr, | ||||
" %10.2f MFlops\n", | |||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
" %10.2f MFlops %10.6f sec\n", | |||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6, time1); | |||||
} | } | ||||
@@ -184,8 +184,8 @@ int main(int argc, char *argv[]){ | |||||
timeg /= loops; | timeg /= loops; | ||||
fprintf(stderr, | fprintf(stderr, | ||||
" %10.2f MFlops\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6); | |||||
" %10.2f MFlops %10.6f sec\n", | |||||
COMPSIZE * COMPSIZE * 2. * (double)m / timeg * 1.e-6, timeg); | |||||
} | } | ||||
@@ -1,5 +1,8 @@ | |||||
#!/usr/bin/perl | #!/usr/bin/perl | ||||
use File::Basename; | |||||
use File::Temp qw(tempfile); | |||||
# Checking cross compile | # Checking cross compile | ||||
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | ||||
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); | $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); | ||||
@@ -7,7 +10,9 @@ $hostarch = "x86_64" if ($hostarch eq "amd64"); | |||||
$hostarch = "arm" if ($hostarch =~ /^arm.*/); | $hostarch = "arm" if ($hostarch =~ /^arm.*/); | ||||
$hostarch = "arm64" if ($hostarch eq "aarch64"); | $hostarch = "arm64" if ($hostarch eq "aarch64"); | ||||
$hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); | $hostarch = "power" if ($hostarch =~ /^(powerpc|ppc).*/); | ||||
$hostarch = "zarch" if ($hostarch eq "s390x"); | |||||
$tmpf = new File::Temp( UNLINK => 1 ); | |||||
$binary = $ENV{"BINARY"}; | $binary = $ENV{"BINARY"}; | ||||
$makefile = shift(@ARGV); | $makefile = shift(@ARGV); | ||||
@@ -26,14 +31,12 @@ if ($?) { | |||||
$cross_suffix = ""; | $cross_suffix = ""; | ||||
if ($ARGV[0] =~ /(.*)(-[.\d]+)/) { | |||||
if ($1 =~ /(.*-)(.*)/) { | |||||
$cross_suffix = $1; | |||||
} | |||||
} else { | |||||
if ($ARGV[0] =~ /([^\/]*-)([^\/]*$)/) { | |||||
$cross_suffix = $1; | |||||
} | |||||
if (dirname($compiler_name) ne ".") { | |||||
$cross_suffix .= dirname($compiler_name) . "/"; | |||||
} | |||||
if (basename($compiler_name) =~ /([^\s]*-)(.*)/) { | |||||
$cross_suffix .= $1; | |||||
} | } | ||||
$compiler = ""; | $compiler = ""; | ||||
@@ -63,13 +66,14 @@ $os = Android if ($data =~ /OS_ANDROID/); | |||||
$architecture = x86 if ($data =~ /ARCH_X86/); | $architecture = x86 if ($data =~ /ARCH_X86/); | ||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/); | $architecture = x86_64 if ($data =~ /ARCH_X86_64/); | ||||
$architecture = power if ($data =~ /ARCH_POWER/); | $architecture = power if ($data =~ /ARCH_POWER/); | ||||
$architecture = mips32 if ($data =~ /ARCH_MIPS32/); | |||||
$architecture = mips if ($data =~ /ARCH_MIPS/); | |||||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/); | $architecture = mips64 if ($data =~ /ARCH_MIPS64/); | ||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/); | $architecture = alpha if ($data =~ /ARCH_ALPHA/); | ||||
$architecture = sparc if ($data =~ /ARCH_SPARC/); | $architecture = sparc if ($data =~ /ARCH_SPARC/); | ||||
$architecture = ia64 if ($data =~ /ARCH_IA64/); | $architecture = ia64 if ($data =~ /ARCH_IA64/); | ||||
$architecture = arm if ($data =~ /ARCH_ARM/); | $architecture = arm if ($data =~ /ARCH_ARM/); | ||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/); | $architecture = arm64 if ($data =~ /ARCH_ARM64/); | ||||
$architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||||
$defined = 0; | $defined = 0; | ||||
@@ -79,7 +83,12 @@ if ($os eq "AIX") { | |||||
$defined = 1; | $defined = 1; | ||||
} | } | ||||
if (($architecture eq "mips32") || ($architecture eq "mips64")) { | |||||
if ($architecture eq "mips") { | |||||
$compiler_name .= " -mabi=32"; | |||||
$defined = 1; | |||||
} | |||||
if ($architecture eq "mips64") { | |||||
$compiler_name .= " -mabi=n32" if ($binary eq "32"); | $compiler_name .= " -mabi=n32" if ($binary eq "32"); | ||||
$compiler_name .= " -mabi=64" if ($binary eq "64"); | $compiler_name .= " -mabi=64" if ($binary eq "64"); | ||||
$defined = 1; | $defined = 1; | ||||
@@ -89,6 +98,11 @@ if (($architecture eq "arm") || ($architecture eq "arm64")) { | |||||
$defined = 1; | $defined = 1; | ||||
} | } | ||||
if ($architecture eq "zarch") { | |||||
$defined = 1; | |||||
$binary = 64; | |||||
} | |||||
if ($architecture eq "alpha") { | if ($architecture eq "alpha") { | ||||
$defined = 1; | $defined = 1; | ||||
$binary = 64; | $binary = 64; | ||||
@@ -152,16 +166,35 @@ if ($?) { | |||||
die 1; | die 1; | ||||
} | } | ||||
$have_msa = 0; | |||||
if (($architecture eq "mips") || ($architecture eq "mips64")) { | |||||
$code = '"addvi.b $w0, $w1, 1"'; | |||||
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs"; | |||||
print $tmpf "#include <msa.h>\n\n"; | |||||
print $tmpf "void main(void){ __asm__ volatile($code); }\n"; | |||||
$args = "$msa_flags -o $tmpf.o -x c $tmpf"; | |||||
my @cmd = ("$compiler_name $args"); | |||||
system(@cmd) == 0; | |||||
if ($? != 0) { | |||||
$have_msa = 0; | |||||
} else { | |||||
$have_msa = 1; | |||||
} | |||||
unlink("$tmpf.o"); | |||||
} | |||||
$architecture = x86 if ($data =~ /ARCH_X86/); | $architecture = x86 if ($data =~ /ARCH_X86/); | ||||
$architecture = x86_64 if ($data =~ /ARCH_X86_64/); | $architecture = x86_64 if ($data =~ /ARCH_X86_64/); | ||||
$architecture = power if ($data =~ /ARCH_POWER/); | $architecture = power if ($data =~ /ARCH_POWER/); | ||||
$architecture = mips32 if ($data =~ /ARCH_MIPS32/); | |||||
$architecture = mips if ($data =~ /ARCH_MIPS/); | |||||
$architecture = mips64 if ($data =~ /ARCH_MIPS64/); | $architecture = mips64 if ($data =~ /ARCH_MIPS64/); | ||||
$architecture = alpha if ($data =~ /ARCH_ALPHA/); | $architecture = alpha if ($data =~ /ARCH_ALPHA/); | ||||
$architecture = sparc if ($data =~ /ARCH_SPARC/); | $architecture = sparc if ($data =~ /ARCH_SPARC/); | ||||
$architecture = ia64 if ($data =~ /ARCH_IA64/); | $architecture = ia64 if ($data =~ /ARCH_IA64/); | ||||
$architecture = arm if ($data =~ /ARCH_ARM/); | $architecture = arm if ($data =~ /ARCH_ARM/); | ||||
$architecture = arm64 if ($data =~ /ARCH_ARM64/); | $architecture = arm64 if ($data =~ /ARCH_ARM64/); | ||||
$architecture = zarch if ($data =~ /ARCH_ZARCH/); | |||||
$binformat = bin32; | $binformat = bin32; | ||||
$binformat = bin64 if ($data =~ /BINARY_64/); | $binformat = bin64 if ($data =~ /BINARY_64/); | ||||
@@ -209,6 +242,11 @@ $linker_a = ""; | |||||
$linker_L .= "-Wl,". $flags . " " | $linker_L .= "-Wl,". $flags . " " | ||||
} | } | ||||
if ($flags =~ /^\--exclude-libs/) { | |||||
$linker_L .= "-Wl,". $flags . " "; | |||||
$flags=""; | |||||
} | |||||
if ( | if ( | ||||
($flags =~ /^\-l/) | ($flags =~ /^\-l/) | ||||
&& ($flags !~ /gfortranbegin/) | && ($flags !~ /gfortranbegin/) | ||||
@@ -243,9 +281,11 @@ print MAKEFILE "BINARY64=\n" if $binformat ne bin64; | |||||
print MAKEFILE "BINARY32=1\n" if $binformat eq bin32; | print MAKEFILE "BINARY32=1\n" if $binformat eq bin32; | ||||
print MAKEFILE "BINARY64=1\n" if $binformat eq bin64; | print MAKEFILE "BINARY64=1\n" if $binformat eq bin64; | ||||
print MAKEFILE "FU=$need_fu\n" if $need_fu ne ""; | print MAKEFILE "FU=$need_fu\n" if $need_fu ne ""; | ||||
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross_suffix ne ""; | |||||
print MAKEFILE "CROSS_SUFFIX=$cross_suffix\n" if $cross != 0 && $cross_suffix ne ""; | |||||
print MAKEFILE "CROSS=1\n" if $cross != 0; | print MAKEFILE "CROSS=1\n" if $cross != 0; | ||||
print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; | print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; | ||||
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; | |||||
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; | |||||
$os =~ tr/[a-z]/[A-Z]/; | $os =~ tr/[a-z]/[A-Z]/; | ||||
$architecture =~ tr/[a-z]/[A-Z]/; | $architecture =~ tr/[a-z]/[A-Z]/; | ||||
@@ -257,6 +297,7 @@ print CONFFILE "#define C_$compiler\t1\n"; | |||||
print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32; | print CONFFILE "#define __32BIT__\t1\n" if $binformat eq bin32; | ||||
print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64; | print CONFFILE "#define __64BIT__\t1\n" if $binformat eq bin64; | ||||
print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; | print CONFFILE "#define FUNDERSCORE\t$need_fu\n" if $need_fu ne ""; | ||||
print CONFFILE "#define HAVE_MSA\t1\n" if $have_msa eq 1; | |||||
if ($os eq "LINUX") { | if ($os eq "LINUX") { | ||||
@@ -14,12 +14,12 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") | |||||
if (NOT NO_EXPRECISION) | if (NOT NO_EXPRECISION) | ||||
if (${F_COMPILER} MATCHES "GFORTRAN") | if (${F_COMPILER} MATCHES "GFORTRAN") | ||||
# N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa | # N.B. I'm not sure if CMake differentiates between GCC and LSB -hpa | ||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB") | |||||
set(EXPRECISION 1) | set(EXPRECISION 1) | ||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") | set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION -m128bit-long-double") | ||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | ||||
endif () | endif () | ||||
if (${CMAKE_C_COMPILER} STREQUAL "Clang") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang") | |||||
set(EXPRECISION 1) | set(EXPRECISION 1) | ||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") | set(CCOMMON_OPT "${CCOMMON_OPT} -DEXPRECISION") | ||||
set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | set(FCOMMON_OPT "${FCOMMON_OPT} -m128bit-long-double") | ||||
@@ -28,35 +28,35 @@ if (${ARCH} STREQUAL "x86" OR ${ARCH} STREQUAL "x86_64") | |||||
endif () | endif () | ||||
endif () | endif () | ||||
if (${CMAKE_C_COMPILER} STREQUAL "Intel") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel") | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") | set(CCOMMON_OPT "${CCOMMON_OPT} -wd981") | ||||
endif () | endif () | ||||
if (USE_OPENMP) | if (USE_OPENMP) | ||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU" OR ${CMAKE_C_COMPILER} STREQUAL "LSB") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_C_COMPILER_ID} STREQUAL "LSB") | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | ||||
endif () | endif () | ||||
if (${CMAKE_C_COMPILER} STREQUAL "Clang") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Clang") | |||||
message(WARNING "Clang doesn't support OpenMP yet.") | message(WARNING "Clang doesn't support OpenMP yet.") | ||||
set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | set(CCOMMON_OPT "${CCOMMON_OPT} -fopenmp") | ||||
endif () | endif () | ||||
if (${CMAKE_C_COMPILER} STREQUAL "Intel") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "Intel") | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") | set(CCOMMON_OPT "${CCOMMON_OPT} -openmp") | ||||
endif () | endif () | ||||
if (${CMAKE_C_COMPILER} STREQUAL "PGI") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PGI") | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | ||||
endif () | endif () | ||||
if (${CMAKE_C_COMPILER} STREQUAL "OPEN64") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "OPEN64") | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | ||||
set(CEXTRALIB "${CEXTRALIB} -lstdc++") | set(CEXTRALIB "${CEXTRALIB} -lstdc++") | ||||
endif () | endif () | ||||
if (${CMAKE_C_COMPILER} STREQUAL "PATHSCALE") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "PATHSCALE") | |||||
set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | set(CCOMMON_OPT "${CCOMMON_OPT} -mp") | ||||
endif () | endif () | ||||
endif () | endif () | ||||
@@ -87,7 +87,7 @@ if (${ARCH} STREQUAL "ia64") | |||||
set(BINARY_DEFINED 1) | set(BINARY_DEFINED 1) | ||||
if (${F_COMPILER} MATCHES "GFORTRAN") | if (${F_COMPILER} MATCHES "GFORTRAN") | ||||
if (${CMAKE_C_COMPILER} STREQUAL "GNU") | |||||
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
# EXPRECISION = 1 | # EXPRECISION = 1 | ||||
# CCOMMON_OPT += -DEXPRECISION | # CCOMMON_OPT += -DEXPRECISION | ||||
endif () | endif () | ||||
@@ -53,7 +53,7 @@ endif() | |||||
add_custom_command( | add_custom_command( | ||||
TARGET ${OpenBLAS_LIBNAME} PRE_LINK | TARGET ${OpenBLAS_LIBNAME} PRE_LINK | ||||
COMMAND perl | COMMAND perl | ||||
ARGS "${CMAKE_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def" | |||||
ARGS "${PROJECT_SOURCE_DIR}/exports/gensymbol" "win2k" "${ARCH_IN}" "dummy" "${EXPRECISION_IN}" "${NO_CBLAS_IN}" "${NO_LAPACK_IN}" "${NO_LAPACKE_IN}" "${NEED2UNDERSCORES_IN}" "${ONLY_CBLAS_IN}" "${SYMBOLPREFIX}" "${SYMBOLSUFFIX}" > "${PROJECT_BINARY_DIR}/openblas.def" | |||||
COMMENT "Create openblas.def file" | COMMENT "Create openblas.def file" | ||||
VERBATIM) | VERBATIM) | ||||
@@ -2,7 +2,7 @@ | |||||
set(ALLAUX | set(ALLAUX | ||||
ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f | ilaenv.f ieeeck.f lsamen.f xerbla_array.f iparmq.f | ||||
ilaprec.f ilatrans.f ilauplo.f iladiag.f chla_transtype.f | |||||
ilaprec.f ilatrans.f ilauplo.f iladiag.f iparam2stage.F chla_transtype.f | |||||
../INSTALL/ilaver.f ../INSTALL/slamch.f | ../INSTALL/ilaver.f ../INSTALL/slamch.f | ||||
) | ) | ||||
@@ -26,7 +26,7 @@ set(SCLAUX | |||||
) | ) | ||||
set(DZLAUX | set(DZLAUX | ||||
dbdsdc.f | |||||
dbdsdc.f dbdsvdx.f | |||||
dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f | dbdsqr.f ddisna.f dlabad.f dlacpy.f dladiv.f dlae2.f dlaebz.f | ||||
dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f | dlaed0.f dlaed1.f dlaed2.f dlaed3.f dlaed4.f dlaed5.f dlaed6.f | ||||
dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f | dlaed7.f dlaed8.f dlaed9.f dlaeda.f dlaev2.f dlagtf.f | ||||
@@ -42,24 +42,32 @@ set(DZLAUX | |||||
dsteqr.f dsterf.f dlaisnan.f disnan.f | dsteqr.f dsterf.f dlaisnan.f disnan.f | ||||
dlartgp.f dlartgs.f | dlartgp.f dlartgs.f | ||||
../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f | ../INSTALL/dlamch.f ../INSTALL/dsecnd_${TIMER}.f | ||||
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f | |||||
dgetsls.f dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f | |||||
dtplqt2.f dtpmlqt.f dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f | |||||
dsytf2_rk.f dlasyf_rk.f dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f | |||||
dsytri_3x.f dsysv_rk.f dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f | |||||
dsbevx_2stage.f dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f | |||||
dsyevx_2stage.f dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F | |||||
dsytrd_sy2sb.f dlarfy.f | |||||
) | ) | ||||
set(SLASRC | set(SLASRC | ||||
sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f | |||||
sbdsvdx.f sgbbrd.f sgbcon.f sgbequ.f sgbrfs.f sgbsv.f | |||||
sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f | sgbsvx.f sgbtf2.f sgbtrf.f sgbtrs.f sgebak.f sgebal.f sgebd2.f | ||||
sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f | sgebrd.f sgecon.f sgeequ.f sgees.f sgeesx.f sgeev.f sgeevx.f | ||||
sgegs.f sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f | |||||
sgels.f sgelsd.f sgelss.f sgelsx.f sgelsy.f sgeql2.f sgeqlf.f | |||||
sgeqp3.f sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f | |||||
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvx.f | |||||
sgetc2.f sgetri.f | |||||
sggbak.f sggbal.f sgges.f sggesx.f sggev.f sggevx.f | |||||
DEPRECATED/sgegs.f DEPRECATED/sgegv.f sgehd2.f sgehrd.f sgelq2.f sgelqf.f | |||||
sgels.f sgelsd.f sgelss.f DEPRECATED/sgelsx.f sgelsy.f sgeql2.f sgeqlf.f | |||||
sgeqp3.f DEPRECATED/sgeqpf.f sgeqr2.f sgeqr2p.f sgeqrf.f sgeqrfp.f sgerfs.f | |||||
sgerq2.f sgerqf.f sgesc2.f sgesdd.f sgesvd.f sgesvdx.f sgesvx.f | |||||
sgetc2.f sgetri.f sgetrf2.f | |||||
sggbak.f sggbal.f sgghd3.f sgges.f sgges3.f sggesx.f sggev.f sggev3.f sggevx.f | |||||
sggglm.f sgghrd.f sgglse.f sggqrf.f | sggglm.f sgghrd.f sgglse.f sggqrf.f | ||||
sggrqf.f sggsvd.f sggsvp.f sgtcon.f sgtrfs.f sgtsv.f | |||||
sggrqf.f DEPRECATED/sggsvd.f sggsvd3.f DEPRECATED/sggsvp.f sggsvp3.f sgtcon.f sgtrfs.f sgtsv.f | |||||
sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f | sgtsvx.f sgttrf.f sgttrs.f sgtts2.f shgeqz.f | ||||
shsein.f shseqr.f slabrd.f slacon.f slacn2.f | shsein.f shseqr.f slabrd.f slacon.f slacn2.f | ||||
slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f | slaein.f slaexc.f slag2.f slags2.f slagtm.f slagv2.f slahqr.f | ||||
slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f | |||||
DEPRECATED/slahrd.f slahr2.f slaic1.f slaln2.f slals0.f slalsa.f slalsd.f | |||||
slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f | slangb.f slange.f slangt.f slanhs.f slansb.f slansp.f | ||||
slansy.f slantb.f slantp.f slantr.f slanv2.f | slansy.f slantb.f slantp.f slantr.f slanv2.f | ||||
slapll.f slapmt.f | slapll.f slapmt.f | ||||
@@ -69,10 +77,10 @@ set(SLASRC | |||||
slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f | slarf.f slarfb.f slarfg.f slarfgp.f slarft.f slarfx.f slargv.f | ||||
slarrv.f slartv.f | slarrv.f slartv.f | ||||
slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f | slarz.f slarzb.f slarzt.f slasy2.f slasyf.f slasyf_rook.f | ||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f slatzm.f | |||||
slatbs.f slatdf.f slatps.f slatrd.f slatrs.f slatrz.f DEPRECATED/slatzm.f | |||||
sopgtr.f sopmtr.f sorg2l.f sorg2r.f | sopgtr.f sopmtr.f sorg2l.f sorg2r.f | ||||
sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f | sorgbr.f sorghr.f sorgl2.f sorglq.f sorgql.f sorgqr.f sorgr2.f | ||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f | |||||
sorgrq.f sorgtr.f sorm2l.f sorm2r.f sorm22.f | |||||
sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f | sormbr.f sormhr.f sorml2.f sormlq.f sormql.f sormqr.f sormr2.f | ||||
sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f | sormr3.f sormrq.f sormrz.f sormtr.f spbcon.f spbequ.f spbrfs.f | ||||
spbstf.f spbsv.f spbsvx.f | spbstf.f spbsv.f spbsvx.f | ||||
@@ -96,8 +104,8 @@ set(SLASRC | |||||
stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f | stbrfs.f stbtrs.f stgevc.f stgex2.f stgexc.f stgsen.f | ||||
stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f | stgsja.f stgsna.f stgsy2.f stgsyl.f stpcon.f stprfs.f stptri.f | ||||
stptrs.f | stptrs.f | ||||
strcon.f strevc.f strexc.f strrfs.f strsen.f strsna.f strsyl.f | |||||
strtrs.f stzrqf.f stzrzf.f sstemr.f | |||||
strcon.f strevc.f strevc3.f strexc.f strrfs.f strsen.f strsna.f strsyl.f | |||||
strtrs.f DEPRECATED/stzrqf.f stzrzf.f sstemr.f | |||||
slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f | slansf.f spftrf.f spftri.f spftrs.f ssfrk.f stfsm.f stftri.f stfttp.f | ||||
stfttr.f stpttf.f stpttr.f strttf.f strttp.f | stfttr.f stpttf.f stpttr.f strttf.f strttp.f | ||||
sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f | sgejsv.f sgesvj.f sgsvj0.f sgsvj1.f | ||||
@@ -106,22 +114,29 @@ set(SLASRC | |||||
sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f | sorbdb5.f sorbdb6.f sorcsd.f sorcsd2by1.f | ||||
sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f | sgeqrt.f sgeqrt2.f sgeqrt3.f sgemqrt.f | ||||
stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f | stpqrt.f stpqrt2.f stpmqrt.f stprfb.f spotri.f | ||||
sgelq.f sgelqt.f sgelqt3.f sgemlq.f sgemlqt.f sgemqr.f sgeqr.f sgetsls.f | |||||
slamswlq.f slamtsqr.f slaswlq.f slatsqr.f stplqt.f stplqt2.f stpmlqt.f | |||||
ssysv_aa.f ssytrf_aa.f ssytrs_aa.f slasyf_aa.f ssytf2_rk.f slasyf_rk.f | |||||
ssytrf_rk.f ssytrs_3.f ssycon_3.f ssytri_3.f ssytri_3x.f ssysv_rk.f | |||||
ssb2st_kernels.f ssbev_2stage.f ssbevd_2stage.f ssbevx_2stage.f | |||||
ssyev_2stage.f ssyevd_2stage.f ssyevr_2stage.f ssyevx_2stage.f | |||||
ssygv_2stage.f ssytrd_2stage.f ssytrd_sb2st.F ssytrd_sy2sb.f slarfy.f | |||||
) | ) | ||||
set(DSLASRC spotrs.f) | |||||
set(DSLASRC spotrs.f spotrf2.f) | |||||
set(CLASRC | set(CLASRC | ||||
cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f | cbdsqr.f cgbbrd.f cgbcon.f cgbequ.f cgbrfs.f cgbsv.f cgbsvx.f | ||||
cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f | cgbtf2.f cgbtrf.f cgbtrs.f cgebak.f cgebal.f cgebd2.f cgebrd.f | ||||
cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f | cgecon.f cgeequ.f cgees.f cgeesx.f cgeev.f cgeevx.f | ||||
cgegs.f cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f | |||||
cgels.f cgelsd.f cgelss.f cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f | |||||
cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f | |||||
DEPRECATED/cgegs.f DEPRECATED/cgegv.f cgehd2.f cgehrd.f cgelq2.f cgelqf.f | |||||
cgels.f cgelsd.f cgelss.f DEPRECATED/cgelsx.f cgelsy.f cgeql2.f cgeqlf.f cgeqp3.f | |||||
DEPRECATED/cgeqpf.f cgeqr2.f cgeqr2p.f cgeqrf.f cgeqrfp.f cgerfs.f | |||||
cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f | cgerq2.f cgerqf.f cgesc2.f cgesdd.f cgesvd.f | ||||
cgesvx.f cgetc2.f cgetri.f | cgesvx.f cgetc2.f cgetri.f | ||||
cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f | cggbak.f cggbal.f cgges.f cggesx.f cggev.f cggevx.f cggglm.f | ||||
cgghrd.f cgglse.f cggqrf.f cggrqf.f | cgghrd.f cgglse.f cggqrf.f cggrqf.f | ||||
cggsvd.f cggsvp.f | |||||
DEPRECATED/cggsvd.f DEPRECATED/cggsvp.f | |||||
cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f | cgtcon.f cgtrfs.f cgtsv.f cgtsvx.f cgttrf.f cgttrs.f cgtts2.f chbev.f | ||||
chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f | chbevd.f chbevx.f chbgst.f chbgv.f chbgvd.f chbgvx.f chbtrd.f | ||||
checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f | checon.f cheev.f cheevd.f cheevr.f cheevx.f chegs2.f chegst.f | ||||
@@ -138,7 +153,7 @@ set(CLASRC | |||||
claed0.f claed7.f claed8.f | claed0.f claed7.f claed8.f | ||||
claein.f claesy.f claev2.f clags2.f clagtm.f | claein.f claesy.f claev2.f clags2.f clagtm.f | ||||
clahef.f clahef_rook.f clahqr.f | clahef.f clahef_rook.f clahqr.f | ||||
clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f | |||||
DEPRECATED/clahrd.f clahr2.f claic1.f clals0.f clalsa.f clalsd.f clangb.f clange.f clangt.f | |||||
clanhb.f clanhe.f | clanhb.f clanhe.f | ||||
clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f | clanhp.f clanhs.f clanht.f clansb.f clansp.f clansy.f clantb.f | ||||
clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f | clantp.f clantr.f clapll.f clapmt.f clarcm.f claqgb.f claqge.f | ||||
@@ -149,7 +164,7 @@ set(CLASRC | |||||
clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f | clarfx.f clargv.f clarnv.f clarrv.f clartg.f clartv.f | ||||
clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f | clarz.f clarzb.f clarzt.f clascl.f claset.f clasr.f classq.f | ||||
clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f | clasyf.f clasyf_rook.f clatbs.f clatdf.f clatps.f clatrd.f clatrs.f clatrz.f | ||||
clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f | |||||
DEPRECATED/clatzm.f cpbcon.f cpbequ.f cpbrfs.f cpbstf.f cpbsv.f | |||||
cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f | cpbsvx.f cpbtf2.f cpbtrf.f cpbtrs.f cpocon.f cpoequ.f cporfs.f | ||||
cposv.f cposvx.f cpstrf.f cpstf2.f | cposv.f cposvx.f cpstrf.f cpstf2.f | ||||
cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f | cppcon.f cppequ.f cpprfs.f cppsv.f cppsvx.f cpptrf.f cpptri.f cpptrs.f | ||||
@@ -165,8 +180,8 @@ set(CLASRC | |||||
ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f | ctbcon.f ctbrfs.f ctbtrs.f ctgevc.f ctgex2.f | ||||
ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f | ctgexc.f ctgsen.f ctgsja.f ctgsna.f ctgsy2.f ctgsyl.f ctpcon.f | ||||
ctprfs.f ctptri.f | ctprfs.f ctptri.f | ||||
ctptrs.f ctrcon.f ctrevc.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f | |||||
ctrsyl.f ctrtrs.f ctzrqf.f ctzrzf.f cung2l.f cung2r.f | |||||
ctptrs.f ctrcon.f ctrevc.f ctrevc3.f ctrexc.f ctrrfs.f ctrsen.f ctrsna.f | |||||
ctrsyl.f ctrtrs.f DEPRECATED/ctzrqf.f ctzrzf.f cung2l.f cung2r.f | |||||
cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f | cungbr.f cunghr.f cungl2.f cunglq.f cungql.f cungqr.f cungr2.f | ||||
cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f | cungrq.f cungtr.f cunm2l.f cunm2r.f cunmbr.f cunmhr.f cunml2.f | ||||
cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f | cunmlq.f cunmql.f cunmqr.f cunmr2.f cunmr3.f cunmrq.f cunmrz.f | ||||
@@ -178,6 +193,14 @@ set(CLASRC | |||||
cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f | cunbdb5.f cunbdb6.f cuncsd.f cuncsd2by1.f | ||||
cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f | cgeqrt.f cgeqrt2.f cgeqrt3.f cgemqrt.f | ||||
ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f | ctpqrt.f ctpqrt2.f ctpmqrt.f ctprfb.f cpotri.f | ||||
cgelq.f cgelqt.f cgelqt3.f cgemlq.f cgemlqt.f cgemqr.f cgeqr.f cgetsls.f | |||||
clamswlq.f clamtsqr.f claswlq.f clatsqr.f ctplqt.f ctplqt2.f ctpmlqt.f | |||||
chesv_aa.f chetrf_aa.f chetrs_aa.f clahef_aa.f csytf2_rk.f clasyf_rk.f | |||||
csytrf_rk.f csytrs_3.f csycon_3.f csytri_3.f csytri_3x.f csysv_rk.f | |||||
chetf2_rk.f clahef_rk.f chetrf_rk.f chetrs_3.f checon_3.f chetri_3.f | |||||
chetri_3x.f chesv_rk.f chb2st_kernels.f chbev_2stage.f chbevd_2stage.f | |||||
chbevx_2stage.f cheev_2stage.f cheevd_2stage.f cheevr_2stage.f cheevx_2stage.f | |||||
chegv_2stage.f chetrd_2stage.f chetrd_hb2st.F chetrd_he2hb.f clarfy.f | |||||
) | ) | ||||
set(ZCLASRC cpotrs.f) | set(ZCLASRC cpotrs.f) | ||||
@@ -186,18 +209,18 @@ set(DLASRC | |||||
dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f | dgbbrd.f dgbcon.f dgbequ.f dgbrfs.f dgbsv.f | ||||
dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f | dgbsvx.f dgbtf2.f dgbtrf.f dgbtrs.f dgebak.f dgebal.f dgebd2.f | ||||
dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f | dgebrd.f dgecon.f dgeequ.f dgees.f dgeesx.f dgeev.f dgeevx.f | ||||
dgegs.f dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f | |||||
dgels.f dgelsd.f dgelss.f dgelsx.f dgelsy.f dgeql2.f dgeqlf.f | |||||
dgeqp3.f dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f | |||||
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvx.f | |||||
dgetc2.f dgetri.f | |||||
dggbak.f dggbal.f dgges.f dggesx.f dggev.f dggevx.f | |||||
dggglm.f dgghrd.f dgglse.f dggqrf.f | |||||
dggrqf.f dggsvd.f dggsvp.f dgtcon.f dgtrfs.f dgtsv.f | |||||
DEPRECATED/dgegs.f DEPRECATED/dgegv.f dgehd2.f dgehrd.f dgelq2.f dgelqf.f | |||||
dgels.f dgelsd.f dgelss.f DEPRECATED/dgelsx.f dgelsy.f dgeql2.f dgeqlf.f | |||||
dgeqp3.f DEPRECATED/dgeqpf.f dgeqr2.f dgeqr2p.f dgeqrf.f dgeqrfp.f dgerfs.f | |||||
dgerq2.f dgerqf.f dgesc2.f dgesdd.f dgesvd.f dgesvdx.f dgesvx.f | |||||
dgetc2.f dgetri.f dgetrf2.f | |||||
dggbak.f dggbal.f dgges.f dgges3.f dggesx.f dggev.f dggev3.f dggevx.f | |||||
dggglm.f dgghd3.f dgghrd.f dgglse.f dggqrf.f | |||||
dggrqf.f dggsvd3.f dggsvp3.f DEPRECATED/dggsvd.f DEPRECATED/dggsvp.f dgtcon.f dgtrfs.f dgtsv.f | |||||
dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f | dgtsvx.f dgttrf.f dgttrs.f dgtts2.f dhgeqz.f | ||||
dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f | dhsein.f dhseqr.f dlabrd.f dlacon.f dlacn2.f | ||||
dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f | dlaein.f dlaexc.f dlag2.f dlags2.f dlagtm.f dlagv2.f dlahqr.f | ||||
dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f | |||||
DEPRECATED/dlahrd.f dlahr2.f dlaic1.f dlaln2.f dlals0.f dlalsa.f dlalsd.f | |||||
dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f | dlangb.f dlange.f dlangt.f dlanhs.f dlansb.f dlansp.f | ||||
dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f | dlansy.f dlantb.f dlantp.f dlantr.f dlanv2.f | ||||
dlapll.f dlapmt.f | dlapll.f dlapmt.f | ||||
@@ -207,15 +230,15 @@ set(DLASRC | |||||
dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f | dlarf.f dlarfb.f dlarfg.f dlarfgp.f dlarft.f dlarfx.f | ||||
dlargv.f dlarrv.f dlartv.f | dlargv.f dlarrv.f dlartv.f | ||||
dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f | dlarz.f dlarzb.f dlarzt.f dlasy2.f dlasyf.f dlasyf_rook.f | ||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f dlatzm.f | |||||
dlatbs.f dlatdf.f dlatps.f dlatrd.f dlatrs.f dlatrz.f DEPRECATED/dlatzm.f | |||||
dopgtr.f dopmtr.f dorg2l.f dorg2r.f | dopgtr.f dopmtr.f dorg2l.f dorg2r.f | ||||
dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f | dorgbr.f dorghr.f dorgl2.f dorglq.f dorgql.f dorgqr.f dorgr2.f | ||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f | |||||
dorgrq.f dorgtr.f dorm2l.f dorm2r.f dorm22.f | |||||
dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f | dormbr.f dormhr.f dorml2.f dormlq.f dormql.f dormqr.f dormr2.f | ||||
dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f | dormr3.f dormrq.f dormrz.f dormtr.f dpbcon.f dpbequ.f dpbrfs.f | ||||
dpbstf.f dpbsv.f dpbsvx.f | dpbstf.f dpbsv.f dpbsvx.f | ||||
dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f | dpbtf2.f dpbtrf.f dpbtrs.f dpocon.f dpoequ.f dporfs.f dposv.f | ||||
dposvx.f dpotrs.f dpstrf.f dpstf2.f | |||||
dposvx.f dpotrf2.f dpotrs.f dpstrf.f dpstf2.f | |||||
dppcon.f dppequ.f | dppcon.f dppequ.f | ||||
dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f | dpprfs.f dppsv.f dppsvx.f dpptrf.f dpptri.f dpptrs.f dptcon.f | ||||
dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f | dpteqr.f dptrfs.f dptsv.f dptsvx.f dpttrs.f dptts2.f drscl.f | ||||
@@ -234,8 +257,8 @@ set(DLASRC | |||||
dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f | dtbcon.f dtbrfs.f dtbtrs.f dtgevc.f dtgex2.f dtgexc.f dtgsen.f | ||||
dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f | dtgsja.f dtgsna.f dtgsy2.f dtgsyl.f dtpcon.f dtprfs.f dtptri.f | ||||
dtptrs.f | dtptrs.f | ||||
dtrcon.f dtrevc.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f | |||||
dtrtrs.f dtzrqf.f dtzrzf.f dstemr.f | |||||
dtrcon.f dtrevc.f dtrevc3.f dtrexc.f dtrrfs.f dtrsen.f dtrsna.f dtrsyl.f | |||||
dtrtrs.f DEPRECATED/dtzrqf.f dtzrzf.f dstemr.f | |||||
dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f | dsgesv.f dsposv.f dlag2s.f slag2d.f dlat2s.f | ||||
dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f | dlansf.f dpftrf.f dpftri.f dpftrs.f dsfrk.f dtfsm.f dtftri.f dtfttp.f | ||||
dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f | dtfttr.f dtpttf.f dtpttr.f dtrttf.f dtrttp.f | ||||
@@ -245,20 +268,28 @@ set(DLASRC | |||||
dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f | dorbdb5.f dorbdb6.f dorcsd.f dorcsd2by1.f | ||||
dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f | dgeqrt.f dgeqrt2.f dgeqrt3.f dgemqrt.f | ||||
dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f | dtpqrt.f dtpqrt2.f dtpmqrt.f dtprfb.f dpotri.f | ||||
dgelq.f dgelqt.f dgelqt3.f dgemlq.f dgemlqt.f dgemqr.f dgeqr.f dgetsls.f | |||||
dlamswlq.f dlamtsqr.f dlaswlq.f dlatsqr.f dtplqt.f dtplqt2.f dtpmlqt.f | |||||
dsysv_aa.f dsytrf_aa.f dsytrs_aa.f dlasyf_aa.f dsytf2_rk.f dlasyf_rk.f | |||||
dsytrf_rk.f dsytrs_3.f dsycon_3.f dsytri_3.f dsytri_3x.f dsysv_rk.f | |||||
dsb2st_kernels.f dsbev_2stage.f dsbevd_2stage.f dsbevx_2stage.f | |||||
dsyev_2stage.f dsyevd_2stage.f dsyevr_2stage.f dsyevx_2stage.f | |||||
dsygv_2stage.f dsytrd_2stage.f dsytrd_sb2st.F dsytrd_sy2sb.f dlarfy.f | |||||
) | ) | ||||
set(ZLASRC | set(ZLASRC | ||||
zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f | zbdsqr.f zgbbrd.f zgbcon.f zgbequ.f zgbrfs.f zgbsv.f zgbsvx.f | ||||
zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f | zgbtf2.f zgbtrf.f zgbtrs.f zgebak.f zgebal.f zgebd2.f zgebrd.f | ||||
zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f | zgecon.f zgeequ.f zgees.f zgeesx.f zgeev.f zgeevx.f | ||||
zgegs.f zgegv.f zgehd2.f zgehrd.f zgelq2.f zgelqf.f | |||||
zgels.f zgelsd.f zgelss.f zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f | |||||
zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f | |||||
zgesc2.f zgesdd.f zgesvd.f zgesvx.f zgetc2.f | |||||
zgetri.f | |||||
zggbak.f zggbal.f zgges.f zggesx.f zggev.f zggevx.f zggglm.f | |||||
zgghrd.f zgglse.f zggqrf.f zggrqf.f | |||||
zggsvd.f zggsvp.f | |||||
DEPRECATED/zgegs.f DEPRECATED/zgegv.f zgehd2.f zgehrd.f zgejsv.f zgelq2.f zgelqf.f | |||||
zgels.f zgelsd.f zgelss.f DEPRECATED/zgelsx.f zgelsy.f zgeql2.f zgeqlf.f zgeqp3.f | |||||
DEPRECATED/zgeqpf.f zgeqr2.f zgeqr2p.f zgeqrf.f zgeqrfp.f zgerfs.f zgerq2.f zgerqf.f | |||||
zgesc2.f zgesdd.f zgesvd.f zgesvdx.f zgesvj.f zgesvx.f zgetc2.f | |||||
zgetri.f zgetrf2.f | |||||
zggbak.f zggbal.f zgges.f zgges3.f zggesx.f zggev.f zggev3.f zggevx.f zggglm.f | |||||
zgghd3.f zgghrd.f zgglse.f zggqrf.f zggrqf.f | |||||
DEPRECATED/zggsvd.f zggsvd3.f DEPRECATED/zggsvp.f zggsvp3.f | |||||
zgsvj0.f zgsvj1.f | |||||
zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f | zgtcon.f zgtrfs.f zgtsv.f zgtsvx.f zgttrf.f zgttrs.f zgtts2.f zhbev.f | ||||
zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f | zhbevd.f zhbevx.f zhbgst.f zhbgv.f zhbgvd.f zhbgvx.f zhbtrd.f | ||||
zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f | zhecon.f zheev.f zheevd.f zheevr.f zheevx.f zhegs2.f zhegst.f | ||||
@@ -275,7 +306,7 @@ set(ZLASRC | |||||
zlaed0.f zlaed7.f zlaed8.f | zlaed0.f zlaed7.f zlaed8.f | ||||
zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f | zlaein.f zlaesy.f zlaev2.f zlags2.f zlagtm.f | ||||
zlahef.f zlahef_rook.f zlahqr.f | zlahef.f zlahef_rook.f zlahqr.f | ||||
zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f | |||||
DEPRECATED/zlahrd.f zlahr2.f zlaic1.f zlals0.f zlalsa.f zlalsd.f zlangb.f zlange.f | |||||
zlangt.f zlanhb.f | zlangt.f zlanhb.f | ||||
zlanhe.f | zlanhe.f | ||||
zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f | zlanhp.f zlanhs.f zlanht.f zlansb.f zlansp.f zlansy.f zlantb.f | ||||
@@ -287,28 +318,28 @@ set(ZLASRC | |||||
zlarfg.f zlarft.f zlarfgp.f | zlarfg.f zlarft.f zlarfgp.f | ||||
zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f | zlarfx.f zlargv.f zlarnv.f zlarrv.f zlartg.f zlartv.f | ||||
zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f | zlarz.f zlarzb.f zlarzt.f zlascl.f zlaset.f zlasr.f | ||||
zlassq.f zlasyf.f zlasyf_rook.f | |||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f zlatzm.f | |||||
zlassq.f zlasyf.f zlasyf_rook.f zlasy_aa.f | |||||
zlatbs.f zlatdf.f zlatps.f zlatrd.f zlatrs.f zlatrz.f DEPRECATED/zlatzm.f | |||||
zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f | zpbcon.f zpbequ.f zpbrfs.f zpbstf.f zpbsv.f | ||||
zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f | zpbsvx.f zpbtf2.f zpbtrf.f zpbtrs.f zpocon.f zpoequ.f zporfs.f | ||||
zposv.f zposvx.f zpotrs.f zpstrf.f zpstf2.f | |||||
zposv.f zposvx.f zpotrf2.f zpotrs.f zpstrf.f zpstf2.f | |||||
zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f | zppcon.f zppequ.f zpprfs.f zppsv.f zppsvx.f zpptrf.f zpptri.f zpptrs.f | ||||
zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f | zptcon.f zpteqr.f zptrfs.f zptsv.f zptsvx.f zpttrf.f zpttrs.f zptts2.f | ||||
zrot.f zspcon.f zsprfs.f zspsv.f | zrot.f zspcon.f zsprfs.f zspsv.f | ||||
zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f | zspsvx.f zsptrf.f zsptri.f zsptrs.f zdrscl.f zstedc.f | ||||
zstegr.f zstein.f zsteqr.f | zstegr.f zstein.f zsteqr.f | ||||
zsycon.f | |||||
zsycon.f zsysv_aa.f | |||||
zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f | zsyrfs.f zsysv.f zsysvx.f zsytf2.f zsytrf.f zsytri.f zsytri2.f zsytri2x.f | ||||
zsyswapr.f zsytrs.f zsytrs2.f zsyconv.f | |||||
zsyswapr.f zsytrs.f zsytrs_aa.f zsytrs2.f zsyconv.f | |||||
zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f | zsytf2_rook.f zsytrf_rook.f zsytrs_rook.f | ||||
zsytri_rook.f zsycon_rook.f zsysv_rook.f | zsytri_rook.f zsycon_rook.f zsysv_rook.f | ||||
ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f | ztbcon.f ztbrfs.f ztbtrs.f ztgevc.f ztgex2.f | ||||
ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f | ztgexc.f ztgsen.f ztgsja.f ztgsna.f ztgsy2.f ztgsyl.f ztpcon.f | ||||
ztprfs.f ztptri.f | ztprfs.f ztptri.f | ||||
ztptrs.f ztrcon.f ztrevc.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f | |||||
ztrsyl.f ztrtrs.f ztzrqf.f ztzrzf.f zung2l.f | |||||
ztptrs.f ztrcon.f ztrevc.f ztrevc3.f ztrexc.f ztrrfs.f ztrsen.f ztrsna.f | |||||
ztrsyl.f ztrtrs.f DEPRECATED/ztzrqf.f ztzrzf.f zung2l.f | |||||
zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f | zung2r.f zungbr.f zunghr.f zungl2.f zunglq.f zungql.f zungqr.f zungr2.f | ||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunml2.f | |||||
zungrq.f zungtr.f zunm2l.f zunm2r.f zunmbr.f zunmhr.f zunm22.f zunml2.f | |||||
zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f | zunmlq.f zunmql.f zunmqr.f zunmr2.f zunmr3.f zunmrq.f zunmrz.f | ||||
zunmtr.f zupgtr.f | zunmtr.f zupgtr.f | ||||
zupmtr.f izmax1.f dzsum1.f zstemr.f | zupmtr.f izmax1.f dzsum1.f zstemr.f | ||||
@@ -320,6 +351,15 @@ set(ZLASRC | |||||
zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f | zunbdb5.f zunbdb6.f zuncsd.f zuncsd2by1.f | ||||
zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f | zgeqrt.f zgeqrt2.f zgeqrt3.f zgemqrt.f | ||||
ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f | ztpqrt.f ztpqrt2.f ztpmqrt.f ztprfb.f zpotri.f | ||||
zgelq.f zgelqt.f zgelqt3.f zgemlq.f zgemlqt.f zgemqr.f zgeqr.f zgetsls.f | |||||
zlamswlq.f zlamtsqr.f zlaswlq.f zlatsqr.f ztplqt.f ztplqt2.f ztpmlqt.f | |||||
zhesv_aa.f zhetrf_aa.f zhetrs_aa.f zlahef_aa.f zsytf2_rk.f zlasyf_rk.f | |||||
zsytrf_aa.f zsytrf_rk.f zsytrs_3.f zsycon_3.f zsytri_3.f zsytri_3x.f zsysv_rk.f | |||||
zhetf2_rk.f zlahef_rk.f zhetrf_rk.f zhetrs_3.f zhecon_3.f zhetri_3.f | |||||
zhetri_3x.f zhesv_rk.f zhb2st_kernels.f zhbev_2stage.f zhbevd_2stage.f | |||||
zhbevx_2stage.f zheev_2stage.f zheevd_2stage.f zheevr_2stage.f | |||||
zheevx_2stage.f zhegv_2stage.f zhetrd_2stage.f zhetrd_hb2st.F zhetrd_he2hb.f | |||||
zlarfy.f | |||||
) | ) | ||||
set(LA_REL_SRC ${ALLAUX}) | set(LA_REL_SRC ${ALLAUX}) | ||||
@@ -0,0 +1,10 @@ | |||||
prefix=@prefix@ | |||||
libdir=@libdir@ | |||||
includedir=@includedir@ | |||||
Name: OpenBLAS | |||||
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version | |||||
Version: @OPENBLAS_VERSION@ | |||||
URL: https://github.com/xianyi/OpenBLAS | |||||
Libs: -L${libdir} -lopenblas | |||||
Cflags: -I${includedir} |
@@ -50,20 +50,20 @@ else() | |||||
set(TARGET_CONF "config.h") | set(TARGET_CONF "config.h") | ||||
endif () | endif () | ||||
include("${CMAKE_SOURCE_DIR}/cmake/c_check.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/c_check.cmake") | |||||
if (NOT NOFORTRAN) | if (NOT NOFORTRAN) | ||||
include("${CMAKE_SOURCE_DIR}/cmake/f_check.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/f_check.cmake") | |||||
endif () | endif () | ||||
# compile getarch | # compile getarch | ||||
set(GETARCH_SRC | set(GETARCH_SRC | ||||
${CMAKE_SOURCE_DIR}/getarch.c | |||||
${PROJECT_SOURCE_DIR}/getarch.c | |||||
${CPUIDEMO} | ${CPUIDEMO} | ||||
) | ) | ||||
if (NOT MSVC) | if (NOT MSVC) | ||||
list(APPEND GETARCH_SRC ${CMAKE_SOURCE_DIR}/cpuid.S) | |||||
list(APPEND GETARCH_SRC ${PROJECT_SOURCE_DIR}/cpuid.S) | |||||
endif () | endif () | ||||
if (MSVC) | if (MSVC) | ||||
@@ -76,7 +76,7 @@ set(GETARCH_BIN "getarch${CMAKE_EXECUTABLE_SUFFIX}") | |||||
file(MAKE_DIRECTORY ${GETARCH_DIR}) | file(MAKE_DIRECTORY ${GETARCH_DIR}) | ||||
try_compile(GETARCH_RESULT ${GETARCH_DIR} | try_compile(GETARCH_RESULT ${GETARCH_DIR} | ||||
SOURCES ${GETARCH_SRC} | SOURCES ${GETARCH_SRC} | ||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${CMAKE_SOURCE_DIR} | |||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} -I${PROJECT_SOURCE_DIR} | |||||
OUTPUT_VARIABLE GETARCH_LOG | OUTPUT_VARIABLE GETARCH_LOG | ||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} | COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH_BIN} | ||||
) | ) | ||||
@@ -97,8 +97,8 @@ set(GETARCH2_DIR "${PROJECT_BINARY_DIR}/getarch2_build") | |||||
set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") | set(GETARCH2_BIN "getarch_2nd${CMAKE_EXECUTABLE_SUFFIX}") | ||||
file(MAKE_DIRECTORY ${GETARCH2_DIR}) | file(MAKE_DIRECTORY ${GETARCH2_DIR}) | ||||
try_compile(GETARCH2_RESULT ${GETARCH2_DIR} | try_compile(GETARCH2_RESULT ${GETARCH2_DIR} | ||||
SOURCES ${CMAKE_SOURCE_DIR}/getarch_2nd.c | |||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${CMAKE_SOURCE_DIR} | |||||
SOURCES ${PROJECT_SOURCE_DIR}/getarch_2nd.c | |||||
COMPILE_DEFINITIONS ${EXFLAGS} ${GETARCH_FLAGS} ${GETARCH2_FLAGS} -I${PROJECT_SOURCE_DIR} | |||||
OUTPUT_VARIABLE GETARCH2_LOG | OUTPUT_VARIABLE GETARCH2_LOG | ||||
COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} | COPY_FILE ${PROJECT_BINARY_DIR}/${GETARCH2_BIN} | ||||
) | ) | ||||
@@ -3,7 +3,7 @@ | |||||
## Description: Ported from OpenBLAS/Makefile.system | ## Description: Ported from OpenBLAS/Makefile.system | ||||
## | ## | ||||
set(NETLIB_LAPACK_DIR "${CMAKE_SOURCE_DIR}/lapack-netlib") | |||||
set(NETLIB_LAPACK_DIR "${PROJECT_SOURCE_DIR}/lapack-netlib") | |||||
# TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa | # TODO: Makefile.system detects Darwin (mac) and switches to clang here -hpa | ||||
# http://stackoverflow.com/questions/714100/os-detecting-makefile | # http://stackoverflow.com/questions/714100/os-detecting-makefile | ||||
@@ -78,7 +78,7 @@ else () | |||||
set(ONLY_CBLAS 0) | set(ONLY_CBLAS 0) | ||||
endif () | endif () | ||||
include("${CMAKE_SOURCE_DIR}/cmake/prebuild.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") | |||||
if (NOT DEFINED NUM_THREADS) | if (NOT DEFINED NUM_THREADS) | ||||
set(NUM_THREADS ${NUM_CORES}) | set(NUM_THREADS ${NUM_CORES}) | ||||
@@ -124,17 +124,17 @@ set(OBJCOPY "${CROSS_SUFFIX}objcopy") | |||||
set(OBJCONV "${CROSS_SUFFIX}objconv") | set(OBJCONV "${CROSS_SUFFIX}objconv") | ||||
# OS dependent settings | # OS dependent settings | ||||
include("${CMAKE_SOURCE_DIR}/cmake/os.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/os.cmake") | |||||
# Architecture dependent settings | # Architecture dependent settings | ||||
include("${CMAKE_SOURCE_DIR}/cmake/arch.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/arch.cmake") | |||||
# C Compiler dependent settings | # C Compiler dependent settings | ||||
include("${CMAKE_SOURCE_DIR}/cmake/cc.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/cc.cmake") | |||||
if (NOT NOFORTRAN) | if (NOT NOFORTRAN) | ||||
# Fortran Compiler dependent settings | # Fortran Compiler dependent settings | ||||
include("${CMAKE_SOURCE_DIR}/cmake/fc.cmake") | |||||
include("${PROJECT_SOURCE_DIR}/cmake/fc.cmake") | |||||
endif () | endif () | ||||
if (BINARY64) | if (BINARY64) | ||||
@@ -247,10 +247,10 @@ if (NOT DEFINED SYMBOLSUFFIX) | |||||
set(SYMBOLSUFFIX "") | set(SYMBOLSUFFIX "") | ||||
endif () | endif () | ||||
set(KERNELDIR "${CMAKE_SOURCE_DIR}/kernel/${ARCH}") | |||||
set(KERNELDIR "${PROJECT_SOURCE_DIR}/kernel/${ARCH}") | |||||
# TODO: need to convert these Makefiles | # TODO: need to convert these Makefiles | ||||
# include ${CMAKE_SOURCE_DIR}/cmake/${ARCH}.cmake | |||||
# include ${PROJECT_SOURCE_DIR}/cmake/${ARCH}.cmake | |||||
if (${CORE} STREQUAL "PPC440") | if (${CORE} STREQUAL "PPC440") | ||||
set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC") | set(CCOMMON_OPT "${CCOMMON_OPT} -DALLOC_QALLOC") | ||||
@@ -410,8 +410,8 @@ set(LIBDEFNAME "${LIBNAME}.${LIBSUFFIX}.def") | |||||
set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp") | set(LIBEXPNAME "${LIBNAME}.${LIBSUFFIX}.exp") | ||||
set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip") | set(LIBZIPNAME "${LIBNAME}.${LIBSUFFIX}.zip") | ||||
set(LIBS "${CMAKE_SOURCE_DIR}/${LIBNAME}") | |||||
set(LIBS_P "${CMAKE_SOURCE_DIR}/${LIBNAME_P}") | |||||
set(LIBS "${PROJECT_SOURCE_DIR}/${LIBNAME}") | |||||
set(LIBS_P "${PROJECT_SOURCE_DIR}/${LIBNAME_P}") | |||||
set(LIB_COMPONENTS BLAS) | set(LIB_COMPONENTS BLAS) | ||||
@@ -93,7 +93,7 @@ extern "C" { | |||||
#include <sched.h> | #include <sched.h> | ||||
#endif | #endif | ||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) | |||||
#if defined(OS_DARWIN) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_ANDROID) | |||||
#include <sched.h> | #include <sched.h> | ||||
#endif | #endif | ||||
@@ -332,12 +332,20 @@ typedef int blasint; | |||||
#endif | #endif | ||||
#endif | #endif | ||||
#ifdef POWER8 | |||||
#ifndef YIELDING | |||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); | |||||
#endif | |||||
#endif | |||||
/* | |||||
#ifdef PILEDRIVER | #ifdef PILEDRIVER | ||||
#ifndef YIELDING | #ifndef YIELDING | ||||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); | #define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); | ||||
#endif | #endif | ||||
#endif | #endif | ||||
*/ | |||||
/* | /* | ||||
#ifdef STEAMROLLER | #ifdef STEAMROLLER | ||||
@@ -396,6 +404,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||||
#include "common_sparc.h" | #include "common_sparc.h" | ||||
#endif | #endif | ||||
#ifdef ARCH_MIPS | |||||
#include "common_mips.h" | |||||
#endif | |||||
#ifdef ARCH_MIPS64 | #ifdef ARCH_MIPS64 | ||||
#include "common_mips64.h" | #include "common_mips64.h" | ||||
#endif | #endif | ||||
@@ -408,10 +420,14 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||||
#include "common_arm64.h" | #include "common_arm64.h" | ||||
#endif | #endif | ||||
#ifdef ARCH_ZARCH | |||||
#include "common_zarch.h" | |||||
#endif | |||||
#ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
#ifdef OS_WINDOWS | #ifdef OS_WINDOWS | ||||
typedef char env_var_t[MAX_PATH]; | typedef char env_var_t[MAX_PATH]; | ||||
#define readenv(p, n) GetEnvironmentVariable((n), (p), sizeof(p)) | |||||
#define readenv(p, n) GetEnvironmentVariable((LPCTSTR)(n), (LPTSTR)(p), sizeof(p)) | |||||
#else | #else | ||||
typedef char* env_var_t; | typedef char* env_var_t; | ||||
#define readenv(p, n) ((p)=getenv(n)) | #define readenv(p, n) ((p)=getenv(n)) | ||||
@@ -614,9 +630,14 @@ void gotoblas_profile_init(void); | |||||
void gotoblas_profile_quit(void); | void gotoblas_profile_quit(void); | ||||
#ifdef USE_OPENMP | #ifdef USE_OPENMP | ||||
#ifndef C_MSVC | |||||
int omp_in_parallel(void); | int omp_in_parallel(void); | ||||
int omp_get_num_procs(void); | int omp_get_num_procs(void); | ||||
#else | #else | ||||
__declspec(dllimport) int __cdecl omp_in_parallel(void); | |||||
__declspec(dllimport) int __cdecl omp_get_num_procs(void); | |||||
#endif | |||||
#else | |||||
#ifdef __ELF__ | #ifdef __ELF__ | ||||
int omp_in_parallel (void) __attribute__ ((weak)); | int omp_in_parallel (void) __attribute__ ((weak)); | ||||
int omp_get_num_procs(void) __attribute__ ((weak)); | int omp_get_num_procs(void) __attribute__ ((weak)); | ||||
@@ -727,6 +748,7 @@ typedef struct { | |||||
#endif | #endif | ||||
#ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
#include "common_stackalloc.h" | |||||
#if 0 | #if 0 | ||||
#include "symcopy.h" | #include "symcopy.h" | ||||
#endif | #endif | ||||
@@ -105,7 +105,6 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
#define PROLOGUE \ | #define PROLOGUE \ | ||||
.arm ;\ | .arm ;\ | ||||
.global REALNAME ;\ | .global REALNAME ;\ | ||||
.func REALNAME ;\ | |||||
REALNAME: | REALNAME: | ||||
#define EPILOGUE | #define EPILOGUE | ||||
@@ -43,28 +43,39 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
static void __inline blas_lock(volatile BLASULONG *address){ | static void __inline blas_lock(volatile BLASULONG *address){ | ||||
long register ret; | |||||
BLASULONG ret; | |||||
do { | do { | ||||
while (*address) {YIELDING;}; | while (*address) {YIELDING;}; | ||||
__asm__ __volatile__( | __asm__ __volatile__( | ||||
"ldaxr %0, [%1] \n\t" | |||||
"stlxr w2, %2, [%1] \n\t" | |||||
"orr %0, %0, x2 \n\t" | |||||
: "=r"(ret) | |||||
: "r"(address), "r"(1l) | |||||
: "memory", "x2" | |||||
"mov x4, #1 \n\t" | |||||
"1: \n\t" | |||||
"ldaxr x2, [%1] \n\t" | |||||
"cbnz x2, 1b \n\t" | |||||
"2: \n\t" | |||||
"stxr w3, x4, [%1] \n\t" | |||||
"cbnz w3, 1b \n\t" | |||||
"mov %0, #0 \n\t" | |||||
: "=r"(ret), "=r"(address) | |||||
: "1"(address) | |||||
: "memory", "x2" , "x3", "x4" | |||||
); | ); | ||||
} while (ret); | } while (ret); | ||||
MB; | |||||
} | } | ||||
#define BLAS_LOCK_DEFINED | #define BLAS_LOCK_DEFINED | ||||
static inline int blas_quickdivide(blasint x, blasint y){ | static inline int blas_quickdivide(blasint x, blasint y){ | ||||
return x / y; | return x / y; | ||||
} | } | ||||
@@ -110,7 +121,7 @@ REALNAME: | |||||
#define HUGE_PAGESIZE ( 4 << 20) | #define HUGE_PAGESIZE ( 4 << 20) | ||||
#if defined(CORTEXA57) | #if defined(CORTEXA57) | ||||
#define BUFFER_SIZE (40 << 20) | |||||
#define BUFFER_SIZE (20 << 20) | |||||
#else | #else | ||||
#define BUFFER_SIZE (16 << 20) | #define BUFFER_SIZE (16 << 20) | ||||
#endif | #endif | ||||
@@ -70,7 +70,7 @@ extern long int syscall (long int __sysno, ...); | |||||
static inline int my_mbind(void *addr, unsigned long len, int mode, | static inline int my_mbind(void *addr, unsigned long len, int mode, | ||||
unsigned long *nodemask, unsigned long maxnode, | unsigned long *nodemask, unsigned long maxnode, | ||||
unsigned flags) { | unsigned flags) { | ||||
#if defined (__LSB_VERSION__) | |||||
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH) | |||||
// So far, LSB (Linux Standard Base) doesn't support syscall(). | // So far, LSB (Linux Standard Base) doesn't support syscall(). | ||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482 | // https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482 | ||||
return 0; | return 0; | ||||
@@ -90,7 +90,7 @@ static inline int my_mbind(void *addr, unsigned long len, int mode, | |||||
} | } | ||||
static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) { | static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) { | ||||
#if defined (__LSB_VERSION__) | |||||
#if defined (__LSB_VERSION__) || defined(ARCH_ZARCH) | |||||
// So far, LSB (Linux Standard Base) doesn't support syscall(). | // So far, LSB (Linux Standard Base) doesn't support syscall(). | ||||
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482 | // https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482 | ||||
return 0; | return 0; | ||||
@@ -2193,7 +2193,7 @@ | |||||
#endif | #endif | ||||
#ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) | |||||
#if defined(ARCH_X86) || defined(ARCH_X86_64) || defined(ARCH_IA64) || defined(ARCH_MIPS64) || defined(ARCH_ARM64) | |||||
extern BLASLONG gemm_offset_a; | extern BLASLONG gemm_offset_a; | ||||
extern BLASLONG gemm_offset_b; | extern BLASLONG gemm_offset_b; | ||||
extern BLASLONG sgemm_p; | extern BLASLONG sgemm_p; | ||||
@@ -0,0 +1,108 @@ | |||||
/***************************************************************************** | |||||
Copyright (c) 2016, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written | |||||
permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
**********************************************************************************/ | |||||
#ifndef COMMON_MIPS | |||||
#define COMMON_MIPS | |||||
#define MB | |||||
#define WMB | |||||
#define INLINE inline | |||||
#define RETURN_BY_COMPLEX | |||||
#ifndef ASSEMBLER | |||||
/*
 * Placeholder lock for the MIPS port: the body is empty, so a call neither
 * waits on nor modifies *address.
 * NOTE(review): presumably callers rely on this for mutual exclusion in
 * threaded builds -- confirm whether a real atomic implementation is needed.
 */
static void INLINE blas_lock(volatile unsigned long *address){ | |||||
} | |||||
#define BLAS_LOCK_DEFINED | |||||
/*
 * Reads hardware register $30 with the `rdhwr` instruction and returns it.
 * The value is captured in an unsigned long but the function returns
 * unsigned int, so it is converted to unsigned int on return.
 * The assembler state is saved and restored around the read (.set push/pop),
 * and the "memory" clobber keeps the compiler from reordering memory accesses
 * across it.
 * NOTE(review): presumably $30 is a cycle/time counter on the targeted
 * cores -- confirm against the CPU manual.
 */
static inline unsigned int rpcc(void){ | |||||
unsigned long ret; | |||||
__asm__ __volatile__(".set push \n" | |||||
"rdhwr %0, $30 \n" | |||||
".set pop" : "=r"(ret) : : "memory"); | |||||
return ret; | |||||
} | |||||
#define RPCC_DEFINED | |||||
/*
 * Returns x / y computed with the plain C division operator; the quotient is
 * returned as int. No check for y == 0 is made here.
 */
static inline int blas_quickdivide(blasint x, blasint y){ | |||||
return x / y; | |||||
} | |||||
#define GET_IMAGE(res) | |||||
#define GET_IMAGE_CANCEL | |||||
#endif | |||||
#ifndef F_INTERFACE | |||||
#define REALNAME ASMNAME | |||||
#else | |||||
#define REALNAME ASMFNAME | |||||
#endif | |||||
#if defined(ASSEMBLER) && !defined(NEEDPARAM) | |||||
/*
 * PROLOGUE opens an assembly routine: it exports REALNAME as a global symbol
 * and places the entry label. No instruction-set directive such as ARM's
 * `.arm` is emitted, because this header targets MIPS and the MIPS assembler
 * does not accept that pseudo-op.
 */
#define PROLOGUE \ | |||||
.global REALNAME ;\ | |||||
REALNAME: | |||||
#define EPILOGUE | |||||
#define PROFCODE | |||||
#endif | |||||
#define SEEK_ADDRESS | |||||
#ifndef PAGESIZE | |||||
#define PAGESIZE ( 4 << 10) | |||||
#endif | |||||
#define HUGE_PAGESIZE ( 4 << 20) | |||||
#define BUFFER_SIZE (16 << 20) | |||||
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | |||||
#ifndef MAP_ANONYMOUS | |||||
#define MAP_ANONYMOUS MAP_ANON | |||||
#endif | |||||
#endif |
@@ -102,7 +102,7 @@ static void INLINE blas_lock(volatile unsigned long *address){ | |||||
static inline unsigned int rpcc(void){ | static inline unsigned int rpcc(void){ | ||||
unsigned long ret; | unsigned long ret; | ||||
#if defined(LOONGSON3A) || defined(LOONGSON3B) | |||||
// unsigned long long tmp; | // unsigned long long tmp; | ||||
//__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); | //__asm__ __volatile__("dmfc0 %0, $25, 1": "=r"(tmp):: "memory"); | ||||
//ret=tmp; | //ret=tmp; | ||||
@@ -111,17 +111,10 @@ static inline unsigned int rpcc(void){ | |||||
"rdhwr %0, $2\n" | "rdhwr %0, $2\n" | ||||
".set pop": "=r"(ret):: "memory"); | ".set pop": "=r"(ret):: "memory"); | ||||
#else | |||||
__asm__ __volatile__(".set push \n" | |||||
".set mips32r2\n" | |||||
"rdhwr %0, $30 \n" | |||||
".set pop" : "=r"(ret) : : "memory"); | |||||
#endif | |||||
return ret; | return ret; | ||||
} | } | ||||
#define RPCC_DEFINED | #define RPCC_DEFINED | ||||
#if defined(LOONGSON3A) || defined(LOONGSON3B) | |||||
#ifndef NO_AFFINITY | #ifndef NO_AFFINITY | ||||
#define WHEREAMI | #define WHEREAMI | ||||
static inline int WhereAmI(void){ | static inline int WhereAmI(void){ | ||||
@@ -134,7 +127,6 @@ static inline int WhereAmI(void){ | |||||
} | } | ||||
#endif | #endif | ||||
#endif | |||||
static inline int blas_quickdivide(blasint x, blasint y){ | static inline int blas_quickdivide(blasint x, blasint y){ | ||||
return x / y; | return x / y; | ||||
@@ -39,8 +39,13 @@ | |||||
#ifndef COMMON_POWER | #ifndef COMMON_POWER | ||||
#define COMMON_POWER | #define COMMON_POWER | ||||
#if defined(POWER8) | |||||
#define MB __asm__ __volatile__ ("eieio":::"memory") | |||||
#define WMB __asm__ __volatile__ ("eieio":::"memory") | |||||
#else | |||||
#define MB __asm__ __volatile__ ("sync") | #define MB __asm__ __volatile__ ("sync") | ||||
#define WMB __asm__ __volatile__ ("sync") | #define WMB __asm__ __volatile__ ("sync") | ||||
#endif | |||||
#define INLINE inline | #define INLINE inline | ||||
@@ -236,7 +241,7 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
#define HAVE_PREFETCH | #define HAVE_PREFETCH | ||||
#endif | #endif | ||||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) | |||||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) | |||||
#define DCBT_ARG 0 | #define DCBT_ARG 0 | ||||
#else | #else | ||||
#define DCBT_ARG 8 | #define DCBT_ARG 8 | ||||
@@ -258,6 +263,13 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||||
#define L1_PREFETCH dcbtst | #define L1_PREFETCH dcbtst | ||||
#endif | #endif | ||||
#if defined(POWER8) | |||||
#define L1_DUALFETCH | |||||
#define L1_PREFETCHSIZE (16 + 128 * 100) | |||||
#define L1_PREFETCH dcbtst | |||||
#endif | |||||
# | |||||
#ifndef L1_PREFETCH | #ifndef L1_PREFETCH | ||||
#define L1_PREFETCH dcbt | #define L1_PREFETCH dcbt | ||||
#endif | #endif | ||||
@@ -790,6 +802,8 @@ Lmcount$lazy_ptr: | |||||
#define BUFFER_SIZE ( 2 << 20) | #define BUFFER_SIZE ( 2 << 20) | ||||
#elif defined(PPC440FP2) | #elif defined(PPC440FP2) | ||||
#define BUFFER_SIZE ( 16 << 20) | #define BUFFER_SIZE ( 16 << 20) | ||||
#elif defined(POWER8) | |||||
#define BUFFER_SIZE ( 64 << 20) | |||||
#else | #else | ||||
#define BUFFER_SIZE ( 16 << 20) | #define BUFFER_SIZE ( 16 << 20) | ||||
#endif | #endif | ||||
@@ -0,0 +1,73 @@ | |||||
/******************************************************************************* | |||||
Copyright (c) 2016, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*******************************************************************************/ | |||||
/*
 * Stack-smashing canary used by STACK_ALLOC / STACK_FREE.
 * The protection is switched on unconditionally by the define below;
 * remove that define to compile the canary out.
 */
#define STACK_ALLOC_PROTECT

#if !defined(STACK_ALLOC_PROTECT)
/* Protection disabled: both hooks expand to nothing. */
#define STACK_ALLOC_PROTECT_SET
#define STACK_ALLOC_PROTECT_CHECK
#else
#include <assert.h>
/* Plant a known value in a local variable next to the stack buffer ... */
#define STACK_ALLOC_PROTECT_SET volatile int stack_check = 0x7fc01234;
/* ... and verify later that it has not been overwritten. */
#define STACK_ALLOC_PROTECT_CHECK assert(stack_check == 0x7fc01234);
#endif
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
/*
 * STACK_ALLOC(SIZE, TYPE, BUFFER)
 *
 * Point BUFFER at SIZE elements of TYPE. The buffer lives on the stack when
 * SIZE elements fit in MAX_STACK_ALLOC bytes; otherwise it is obtained from
 * blas_memory_alloc(). Stack allocation is much faster than
 * blas_memory_alloc or malloc, particularly when OpenBLAS is used from a
 * multi-threaded application.
 *
 * SIZE must be carefully chosen to be:
 *  - as small as possible, to maximize the number of stack allocations;
 *  - large enough to support all architectures and kernels.
 * Choosing too small a SIZE will lead to stack smashing.
 *
 * The macro declares the locals `stack_alloc_size` and `stack_buffer` (plus
 * the canary from STACK_ALLOC_PROTECT_SET) in the enclosing scope, so it can
 * be used only once per scope and must be paired with STACK_FREE(BUFFER).
 */
#define STACK_ALLOC(SIZE, TYPE, BUFFER)                                        \
  /* volatile because some functions (e.g. dgemv_n.S) */                       \
  /* do not restore all registers                     */                       \
  volatile int stack_alloc_size = (SIZE);                                      \
  /* too large for the stack: 0 selects the blas_memory_alloc path */          \
  if (stack_alloc_size > MAX_STACK_ALLOC / sizeof(TYPE))                       \
    stack_alloc_size = 0;                                                      \
  STACK_ALLOC_PROTECT_SET                                                      \
  /* a variable-length array must have a size > 0, so never declare 0 */       \
  TYPE stack_buffer[stack_alloc_size ? stack_alloc_size : 1]                   \
      __attribute__((aligned(0x20)));                                          \
  BUFFER = stack_alloc_size ? stack_buffer : (TYPE *)blas_memory_alloc(1);
#else
/* Original OpenBLAS/GotoBLAS code: always allocate with blas_memory_alloc. */
#define STACK_ALLOC(SIZE, TYPE, BUFFER) BUFFER = (TYPE *)blas_memory_alloc(1)
#endif
#if defined(MAX_STACK_ALLOC) && MAX_STACK_ALLOC > 0
/*
 * STACK_FREE(BUFFER)
 *
 * Counterpart of STACK_ALLOC: checks the stack canary, then returns BUFFER
 * with blas_memory_free() only when it was not placed on the stack
 * (stack_alloc_size == 0). Relies on the `stack_alloc_size` local declared
 * by STACK_ALLOC in the same scope.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe inside an unbraced if/else. As with the fallback definition below,
 * the caller supplies the terminating semicolon.
 */
#define STACK_FREE(BUFFER)                                                     \
  do {                                                                         \
    STACK_ALLOC_PROTECT_CHECK                                                  \
    if (!stack_alloc_size)                                                     \
      blas_memory_free(BUFFER);                                                \
  } while (0)
#else
/* No stack allocation configured: the buffer always came from blas_memory_alloc. */
#define STACK_FREE(BUFFER) blas_memory_free(BUFFER)
#endif
@@ -41,6 +41,10 @@ | |||||
#ifndef ASSEMBLER | #ifndef ASSEMBLER | ||||
#ifdef C_MSVC | |||||
#include <intrin.h> | |||||
#endif | |||||
#define MB | #define MB | ||||
#define WMB | #define WMB | ||||
@@ -58,7 +62,7 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||||
#if defined(_MSC_VER) && !defined(__clang__) | #if defined(_MSC_VER) && !defined(__clang__) | ||||
// use intrinsic instead of inline assembly | // use intrinsic instead of inline assembly | ||||
ret = _InterlockedExchange(address, 1); | |||||
ret = _InterlockedExchange((volatile LONG *)address, 1); | |||||
// inline assembly | // inline assembly | ||||
/*__asm { | /*__asm { | ||||
mov eax, address | mov eax, address | ||||
@@ -170,12 +174,13 @@ static __inline int blas_quickdivide(unsigned int x, unsigned int y){ | |||||
if (y <= 1) return x; | if (y <= 1) return x; | ||||
y = blas_quick_divide_table[y]; | |||||
#if defined(_MSC_VER) && !defined(__clang__) | #if defined(_MSC_VER) && !defined(__clang__) | ||||
(void*)result; | |||||
return x*y; | |||||
result = x/y; | |||||
return result; | |||||
#else | #else | ||||
y = blas_quick_divide_table[y]; | |||||
__asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); | __asm__ __volatile__ ("mull %0" :"=d" (result) :"a"(x), "0" (y)); | ||||
return result; | return result; | ||||
@@ -396,7 +396,7 @@ REALNAME: | |||||
#define PROFCODE | #define PROFCODE | ||||
#define EPILOGUE .end REALNAME | |||||
#define EPILOGUE .end | |||||
#endif | #endif | ||||
#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI) | #if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(__ELF__) || defined(C_PGI) | ||||
@@ -0,0 +1,140 @@ | |||||
/***************************************************************************** | |||||
Copyright (c) 2011-2016, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written | |||||
permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
**********************************************************************************/ | |||||
#ifndef COMMON_ZARCH | |||||
#define COMMON_ZARCH | |||||
/* MB and WMB are defined with no barrier instruction in this header.        */
/* NOTE(review): the commented-out "dmb" instructions below are ARM barrier  */
/* instructions, presumably left over from the header this file was derived  */
/* from; confirm whether this target needs real memory barriers here.        */
#define MB | |||||
//__asm__ __volatile__ ("dmb ish" : : : "memory")
#define WMB | |||||
//__asm__ __volatile__ ("dmb ishst" : : : "memory")
/* INLINE maps to the plain C `inline` keyword. */
#define INLINE inline | |||||
/* Feature flag; it is only defined here, its consumers are outside this header. */
#define RETURN_BY_COMPLEX | |||||
#ifndef ASSEMBLER | |||||
/* | |||||
static void __inline blas_lock(volatile BLASULONG *address){ | |||||
BLASULONG ret; | |||||
do { | |||||
while (*address) {YIELDING;}; | |||||
__asm__ __volatile__( | |||||
"mov x4, #1 \n\t" | |||||
"1: \n\t" | |||||
"ldaxr x2, [%1] \n\t" | |||||
"cbnz x2, 1b \n\t" | |||||
"2: \n\t" | |||||
"stxr w3, x4, [%1] \n\t" | |||||
"cbnz w3, 1b \n\t" | |||||
"mov %0, #0 \n\t" | |||||
: "=r"(ret), "=r"(address) | |||||
: "1"(address) | |||||
: "memory", "x2" , "x3", "x4" | |||||
); | |||||
} while (ret); | |||||
} | |||||
*/ | |||||
//#define BLAS_LOCK_DEFINED | |||||
/* Divide x by y with a plain integer division and return the quotient as int. */
static inline int blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;

  return quotient;
}
/* GET_IMAGE(res): inline asm that stores register d1 (when DOUBLE is defined) */
/* or s1 (otherwise) into the memory operand `res`.                            */
/* NOTE(review): `str` with d1/s1 operands looks like AArch64 syntax,          */
/* presumably inherited from the ARM64 header; confirm the macro is never      */
/* expanded on this target, or port it.                                        */
#if defined(DOUBLE) | |||||
#define GET_IMAGE(res) __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory") | |||||
#else | |||||
#define GET_IMAGE(res) __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory") | |||||
#endif | |||||
/* GET_IMAGE_CANCEL is defined with no replacement text of its own. */
#define GET_IMAGE_CANCEL | |||||
#endif | |||||
/* REALNAME is the symbol name used by assembly kernels: ASMFNAME when */
/* F_INTERFACE is defined, ASMNAME otherwise.                          */
#ifndef F_INTERFACE | |||||
#define REALNAME ASMNAME | |||||
#else | |||||
#define REALNAME ASMFNAME | |||||
#endif | |||||
/* Assembly-only helpers (skipped when NEEDPARAM is defined).                  */
/* PROLOGUE switches to .text, aligns, exports REALNAME as a function symbol   */
/* and emits its label; EPILOGUE and PROFCODE are defined empty.               */
/* NOTE(review): the meaning of the `.align 256` operand (byte count vs. power */
/* of two) is assembler/target dependent; confirm it is what is intended.      */
#if defined(ASSEMBLER) && !defined(NEEDPARAM) | |||||
#define PROLOGUE \ | |||||
.text ;\ | |||||
.align 256 ;\ | |||||
.global REALNAME ;\ | |||||
.type REALNAME, %function ;\ | |||||
REALNAME: | |||||
#define EPILOGUE | |||||
#define PROFCODE | |||||
#endif | |||||
/* Memory layout constants. */
#define SEEK_ADDRESS | |||||
/* PAGESIZE defaults to 4 KiB unless it was already defined. */
#ifndef PAGESIZE | |||||
#define PAGESIZE ( 4 << 10) | |||||
#endif | |||||
/* HUGE_PAGESIZE is 4 MiB. */
#define HUGE_PAGESIZE ( 4 << 20) | |||||
/* BUFFER_SIZE is 20 MiB when CORTEXA57 is defined, 16 MiB otherwise.      */
/* NOTE(review): CORTEXA57 is an ARM core name, presumably a leftover from */
/* the ARM64 header; confirm whether this branch can be taken here.        */
#if defined(CORTEXA57) | |||||
#define BUFFER_SIZE (20 << 20) | |||||
#else | |||||
#define BUFFER_SIZE (16 << 20) | |||||
#endif | |||||
/* BASE_ADDRESS lies BUFFER_SIZE * MAX_CPU_NUMBER bytes below START_ADDRESS */
/* (both of those are defined outside this header).                        */
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | |||||
/* Fall back to MAP_ANON when MAP_ANONYMOUS is not defined. */
#ifndef MAP_ANONYMOUS | |||||
#define MAP_ANONYMOUS MAP_ANON | |||||
#endif | |||||
#endif | |||||
@@ -74,7 +74,7 @@ int get_feature(char *search) | |||||
fclose(infile); | fclose(infile); | ||||
if( p == NULL ) return; | |||||
if( p == NULL ) return 0; | |||||
t = strtok(p," "); | t = strtok(p," "); | ||||
while( t = strtok(NULL," ")) | while( t = strtok(NULL," ")) | ||||
@@ -115,6 +115,9 @@ int detect(void) | |||||
if (strstr(p, "0xc0f")) { | if (strstr(p, "0xc0f")) { | ||||
return CPU_CORTEXA15; | return CPU_CORTEXA15; | ||||
} | } | ||||
if (strstr(p, "0xd07")) { | |||||
return CPU_ARMV7; //ARMV8 on 32-bit | |||||
} | |||||
} | } | ||||
@@ -158,6 +161,27 @@ int detect(void) | |||||
} | } | ||||
p = (char *) NULL ; | |||||
infile = fopen("/proc/cpuinfo", "r"); | |||||
while (fgets(buffer, sizeof(buffer), infile)) | |||||
{ | |||||
if ((!strncmp("CPU architecture", buffer, 16))) | |||||
{ | |||||
p = strchr(buffer, ':') + 2; | |||||
break; | |||||
} | |||||
} | |||||
fclose(infile); | |||||
if(p != NULL) { | |||||
if (strstr(p, "8")) { | |||||
return CPU_ARMV7; //ARMV8 on 32-bit | |||||
} | |||||
} | |||||
#endif | #endif | ||||
return CPU_UNKNOWN; | return CPU_UNKNOWN; | ||||
@@ -30,17 +30,26 @@ | |||||
#define CPU_UNKNOWN 0 | #define CPU_UNKNOWN 0 | ||||
#define CPU_ARMV8 1 | #define CPU_ARMV8 1 | ||||
#define CPU_CORTEXA57 2 | #define CPU_CORTEXA57 2 | ||||
#define CPU_VULCAN 3 | |||||
#define CPU_THUNDERX 4 | |||||
#define CPU_THUNDERX2T99 5 | |||||
static char *cpuname[] = { | static char *cpuname[] = { | ||||
"UNKNOWN", | "UNKNOWN", | ||||
"ARMV8" , | "ARMV8" , | ||||
"CORTEXA57" | |||||
"CORTEXA57", | |||||
"VULCAN", | |||||
"THUNDERX", | |||||
"THUNDERX2T99" | |||||
}; | }; | ||||
static char *cpuname_lower[] = { | static char *cpuname_lower[] = { | ||||
"unknown", | "unknown", | ||||
"armv8" , | "armv8" , | ||||
"cortexa57" | |||||
"cortexa57", | |||||
"vulcan", | |||||
"thunderx", | |||||
"thunderx2t99" | |||||
}; | }; | ||||
int get_feature(char *search) | int get_feature(char *search) | ||||
@@ -85,25 +94,34 @@ int detect(void) | |||||
#ifdef linux | #ifdef linux | ||||
FILE *infile; | FILE *infile; | ||||
char buffer[512], *p; | |||||
p = (char *) NULL ; | |||||
infile = fopen("/proc/cpuinfo", "r"); | |||||
while (fgets(buffer, sizeof(buffer), infile)) | |||||
{ | |||||
char buffer[512], *p, *cpu_part = NULL, *cpu_implementer = NULL; | |||||
p = (char *) NULL ; | |||||
if (!strncmp("CPU part", buffer, 8)) | |||||
{ | |||||
p = strchr(buffer, ':') + 2; | |||||
infile = fopen("/proc/cpuinfo", "r"); | |||||
while (fgets(buffer, sizeof(buffer), infile)) { | |||||
if ((cpu_part != NULL) && (cpu_implementer != NULL)) { | |||||
break; | break; | ||||
} | } | ||||
if ((cpu_part == NULL) && !strncmp("CPU part", buffer, 8)) { | |||||
cpu_part = strchr(buffer, ':') + 2; | |||||
cpu_part = strdup(cpu_part); | |||||
} else if ((cpu_implementer == NULL) && !strncmp("CPU implementer", buffer, 15)) { | |||||
cpu_implementer = strchr(buffer, ':') + 2; | |||||
cpu_implementer = strdup(cpu_implementer); | |||||
} | |||||
} | } | ||||
fclose(infile); | fclose(infile); | ||||
if(p != NULL) { | |||||
if (strstr(p, "0xd07")) { | |||||
return CPU_CORTEXA57; | |||||
} | |||||
if(cpu_part != NULL && cpu_implementer != NULL) { | |||||
if (strstr(cpu_part, "0xd07") && strstr(cpu_implementer, "0x41")) | |||||
return CPU_CORTEXA57; | |||||
else if (strstr(cpu_part, "0x516") && strstr(cpu_implementer, "0x42")) | |||||
return CPU_VULCAN; | |||||
else if (strstr(cpu_part, "0x0a1") && strstr(cpu_implementer, "0x43")) | |||||
return CPU_THUNDERX; | |||||
else if (strstr(cpu_part, "0xFFF") && strstr(cpu_implementer, "0x43")) /* TODO */ | |||||
return CPU_THUNDERX2T99; | |||||
} | } | ||||
p = (char *) NULL ; | p = (char *) NULL ; | ||||
@@ -176,6 +194,28 @@ void get_cpuconfig(void) | |||||
printf("#define L2_ASSOCIATIVE 4\n"); | printf("#define L2_ASSOCIATIVE 4\n"); | ||||
break; | break; | ||||
case CPU_VULCAN: | |||||
printf("#define VULCAN \n"); | |||||
printf("#define HAVE_VFP \n"); | |||||
printf("#define HAVE_VFPV3 \n"); | |||||
printf("#define HAVE_NEON \n"); | |||||
printf("#define HAVE_VFPV4 \n"); | |||||
printf("#define L1_CODE_SIZE 32768 \n"); | |||||
printf("#define L1_CODE_LINESIZE 64 \n"); | |||||
printf("#define L1_CODE_ASSOCIATIVE 8 \n"); | |||||
printf("#define L1_DATA_SIZE 32768 \n"); | |||||
printf("#define L1_DATA_LINESIZE 64 \n"); | |||||
printf("#define L1_DATA_ASSOCIATIVE 8 \n"); | |||||
printf("#define L2_SIZE 262144 \n"); | |||||
printf("#define L2_LINESIZE 64 \n"); | |||||
printf("#define L2_ASSOCIATIVE 8 \n"); | |||||
printf("#define L3_SIZE 33554432 \n"); | |||||
printf("#define L3_LINESIZE 64 \n"); | |||||
printf("#define L3_ASSOCIATIVE 32 \n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||||
printf("#define DTB_SIZE 4096 \n"); | |||||
break; | |||||
case CPU_CORTEXA57: | case CPU_CORTEXA57: | ||||
printf("#define CORTEXA57\n"); | printf("#define CORTEXA57\n"); | ||||
printf("#define HAVE_VFP\n"); | printf("#define HAVE_VFP\n"); | ||||
@@ -191,6 +231,42 @@ void get_cpuconfig(void) | |||||
printf("#define L2_SIZE 2097152\n"); | printf("#define L2_SIZE 2097152\n"); | ||||
printf("#define L2_LINESIZE 64\n"); | printf("#define L2_LINESIZE 64\n"); | ||||
printf("#define L2_ASSOCIATIVE 16\n"); | printf("#define L2_ASSOCIATIVE 16\n"); | ||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
break; | |||||
case CPU_THUNDERX: | |||||
printf("#define ARMV8\n"); | |||||
printf("#define THUNDERX\n"); | |||||
printf("#define L1_DATA_SIZE 32768\n"); | |||||
printf("#define L1_DATA_LINESIZE 128\n"); | |||||
printf("#define L2_SIZE 16777216\n"); | |||||
printf("#define L2_LINESIZE 128\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 16\n"); | |||||
break; | |||||
case CPU_THUNDERX2T99: | |||||
printf("#define VULCAN \n"); | |||||
printf("#define HAVE_VFP \n"); | |||||
printf("#define HAVE_VFPV3 \n"); | |||||
printf("#define HAVE_NEON \n"); | |||||
printf("#define HAVE_VFPV4 \n"); | |||||
printf("#define L1_CODE_SIZE 32768 \n"); | |||||
printf("#define L1_CODE_LINESIZE 64 \n"); | |||||
printf("#define L1_CODE_ASSOCIATIVE 8 \n"); | |||||
printf("#define L1_DATA_SIZE 32768 \n"); | |||||
printf("#define L1_DATA_LINESIZE 64 \n"); | |||||
printf("#define L1_DATA_ASSOCIATIVE 8 \n"); | |||||
printf("#define L2_SIZE 262144 \n"); | |||||
printf("#define L2_LINESIZE 64 \n"); | |||||
printf("#define L2_ASSOCIATIVE 8 \n"); | |||||
printf("#define L3_SIZE 33554432 \n"); | |||||
printf("#define L3_LINESIZE 64 \n"); | |||||
printf("#define L3_ASSOCIATIVE 32 \n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||||
printf("#define DTB_SIZE 4096 \n"); | |||||
break; | break; | ||||
} | } | ||||
} | } | ||||
@@ -71,15 +71,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
/*********************************************************************/ | /*********************************************************************/ | ||||
#define CPU_UNKNOWN 0 | #define CPU_UNKNOWN 0 | ||||
#define CPU_SICORTEX 1 | |||||
#define CPU_LOONGSON3A 2 | |||||
#define CPU_LOONGSON3B 3 | |||||
#define CPU_P5600 1 | |||||
static char *cpuname[] = { | static char *cpuname[] = { | ||||
"UNKOWN", | "UNKOWN", | ||||
"SICORTEX", | |||||
"LOONGSON3A", | |||||
"LOONGSON3B" | |||||
"P5600" | |||||
}; | }; | ||||
int detect(void){ | int detect(void){ | ||||
@@ -120,7 +116,7 @@ int detect(void){ | |||||
if (strstr(p, "loongson3a")) | if (strstr(p, "loongson3a")) | ||||
return CPU_LOONGSON3A; | return CPU_LOONGSON3A; | ||||
}else{ | }else{ | ||||
return CPU_SICORTEX; | |||||
return CPU_UNKNOWN; | |||||
} | } | ||||
} | } | ||||
//Check model name for Loongson3 | //Check model name for Loongson3 | ||||
@@ -149,64 +145,40 @@ char *get_corename(void){ | |||||
} | } | ||||
void get_architecture(void){ | void get_architecture(void){ | ||||
printf("MIPS64"); | |||||
printf("MIPS"); | |||||
} | } | ||||
void get_subarchitecture(void){ | void get_subarchitecture(void){ | ||||
if(detect()==CPU_LOONGSON3A) { | |||||
printf("LOONGSON3A"); | |||||
}else if(detect()==CPU_LOONGSON3B){ | |||||
printf("LOONGSON3B"); | |||||
if(detect()==CPU_P5600){ | |||||
printf("P5600"); | |||||
}else{ | }else{ | ||||
printf("SICORTEX"); | |||||
printf("UNKNOWN"); | |||||
} | } | ||||
} | } | ||||
void get_subdirname(void){ | void get_subdirname(void){ | ||||
printf("mips64"); | |||||
printf("mips"); | |||||
} | } | ||||
void get_cpuconfig(void){ | void get_cpuconfig(void){ | ||||
if(detect()==CPU_LOONGSON3A) { | |||||
printf("#define LOONGSON3A\n"); | |||||
printf("#define L1_DATA_SIZE 65536\n"); | |||||
printf("#define L1_DATA_LINESIZE 32\n"); | |||||
printf("#define L2_SIZE 512488\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 4\n"); | |||||
}else if(detect()==CPU_LOONGSON3B){ | |||||
printf("#define LOONGSON3B\n"); | |||||
if(detect()==CPU_P5600){ | |||||
printf("#define P5600\n"); | |||||
printf("#define L1_DATA_SIZE 65536\n"); | printf("#define L1_DATA_SIZE 65536\n"); | ||||
printf("#define L1_DATA_LINESIZE 32\n"); | printf("#define L1_DATA_LINESIZE 32\n"); | ||||
printf("#define L2_SIZE 512488\n"); | |||||
printf("#define L2_SIZE 1048576\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | printf("#define L2_LINESIZE 32\n"); | ||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | printf("#define DTB_DEFAULT_ENTRIES 64\n"); | ||||
printf("#define DTB_SIZE 4096\n"); | printf("#define DTB_SIZE 4096\n"); | ||||
printf("#define L2_ASSOCIATIVE 4\n"); | |||||
}else{ | |||||
printf("#define SICORTEX\n"); | |||||
printf("#define L1_DATA_SIZE 32768\n"); | |||||
printf("#define L1_DATA_LINESIZE 32\n"); | |||||
printf("#define L2_SIZE 512488\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 32\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 8\n"); | printf("#define L2_ASSOCIATIVE 8\n"); | ||||
}else{ | |||||
printf("#define UNKNOWN\n"); | |||||
} | } | ||||
} | } | ||||
void get_libname(void){ | void get_libname(void){ | ||||
if(detect()==CPU_LOONGSON3A) { | |||||
printf("loongson3a\n"); | |||||
}else if(detect()==CPU_LOONGSON3B) { | |||||
printf("loongson3b\n"); | |||||
if(detect()==CPU_P5600) { | |||||
printf("p5600\n"); | |||||
}else{ | }else{ | ||||
#ifdef __mips64 | |||||
printf("mips64\n"); | |||||
#else | |||||
printf("mips32\n"); | |||||
#endif | |||||
printf("mips\n"); | |||||
} | } | ||||
} | } |
@@ -0,0 +1,238 @@ | |||||
/***************************************************************************** | |||||
Copyright (c) 2011-2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written | |||||
permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
**********************************************************************************/ | |||||
/*********************************************************************/ | |||||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
/* All rights reserved. */ | |||||
/* */ | |||||
/* Redistribution and use in source and binary forms, with or */ | |||||
/* without modification, are permitted provided that the following */ | |||||
/* conditions are met: */ | |||||
/* */ | |||||
/* 1. Redistributions of source code must retain the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer. */ | |||||
/* */ | |||||
/* 2. Redistributions in binary form must reproduce the above */ | |||||
/* copyright notice, this list of conditions and the following */ | |||||
/* disclaimer in the documentation and/or other materials */ | |||||
/* provided with the distribution. */ | |||||
/* */ | |||||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||||
/* */ | |||||
/* The views and conclusions contained in the software and */ | |||||
/* documentation are those of the authors and should not be */ | |||||
/* interpreted as representing official policies, either expressed */ | |||||
/* or implied, of The University of Texas at Austin. */ | |||||
/*********************************************************************/ | |||||
/* CPU identifiers returned by detect(). Each value is also the index of the */
/* matching entry in cpuname[], so both lists must stay in the same order.   */
#define CPU_UNKNOWN     0
#define CPU_SICORTEX    1
#define CPU_LOONGSON3A  2
#define CPU_LOONGSON3B  3
#define CPU_I6400       4
#define CPU_P6600       5

/* Core names, indexed by the CPU_* identifiers above. */
static char *cpuname[] = {
  "UNKNOWN",     /* previously misspelled "UNKOWN" */
  "SICORTEX",
  "LOONGSON3A",
  "LOONGSON3B",
  "I6400",
  "P6600"
};
int detect(void){ | |||||
#ifdef linux | |||||
FILE *infile; | |||||
char buffer[512], *p; | |||||
p = (char *)NULL; | |||||
infile = fopen("/proc/cpuinfo", "r"); | |||||
while (fgets(buffer, sizeof(buffer), infile)){ | |||||
if (!strncmp("cpu", buffer, 3)){ | |||||
p = strchr(buffer, ':') + 2; | |||||
#if 0 | |||||
fprintf(stderr, "%s\n", p); | |||||
#endif | |||||
break; | |||||
} | |||||
} | |||||
fclose(infile); | |||||
if(p != NULL){ | |||||
if (strstr(p, "Loongson-3A")){ | |||||
return CPU_LOONGSON3A; | |||||
}else if(strstr(p, "Loongson-3B")){ | |||||
return CPU_LOONGSON3B; | |||||
}else if (strstr(p, "Loongson-3")){ | |||||
infile = fopen("/proc/cpuinfo", "r"); | |||||
p = (char *)NULL; | |||||
while (fgets(buffer, sizeof(buffer), infile)){ | |||||
if (!strncmp("system type", buffer, 11)){ | |||||
p = strchr(buffer, ':') + 2; | |||||
break; | |||||
} | |||||
} | |||||
fclose(infile); | |||||
if (strstr(p, "loongson3a")) | |||||
return CPU_LOONGSON3A; | |||||
}else{ | |||||
return CPU_SICORTEX; | |||||
} | |||||
} | |||||
//Check model name for Loongson3 | |||||
infile = fopen("/proc/cpuinfo", "r"); | |||||
p = (char *)NULL; | |||||
while (fgets(buffer, sizeof(buffer), infile)){ | |||||
if (!strncmp("model name", buffer, 10)){ | |||||
p = strchr(buffer, ':') + 2; | |||||
break; | |||||
} | |||||
} | |||||
fclose(infile); | |||||
if(p != NULL){ | |||||
if (strstr(p, "Loongson-3A")){ | |||||
return CPU_LOONGSON3A; | |||||
}else if(strstr(p, "Loongson-3B")){ | |||||
return CPU_LOONGSON3B; | |||||
} | |||||
} | |||||
#endif | |||||
return CPU_UNKNOWN; | |||||
} | |||||
char *get_corename(void){ | |||||
return cpuname[detect()]; | |||||
} | |||||
/* Write the architecture name to stdout (no trailing newline). */
void get_architecture(void){
  fputs("MIPS64", stdout);
}
void get_subarchitecture(void){ | |||||
if(detect()==CPU_LOONGSON3A) { | |||||
printf("LOONGSON3A"); | |||||
}else if(detect()==CPU_LOONGSON3B){ | |||||
printf("LOONGSON3B"); | |||||
}else if(detect()==CPU_I6400){ | |||||
printf("I6400"); | |||||
}else if(detect()==CPU_P6600){ | |||||
printf("P6600"); | |||||
}else{ | |||||
printf("SICORTEX"); | |||||
} | |||||
} | |||||
/* Write the sub-directory name to stdout (no trailing newline). */
void get_subdirname(void){
  fputs("mips64", stdout);
}
void get_cpuconfig(void){ | |||||
if(detect()==CPU_LOONGSON3A) { | |||||
printf("#define LOONGSON3A\n"); | |||||
printf("#define L1_DATA_SIZE 65536\n"); | |||||
printf("#define L1_DATA_LINESIZE 32\n"); | |||||
printf("#define L2_SIZE 512488\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 4\n"); | |||||
}else if(detect()==CPU_LOONGSON3B){ | |||||
printf("#define LOONGSON3B\n"); | |||||
printf("#define L1_DATA_SIZE 65536\n"); | |||||
printf("#define L1_DATA_LINESIZE 32\n"); | |||||
printf("#define L2_SIZE 512488\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 4\n"); | |||||
}else if(detect()==CPU_I6400){ | |||||
printf("#define I6400\n"); | |||||
printf("#define L1_DATA_SIZE 65536\n"); | |||||
printf("#define L1_DATA_LINESIZE 32\n"); | |||||
printf("#define L2_SIZE 1048576\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 8\n"); | |||||
}else if(detect()==CPU_P6600){ | |||||
printf("#define P6600\n"); | |||||
printf("#define L1_DATA_SIZE 65536\n"); | |||||
printf("#define L1_DATA_LINESIZE 32\n"); | |||||
printf("#define L2_SIZE 1048576\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 8\n"); | |||||
}else{ | |||||
printf("#define SICORTEX\n"); | |||||
printf("#define L1_DATA_SIZE 32768\n"); | |||||
printf("#define L1_DATA_LINESIZE 32\n"); | |||||
printf("#define L2_SIZE 512488\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 32\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 8\n"); | |||||
} | |||||
} | |||||
void get_libname(void){ | |||||
if(detect()==CPU_LOONGSON3A) { | |||||
printf("loongson3a\n"); | |||||
}else if(detect()==CPU_LOONGSON3B) { | |||||
printf("loongson3b\n"); | |||||
}else if(detect()==CPU_I6400) { | |||||
printf("i6400\n"); | |||||
}else if(detect()==CPU_P6600) { | |||||
printf("p6600\n"); | |||||
}else{ | |||||
printf("mips64\n"); | |||||
} | |||||
} |
@@ -55,6 +55,7 @@ | |||||
#define CPUTYPE_POWER6 5 | #define CPUTYPE_POWER6 5 | ||||
#define CPUTYPE_CELL 6 | #define CPUTYPE_CELL 6 | ||||
#define CPUTYPE_PPCG4 7 | #define CPUTYPE_PPCG4 7 | ||||
#define CPUTYPE_POWER8 8 | |||||
char *cpuname[] = { | char *cpuname[] = { | ||||
"UNKNOWN", | "UNKNOWN", | ||||
@@ -65,6 +66,7 @@ char *cpuname[] = { | |||||
"POWER6", | "POWER6", | ||||
"CELL", | "CELL", | ||||
"PPCG4", | "PPCG4", | ||||
"POWER8" | |||||
}; | }; | ||||
char *lowercpuname[] = { | char *lowercpuname[] = { | ||||
@@ -76,6 +78,7 @@ char *lowercpuname[] = { | |||||
"power6", | "power6", | ||||
"cell", | "cell", | ||||
"ppcg4", | "ppcg4", | ||||
"power8" | |||||
}; | }; | ||||
char *corename[] = { | char *corename[] = { | ||||
@@ -87,6 +90,7 @@ char *corename[] = { | |||||
"POWER6", | "POWER6", | ||||
"CELL", | "CELL", | ||||
"PPCG4", | "PPCG4", | ||||
"POWER8" | |||||
}; | }; | ||||
int detect(void){ | int detect(void){ | ||||
@@ -115,7 +119,7 @@ int detect(void){ | |||||
if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; | if (!strncasecmp(p, "POWER5", 6)) return CPUTYPE_POWER5; | ||||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | ||||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | ||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER6; | |||||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | |||||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | ||||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | ||||
@@ -636,6 +636,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | |||||
LD1.associative = 8; | LD1.associative = 8; | ||||
LD1.linesize = 64; | LD1.linesize = 64; | ||||
break; | break; | ||||
case 0x63 : | |||||
DTB.size = 2048; | |||||
DTB.associative = 4; | |||||
DTB.linesize = 32; | |||||
LDTB.size = 4096; | |||||
LDTB.associative= 4; | |||||
LDTB.linesize = 32; | |||||
case 0x66 : | case 0x66 : | ||||
LD1.size = 8; | LD1.size = 8; | ||||
LD1.associative = 4; | LD1.associative = 4; | ||||
@@ -667,6 +674,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | |||||
LC1.size = 64; | LC1.size = 64; | ||||
LC1.associative = 8; | LC1.associative = 8; | ||||
break; | break; | ||||
case 0x76 : | |||||
ITB.size = 2048; | |||||
ITB.associative = 0; | |||||
ITB.linesize = 8; | |||||
LITB.size = 4096; | |||||
LITB.associative= 0; | |||||
LITB.linesize = 8; | |||||
case 0x77 : | case 0x77 : | ||||
LC1.size = 16; | LC1.size = 16; | ||||
LC1.associative = 4; | LC1.associative = 4; | ||||
@@ -1110,6 +1124,9 @@ int get_cpuname(void){ | |||||
break; | break; | ||||
case 3: | case 3: | ||||
switch (model) { | switch (model) { | ||||
case 7: | |||||
// Bay Trail | |||||
return CPUTYPE_ATOM; | |||||
case 10: | case 10: | ||||
case 14: | case 14: | ||||
// Ivy Bridge | // Ivy Bridge | ||||
@@ -1172,6 +1189,11 @@ int get_cpuname(void){ | |||||
#endif | #endif | ||||
else | else | ||||
return CPUTYPE_NEHALEM; | return CPUTYPE_NEHALEM; | ||||
case 12: | |||||
// Braswell | |||||
case 13: | |||||
// Avoton | |||||
return CPUTYPE_NEHALEM; | |||||
} | } | ||||
break; | break; | ||||
case 5: | case 5: | ||||
@@ -1197,8 +1219,35 @@ int get_cpuname(void){ | |||||
#endif | #endif | ||||
else | else | ||||
return CPUTYPE_NEHALEM; | return CPUTYPE_NEHALEM; | ||||
case 7: | |||||
// Xeon Phi Knights Landing | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CPUTYPE_HASWELL; | |||||
#else | |||||
return CPUTYPE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CPUTYPE_NEHALEM; | |||||
case 12: | |||||
// Apollo Lake | |||||
return CPUTYPE_NEHALEM; | |||||
} | } | ||||
break; | break; | ||||
case 9: | |||||
case 8: | |||||
switch (model) { | |||||
case 14: // Kaby Lake | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CPUTYPE_HASWELL; | |||||
#else | |||||
return CPUTYPE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CPUTYPE_NEHALEM; | |||||
} | |||||
break; | |||||
} | } | ||||
break; | break; | ||||
case 0x7: | case 0x7: | ||||
@@ -1229,6 +1278,7 @@ int get_cpuname(void){ | |||||
case 2: | case 2: | ||||
return CPUTYPE_OPTERON; | return CPUTYPE_OPTERON; | ||||
case 1: | case 1: | ||||
case 3: | |||||
case 10: | case 10: | ||||
return CPUTYPE_BARCELONA; | return CPUTYPE_BARCELONA; | ||||
case 6: | case 6: | ||||
@@ -1245,6 +1295,11 @@ int get_cpuname(void){ | |||||
return CPUTYPE_PILEDRIVER; | return CPUTYPE_PILEDRIVER; | ||||
else | else | ||||
return CPUTYPE_BARCELONA; //OS don't support AVX. | return CPUTYPE_BARCELONA; //OS don't support AVX. | ||||
case 5: // New EXCAVATOR CPUS | |||||
if(support_avx()) | |||||
return CPUTYPE_EXCAVATOR; | |||||
else | |||||
return CPUTYPE_BARCELONA; //OS don't support AVX. | |||||
case 0: | case 0: | ||||
switch(exmodel){ | switch(exmodel){ | ||||
case 1: //AMD Trinity | case 1: //AMD Trinity | ||||
@@ -1674,6 +1729,11 @@ int get_coretype(void){ | |||||
#endif | #endif | ||||
else | else | ||||
return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
case 12: | |||||
// Braswell | |||||
case 13: | |||||
// Avoton | |||||
return CORE_NEHALEM; | |||||
} | } | ||||
break; | break; | ||||
case 5: | case 5: | ||||
@@ -1699,8 +1759,32 @@ int get_coretype(void){ | |||||
#endif | #endif | ||||
else | else | ||||
return CORE_NEHALEM; | return CORE_NEHALEM; | ||||
} | |||||
case 7: | |||||
// Phi Knights Landing | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CORE_HASWELL; | |||||
#else | |||||
return CORE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CORE_NEHALEM; | |||||
case 12: | |||||
// Apollo Lake | |||||
return CORE_NEHALEM; | |||||
} | |||||
break; | break; | ||||
case 9: | |||||
case 8: | |||||
if (model == 14) // Kaby Lake | |||||
if(support_avx()) | |||||
#ifndef NO_AVX2 | |||||
return CORE_HASWELL; | |||||
#else | |||||
return CORE_SANDYBRIDGE; | |||||
#endif | |||||
else | |||||
return CORE_NEHALEM; | |||||
} | } | ||||
break; | break; | ||||
@@ -1730,7 +1814,11 @@ int get_coretype(void){ | |||||
return CORE_PILEDRIVER; | return CORE_PILEDRIVER; | ||||
else | else | ||||
return CORE_BARCELONA; //OS don't support AVX. | return CORE_BARCELONA; //OS don't support AVX. | ||||
case 5: // New EXCAVATOR | |||||
if(support_avx()) | |||||
return CORE_EXCAVATOR; | |||||
else | |||||
return CORE_BARCELONA; //OS don't support AVX. | |||||
case 0: | case 0: | ||||
switch(exmodel){ | switch(exmodel){ | ||||
case 1: //AMD Trinity | case 1: //AMD Trinity | ||||
@@ -0,0 +1,93 @@ | |||||
/************************************************************************** | |||||
Copyright (c) 2016, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <string.h> | |||||
#define CPU_GENERIC 0 | |||||
#define CPU_Z13 1 | |||||
static char *cpuname[] = { | |||||
"ZARCH_GENERIC", | |||||
"Z13" | |||||
}; | |||||
static char *cpuname_lower[] = { | |||||
"zarch_generic", | |||||
"z13" | |||||
}; | |||||
int detect(void) | |||||
{ | |||||
// return CPU_GENERIC; | |||||
return CPU_Z13; | |||||
} | |||||
void get_libname(void) | |||||
{ | |||||
int d = detect(); | |||||
printf("%s", cpuname_lower[d]); | |||||
} | |||||
char *get_corename(void) | |||||
{ | |||||
return cpuname[detect()]; | |||||
} | |||||
void get_architecture(void) | |||||
{ | |||||
printf("ZARCH"); | |||||
} | |||||
void get_subarchitecture(void) | |||||
{ | |||||
int d = detect(); | |||||
printf("%s", cpuname[d]); | |||||
} | |||||
void get_subdirname(void) | |||||
{ | |||||
printf("zarch"); | |||||
} | |||||
void get_cpuconfig(void) | |||||
{ | |||||
int d = detect(); | |||||
switch (d){ | |||||
case CPU_GENERIC: | |||||
printf("#define ZARCH_GENERIC\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
break; | |||||
case CPU_Z13: | |||||
printf("#define Z13\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
break; | |||||
} | |||||
} |
@@ -105,12 +105,16 @@ ARCH_X86_64 | |||||
ARCH_POWER | ARCH_POWER | ||||
#endif | #endif | ||||
#if defined(__s390x__) || defined(__zarch__) | |||||
ARCH_ZARCH | |||||
#endif | |||||
#ifdef __mips64 | #ifdef __mips64 | ||||
ARCH_MIPS64 | ARCH_MIPS64 | ||||
#endif | #endif | ||||
#if defined(__mips32) || defined(__mips) | #if defined(__mips32) || defined(__mips) | ||||
ARCH_MIPS32 | |||||
ARCH_MIPS | |||||
#endif | #endif | ||||
#ifdef __alpha | #ifdef __alpha | ||||
@@ -1,4 +1,4 @@ | |||||
include_directories(${CMAKE_SOURCE_DIR}) | |||||
include_directories(${PROJECT_SOURCE_DIR}) | |||||
enable_language(Fortran) | enable_language(Fortran) | ||||
@@ -42,6 +42,7 @@ ztestl3o_3m = c_zblas3_3m.o c_z3chke_3m.o auxiliary.o c_xerbla.o constant.o | |||||
all :: all1 all2 all3 | all :: all1 all2 all3 | ||||
all1: xscblat1 xdcblat1 xccblat1 xzcblat1 | all1: xscblat1 xdcblat1 xccblat1 xzcblat1 | ||||
ifndef CROSS | |||||
ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
OMP_NUM_THREADS=2 ./xscblat1 | OMP_NUM_THREADS=2 ./xscblat1 | ||||
OMP_NUM_THREADS=2 ./xdcblat1 | OMP_NUM_THREADS=2 ./xdcblat1 | ||||
@@ -53,8 +54,10 @@ else | |||||
OPENBLAS_NUM_THREADS=2 ./xccblat1 | OPENBLAS_NUM_THREADS=2 ./xccblat1 | ||||
OPENBLAS_NUM_THREADS=2 ./xzcblat1 | OPENBLAS_NUM_THREADS=2 ./xzcblat1 | ||||
endif | endif | ||||
endif | |||||
all2: xscblat2 xdcblat2 xccblat2 xzcblat2 | all2: xscblat2 xdcblat2 xccblat2 xzcblat2 | ||||
ifndef CROSS | |||||
ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
OMP_NUM_THREADS=2 ./xscblat2 < sin2 | OMP_NUM_THREADS=2 ./xscblat2 < sin2 | ||||
OMP_NUM_THREADS=2 ./xdcblat2 < din2 | OMP_NUM_THREADS=2 ./xdcblat2 < din2 | ||||
@@ -66,8 +69,10 @@ else | |||||
OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2 | OPENBLAS_NUM_THREADS=2 ./xccblat2 < cin2 | ||||
OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2 | OPENBLAS_NUM_THREADS=2 ./xzcblat2 < zin2 | ||||
endif | endif | ||||
endif | |||||
all3: xscblat3 xdcblat3 xccblat3 xzcblat3 | all3: xscblat3 xdcblat3 xccblat3 xzcblat3 | ||||
ifndef CROSS | |||||
ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
OMP_NUM_THREADS=2 ./xscblat3 < sin3 | OMP_NUM_THREADS=2 ./xscblat3 < sin3 | ||||
OMP_NUM_THREADS=2 ./xdcblat3 < din3 | OMP_NUM_THREADS=2 ./xdcblat3 < din3 | ||||
@@ -88,6 +93,7 @@ else | |||||
OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m | OPENBLAS_NUM_THREADS=2 ./xccblat3_3m < cin3_3m | ||||
OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m | OPENBLAS_NUM_THREADS=2 ./xzcblat3_3m < zin3_3m | ||||
endif | endif | ||||
endif | |||||
@@ -1365,8 +1365,9 @@ | |||||
* | * | ||||
150 CONTINUE | 150 CONTINUE | ||||
WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
IF( TRACE ) | |||||
$ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
* | * | ||||
160 CONTINUE | 160 CONTINUE | ||||
RETURN | RETURN | ||||
@@ -1365,8 +1365,9 @@ | |||||
* | * | ||||
150 CONTINUE | 150 CONTINUE | ||||
WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
IF( TRACE ) | |||||
$ CALL CPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
* | * | ||||
160 CONTINUE | 160 CONTINUE | ||||
RETURN | RETURN | ||||
@@ -1335,8 +1335,9 @@ | |||||
* | * | ||||
150 CONTINUE | 150 CONTINUE | ||||
WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
IF( TRACE ) | |||||
$ CALL DPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
* | * | ||||
160 CONTINUE | 160 CONTINUE | ||||
RETURN | RETURN | ||||
@@ -1339,8 +1339,9 @@ | |||||
* | * | ||||
150 CONTINUE | 150 CONTINUE | ||||
WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
IF( TRACE ) | |||||
$ CALL SPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
* | * | ||||
160 CONTINUE | 160 CONTINUE | ||||
RETURN | RETURN | ||||
@@ -1350,7 +1350,7 @@ | |||||
* | * | ||||
* Call the subroutine. | * Call the subroutine. | ||||
* | * | ||||
IF( SNAME( 4: 5 ).EQ.'mv' )THEN | |||||
IF( SNAME( 10: 11 ).EQ.'mv' )THEN | |||||
IF( FULL )THEN | IF( FULL )THEN | ||||
IF( TRACE ) | IF( TRACE ) | ||||
$ WRITE( NTRA, FMT = 9993 )NC, SNAME, | $ WRITE( NTRA, FMT = 9993 )NC, SNAME, | ||||
@@ -1376,7 +1376,7 @@ | |||||
CALL CZTPMV( IORDER, UPLO, TRANS, DIAG, | CALL CZTPMV( IORDER, UPLO, TRANS, DIAG, | ||||
$ N, AA, XX, INCX ) | $ N, AA, XX, INCX ) | ||||
END IF | END IF | ||||
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN | |||||
ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN | |||||
IF( FULL )THEN | IF( FULL )THEN | ||||
IF( TRACE ) | IF( TRACE ) | ||||
$ WRITE( NTRA, FMT = 9993 )NC, SNAME, | $ WRITE( NTRA, FMT = 9993 )NC, SNAME, | ||||
@@ -1465,7 +1465,7 @@ | |||||
END IF | END IF | ||||
* | * | ||||
IF( .NOT.NULL )THEN | IF( .NOT.NULL )THEN | ||||
IF( SNAME( 4: 5 ).EQ.'mv' )THEN | |||||
IF( SNAME( 10: 11 ).EQ.'mv' )THEN | |||||
* | * | ||||
* Check the result. | * Check the result. | ||||
* | * | ||||
@@ -1473,7 +1473,7 @@ | |||||
$ INCX, ZERO, Z, INCX, XT, G, | $ INCX, ZERO, Z, INCX, XT, G, | ||||
$ XX, EPS, ERR, FATAL, NOUT, | $ XX, EPS, ERR, FATAL, NOUT, | ||||
$ .TRUE. ) | $ .TRUE. ) | ||||
ELSE IF( SNAME( 4: 5 ).EQ.'sv' )THEN | |||||
ELSE IF( SNAME( 10: 11 ).EQ.'sv' )THEN | |||||
* | * | ||||
* Compute approximation to original vector. | * Compute approximation to original vector. | ||||
* | * | ||||
@@ -1611,7 +1611,7 @@ | |||||
* .. Common blocks .. | * .. Common blocks .. | ||||
COMMON /INFOC/INFOT, NOUTC, OK | COMMON /INFOC/INFOT, NOUTC, OK | ||||
* .. Executable Statements .. | * .. Executable Statements .. | ||||
CONJ = SNAME( 5: 5 ).EQ.'c' | |||||
CONJ = SNAME( 11: 11 ).EQ.'c' | |||||
* Define the number of arguments. | * Define the number of arguments. | ||||
NARGS = 9 | NARGS = 9 | ||||
* | * | ||||
@@ -1366,8 +1366,9 @@ | |||||
* | * | ||||
150 CONTINUE | 150 CONTINUE | ||||
WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
IF( TRACE ) | |||||
$ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
* | * | ||||
160 CONTINUE | 160 CONTINUE | ||||
RETURN | RETURN | ||||
@@ -1366,8 +1366,9 @@ | |||||
* | * | ||||
150 CONTINUE | 150 CONTINUE | ||||
WRITE( NOUT, FMT = 9996 )SNAME | WRITE( NOUT, FMT = 9996 )SNAME | ||||
CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
IF( TRACE ) | |||||
$ CALL ZPRCN3( NTRA, NC, SNAME, IORDER, SIDE, UPLO, TRANSA, DIAG, | |||||
$ M, N, ALPHA, LDA, LDB) | |||||
* | * | ||||
160 CONTINUE | 160 CONTINUE | ||||
RETURN | RETURN | ||||
@@ -1,7 +1,7 @@ | |||||
'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'CBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,7 +1,7 @@ | |||||
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,7 +1,7 @@ | |||||
'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'CBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,7 +1,7 @@ | |||||
'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'DBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,7 +1,7 @@ | |||||
'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'DBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,7 +1,7 @@ | |||||
'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'SBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,7 +1,7 @@ | |||||
'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'SBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,7 +1,7 @@ | |||||
'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'ZBLAT2.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | 2 LOGICAL FLAG, T TO TEST ROW-MAJOR (IF FALSE COLUMN-MAJOR IS TESTED) | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,7 +1,7 @@ | |||||
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,7 +1,7 @@ | |||||
'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | 'ZBLAT3.SNAP' NAME OF SNAPSHOT OUTPUT FILE | ||||
-1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | -1 UNIT NUMBER OF SNAPSHOT FILE (NOT USED IF .LT. 0) | ||||
F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | F LOGICAL FLAG, T TO REWIND SNAPSHOT FILE AFTER EACH RECORD. | ||||
F LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO STOP ON FAILURES. | |||||
T LOGICAL FLAG, T TO TEST ERROR EXITS. | T LOGICAL FLAG, T TO TEST ERROR EXITS. | ||||
2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | 2 0 TO TEST COLUMN-MAJOR, 1 TO TEST ROW-MAJOR, 2 TO TEST BOTH | ||||
16.0 THRESHOLD VALUE OF TEST RATIO | 16.0 THRESHOLD VALUE OF TEST RATIO | ||||
@@ -1,5 +1,5 @@ | |||||
include_directories(${CMAKE_SOURCE_DIR}) | |||||
include_directories(${PROJECT_SOURCE_DIR}) | |||||
# sources that need to be compiled twice, once with no flags and once with LOWER | # sources that need to be compiled twice, once with no flags and once with LOWER | ||||
set(UL_SOURCES | set(UL_SOURCES | ||||
@@ -119,7 +119,7 @@ static int trmv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, F | |||||
#endif | #endif | ||||
x = buffer; | x = buffer; | ||||
buffer += ((COMPSIZE * args -> m + 1023) & ~1023); | |||||
buffer += ((COMPSIZE * args -> m + 3) & ~3); | |||||
} | } | ||||
#ifndef TRANS | #ifndef TRANS | ||||
@@ -403,7 +403,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu | |||||
if (num_cpu) { | if (num_cpu) { | ||||
queue[0].sa = NULL; | queue[0].sa = NULL; | ||||
queue[0].sb = buffer + num_cpu * (((m + 255) & ~255) + 16) * COMPSIZE; | |||||
queue[0].sb = buffer + num_cpu * (((m + 3) & ~3) + 16) * COMPSIZE; | |||||
queue[num_cpu - 1].next = NULL; | queue[num_cpu - 1].next = NULL; | ||||
@@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu | |||||
if (incb != 1) { | if (incb != 1) { | ||||
B = buffer; | B = buffer; | ||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15); | |||||
COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
} | } | ||||
@@ -56,7 +56,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *b, BLASLONG incb, FLOAT *bu | |||||
if (incb != 1) { | if (incb != 1) { | ||||
B = buffer; | B = buffer; | ||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 4095) & ~4095); | |||||
gemvbuffer = (FLOAT *)(((BLASLONG)buffer + m * sizeof(FLOAT) * 2 + 15) & ~15); | |||||
COPY_K(m, b, incb, buffer, 1); | COPY_K(m, b, incb, buffer, 1); | ||||
} | } | ||||
@@ -1,4 +1,4 @@ | |||||
include_directories(${CMAKE_SOURCE_DIR}) | |||||
include_directories(${PROJECT_SOURCE_DIR}) | |||||
# N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa | # N.B. In the original makefile there was a BLOCKS define used in the compilation of these files but I don't see any evidence of it being set anywhere. -hpa | ||||
@@ -48,8 +48,7 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
# TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination | # TRANS needs to be set/unset when CONJ is set/unset, so can't use it as a combination | ||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type}) | GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK" 3 "herk_N" false ${float_type}) | ||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type}) | GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;TRANS;CONJ" 3 "herk_C" false ${float_type}) | ||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type}) | |||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type}) | |||||
# Need to set CONJ for trmm and trsm | # Need to set CONJ for trmm and trsm | ||||
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type}) | GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "CONJ" 0 "trmm_LR" false ${float_type}) | ||||
GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type}) | GenerateCombinationObjects("trmm_L.c" "UPPER;UNIT" "L;N" "TRANSA;CONJ" 0 "trmm_LC" false ${float_type}) | ||||
@@ -72,6 +71,10 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) | GenerateNamedObjects("zher2k_k.c" "HER2K;LOWER;TRANS;CONJ" "her2k_LC" false "" "" false ${float_type}) | ||||
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | ||||
#herk | |||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3" 3 "herk_thread_N" false ${float_type}) | |||||
GenerateCombinationObjects("zherk_k.c" "LOWER" "U" "HERK;THREADED_LEVEL3;TRANS;CONJ" 3 "herk_thread_C" false ${float_type}) | |||||
#hemm | #hemm | ||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) | GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NN;THREADED_LEVEL3" 0 "hemm_thread_L" false ${float_type}) | ||||
GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) | GenerateCombinationObjects("zhemm_k.c" "LOWER" "U" "NC;RSIDE;THREADED_LEVEL3" 0 "hemm_thread_R" false ${float_type}) | ||||
@@ -96,6 +99,17 @@ foreach (float_type ${FLOAT_TYPES}) | |||||
endif() | endif() | ||||
endif () | endif () | ||||
endforeach () | endforeach () | ||||
# for gemm3m | |||||
if(USE_GEMM3M) | |||||
foreach (GEMM_DEFINE ${GEMM_DEFINES}) | |||||
string(TOLOWER ${GEMM_DEFINE} GEMM_DEFINE_LC) | |||||
GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE}" "gemm3m_${GEMM_DEFINE_LC}" false "" "" false ${float_type}) | |||||
if (SMP AND NOT USE_SIMPLE_THREADED_LEVEL3) | |||||
GenerateNamedObjects("gemm3m.c" "${GEMM_DEFINE};THREADED_LEVEL3" "gemm3m_thread_${GEMM_DEFINE_LC}" false "" "" false ${float_type}) | |||||
endif () | |||||
endforeach () | |||||
endif() | |||||
endif () | endif () | ||||
endforeach () | endforeach () | ||||
@@ -316,7 +316,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
if (min_l > GEMM3M_Q) { | if (min_l > GEMM3M_Q) { | ||||
min_l = (min_l + 1) / 2; | min_l = (min_l + 1) / 2; | ||||
#ifdef UNROLL_X | #ifdef UNROLL_X | ||||
min_l = (min_l + UNROLL_X - 1) & ~(UNROLL_X - 1); | |||||
min_l = ((min_l + UNROLL_X - 1)/UNROLL_X) * UNROLL_X; | |||||
#endif | #endif | ||||
} | } | ||||
} | } | ||||
@@ -326,7 +326,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else { | } else { | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
} | } | ||||
@@ -365,7 +365,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else | } else | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -386,7 +386,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else { | } else { | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
} | } | ||||
@@ -429,7 +429,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else | } else | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -451,7 +451,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else { | } else { | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
} | } | ||||
@@ -494,7 +494,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else | } else | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -297,9 +297,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_l = GEMM_Q; | min_l = GEMM_Q; | ||||
} else { | } else { | ||||
if (min_l > GEMM_Q) { | if (min_l > GEMM_Q) { | ||||
min_l = (min_l / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||||
min_l = ((min_l / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||||
} | } | ||||
gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1)); | |||||
gemm_p = ((l2size / min_l + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||||
while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M; | while (gemm_p * min_l > l2size) gemm_p -= GEMM_UNROLL_M; | ||||
} | } | ||||
@@ -311,7 +311,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else { | } else { | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||||
} else { | } else { | ||||
l1stride = 0; | l1stride = 0; | ||||
} | } | ||||
@@ -335,7 +335,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | ||||
else | else | ||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | |||||
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; | |||||
else | |||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | |||||
@@ -367,7 +369,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -365,7 +365,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
buffer[0] = sb; | buffer[0] = sb; | ||||
for (i = 1; i < DIVIDE_RATE; i++) { | for (i = 1; i < DIVIDE_RATE; i++) { | ||||
buffer[i] = buffer[i - 1] + GEMM3M_Q * ((div_n + GEMM3M_UNROLL_N - 1) & ~(GEMM3M_UNROLL_N - 1)); | |||||
buffer[i] = buffer[i - 1] + GEMM3M_Q * (((div_n + GEMM3M_UNROLL_N - 1)/GEMM3M_UNROLL_N) * GEMM3M_UNROLL_N); | |||||
} | } | ||||
for(ls = 0; ls < k; ls += min_l){ | for(ls = 0; ls < k; ls += min_l){ | ||||
@@ -384,7 +384,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else { | } else { | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = (min_i / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
} | } | ||||
@@ -482,7 +482,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else | } else | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -618,7 +618,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else | } else | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -754,7 +754,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM3M_P; | min_i = GEMM3M_P; | ||||
} else | } else | ||||
if (min_i > GEMM3M_P) { | if (min_i > GEMM3M_P) { | ||||
min_i = ((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1) & ~(GEMM3M_UNROLL_M - 1); | |||||
min_i = (((min_i + 1) / 2 + GEMM3M_UNROLL_M - 1)/GEMM3M_UNROLL_M) * GEMM3M_UNROLL_M; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -189,7 +189,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
#ifndef LOWER | #ifndef LOWER | ||||
@@ -230,7 +230,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa); | ICOPY_OPERATION(min_l, min_i, a, lda, ls, is, sa); | ||||
@@ -245,7 +245,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
if (m_start >= js) { | if (m_start >= js) { | ||||
@@ -284,7 +284,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa); | ICOPY_OPERATION(min_l, min_i, b, ldb, ls, is, sa); | ||||
@@ -322,7 +322,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
aa = sb + min_l * (is - js) * COMPSIZE; | aa = sb + min_l * (is - js) * COMPSIZE; | ||||
@@ -353,7 +353,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
aa = sb + min_l * (m_start - js) * COMPSIZE; | aa = sb + min_l * (m_start - js) * COMPSIZE; | ||||
@@ -383,7 +383,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
aa = sb + min_l * (is - js) * COMPSIZE; | aa = sb + min_l * (is - js) * COMPSIZE; | ||||
@@ -198,7 +198,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
#ifndef LOWER | #ifndef LOWER | ||||
@@ -239,7 +239,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
aa = sb + min_l * (is - js) * COMPSIZE; | aa = sb + min_l * (is - js) * COMPSIZE; | ||||
@@ -303,7 +303,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -375,7 +375,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
if (is < js + min_j) { | if (is < js + min_j) { | ||||
@@ -460,7 +460,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -210,8 +210,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", mypos, m_from, m_to, n_from, n_to); | fprintf(stderr, "Thread[%ld] m_from : %ld m_to : %ld n_from : %ld n_to : %ld\n", mypos, m_from, m_to, n_from, n_to); | ||||
#endif | #endif | ||||
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE | |||||
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
buffer[0] = sb; | buffer[0] = sb; | ||||
for (i = 1; i < DIVIDE_RATE; i++) { | for (i = 1; i < DIVIDE_RATE; i++) { | ||||
@@ -233,7 +232,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else { | } else { | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
} | } | ||||
@@ -253,8 +252,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
STOP_RPCC(copy_A); | STOP_RPCC(copy_A); | ||||
div_n = ((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE | |||||
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
div_n = (((m_to - m_from + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) { | for (xxx = m_from, bufferside = 0; xxx < m_to; xxx += div_n, bufferside ++) { | ||||
@@ -353,9 +351,8 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
while (current >= 0) { | while (current >= 0) { | ||||
#endif | #endif | ||||
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE | |||||
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { | for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -412,7 +409,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
min_i = (((min_i + 1) / 2 + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -425,8 +422,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
do { | do { | ||||
div_n = ((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE | |||||
+ GEMM_UNROLL_MN - 1) & ~(GEMM_UNROLL_MN - 1); | |||||
div_n = (((range_n[current + 1] - range_n[current] + DIVIDE_RATE - 1) / DIVIDE_RATE + GEMM_UNROLL_MN - 1)/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { | for (xxx = range_n[current], bufferside = 0; xxx < range_n[current + 1]; xxx += div_n, bufferside ++) { | ||||
@@ -602,9 +598,9 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
double di = (double)i; | double di = (double)i; | ||||
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask); | |||||
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) ); | |||||
if (num_cpu == 0) width = n - ((n - width) & ~mask); | |||||
if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) ); | |||||
if ((width > n - i) || (width < mask)) width = n - i; | if ((width > n - i) || (width < mask)) width = n - i; | ||||
@@ -644,7 +640,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
double di = (double)i; | double di = (double)i; | ||||
width = (((BLASLONG)(sqrt(di * di + dnum) - di) + mask) & ~mask); | |||||
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1)); | |||||
if ((width > n - i) || (width < mask)) width = n - i; | if ((width > n - i) || (width < mask)) width = n - i; | ||||
@@ -310,7 +310,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
buffer[0] = sb; | buffer[0] = sb; | ||||
for (i = 1; i < DIVIDE_RATE; i++) { | for (i = 1; i < DIVIDE_RATE; i++) { | ||||
buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1)) * COMPSIZE; | |||||
buffer[i] = buffer[i - 1] + GEMM_Q * ((div_n + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N * COMPSIZE; | |||||
} | } | ||||
@@ -331,7 +331,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else { | } else { | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = (min_i / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||||
min_i = ((min_i / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||||
} else { | } else { | ||||
if (args -> nthreads == 1) l1stride = 0; | if (args -> nthreads == 1) l1stride = 0; | ||||
} | } | ||||
@@ -367,7 +367,9 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | ||||
else | else | ||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | |||||
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; | |||||
else | |||||
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | |||||
START_RPCC(); | START_RPCC(); | ||||
@@ -441,7 +443,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
min_i = GEMM_P; | min_i = GEMM_P; | ||||
} else | } else | ||||
if (min_i > GEMM_P) { | if (min_i > GEMM_P) { | ||||
min_i = ((min_i + 1) / 2 + GEMM_UNROLL_M - 1) & ~(GEMM_UNROLL_M - 1); | |||||
min_i = (((min_i + 1) / 2 + GEMM_UNROLL_M - 1)/GEMM_UNROLL_M) * GEMM_UNROLL_M; | |||||
} | } | ||||
START_RPCC(); | START_RPCC(); | ||||
@@ -158,7 +158,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, | |||||
int mm, nn; | int mm, nn; | ||||
mm = (loop & ~(GEMM_UNROLL_MN - 1)); | |||||
mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
nn = MIN(GEMM_UNROLL_MN, n - loop); | nn = MIN(GEMM_UNROLL_MN, n - loop); | ||||
#ifndef LOWER | #ifndef LOWER | ||||
@@ -149,7 +149,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, FLOAT alpha_i, | |||||
int mm, nn; | int mm, nn; | ||||
mm = (loop & ~(GEMM_UNROLL_MN - 1)); | |||||
mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
nn = MIN(GEMM_UNROLL_MN, n - loop); | nn = MIN(GEMM_UNROLL_MN, n - loop); | ||||
#ifndef LOWER | #ifndef LOWER | ||||
@@ -132,7 +132,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG k, FLOAT alpha_r, | |||||
int mm, nn; | int mm, nn; | ||||
mm = (loop & ~(GEMM_UNROLL_MN - 1)); | |||||
mm = (loop/GEMM_UNROLL_MN) * GEMM_UNROLL_MN; | |||||
nn = MIN(GEMM_UNROLL_MN, n - loop); | nn = MIN(GEMM_UNROLL_MN, n - loop); | ||||
#ifndef LOWER | #ifndef LOWER | ||||