Browse Source

Merge pull request #3033 from xianyi/develop

Update branch from develop to release 0.3.13
tags/v0.3.13
Martin Kroeker GitHub 5 years ago
parent
commit
d2b11c4777
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
100 changed files with 1291 additions and 4289 deletions
  1. +20
    -16
      .travis.yml
  2. +1
    -1
      CMakeLists.txt
  3. +4
    -1
      CONTRIBUTORS.md
  4. +50
    -0
      Changelog.txt
  5. +4
    -0
      Makefile
  6. +5
    -0
      Makefile.arm
  7. +8
    -8
      Makefile.install
  8. +4
    -0
      Makefile.prebuild
  9. +4
    -0
      Makefile.riscv64
  10. +1
    -1
      Makefile.rule
  11. +12
    -4
      Makefile.sparc
  12. +66
    -23
      Makefile.system
  13. +9
    -3
      Makefile.x86
  14. +19
    -10
      Makefile.x86_64
  15. +7
    -0
      README.md
  16. +5
    -0
      TargetList.txt
  17. +56
    -114
      benchmark/amax.c
  18. +56
    -111
      benchmark/amin.c
  19. +55
    -125
      benchmark/asum.c
  20. +4
    -82
      benchmark/axpby.c
  21. +4
    -77
      benchmark/axpy.c
  22. +104
    -0
      benchmark/bench.h
  23. +4
    -46
      benchmark/cholesky.c
  24. +4
    -82
      benchmark/copy.c
  25. +4
    -81
      benchmark/dot.c
  26. +4
    -76
      benchmark/geev.c
  27. +4
    -76
      benchmark/gemm.c
  28. +4
    -80
      benchmark/gemm3m.c
  29. +7
    -79
      benchmark/gemv.c
  30. +5
    -81
      benchmark/ger.c
  31. +4
    -79
      benchmark/gesv.c
  32. +4
    -75
      benchmark/getri.c
  33. +4
    -80
      benchmark/hbmv.c
  34. +4
    -77
      benchmark/hemm.c
  35. +4
    -78
      benchmark/hemv.c
  36. +4
    -81
      benchmark/her.c
  37. +4
    -81
      benchmark/her2.c
  38. +4
    -77
      benchmark/her2k.c
  39. +4
    -79
      benchmark/herk.c
  40. +4
    -78
      benchmark/hpmv.c
  41. +4
    -76
      benchmark/iamax.c
  42. +4
    -76
      benchmark/iamin.c
  43. +4
    -76
      benchmark/imax.c
  44. +4
    -76
      benchmark/imin.c
  45. +7
    -78
      benchmark/linpack.c
  46. +4
    -76
      benchmark/max.c
  47. +4
    -76
      benchmark/min.c
  48. +4
    -76
      benchmark/nrm2.c
  49. +10
    -46
      benchmark/potrf.c
  50. +4
    -75
      benchmark/rot.c
  51. +5
    -77
      benchmark/rotm.c
  52. +4
    -76
      benchmark/scal.c
  53. +4
    -77
      benchmark/spmv.c
  54. +4
    -78
      benchmark/spr.c
  55. +4
    -76
      benchmark/spr2.c
  56. +4
    -75
      benchmark/swap.c
  57. +4
    -76
      benchmark/symm.c
  58. +4
    -76
      benchmark/symv.c
  59. +4
    -76
      benchmark/syr.c
  60. +4
    -77
      benchmark/syr2.c
  61. +4
    -75
      benchmark/syr2k.c
  62. +4
    -76
      benchmark/syrk.c
  63. +4
    -44
      benchmark/tpmv.c
  64. +4
    -44
      benchmark/tpsv.c
  65. +4
    -75
      benchmark/trmm.c
  66. +4
    -44
      benchmark/trmv.c
  67. +4
    -75
      benchmark/trsm.c
  68. +4
    -83
      benchmark/trsv.c
  69. +5
    -78
      benchmark/zdot-intel.c
  70. +4
    -77
      benchmark/zdot.c
  71. +20
    -2
      c_check
  72. +1
    -0
      cblas.h
  73. +5
    -2
      cmake/cc.cmake
  74. +2
    -2
      cmake/kernel.cmake
  75. +8
    -0
      cmake/os.cmake
  76. +15
    -0
      cmake/prebuild.cmake
  77. +58
    -42
      cmake/system.cmake
  78. +5
    -0
      common.h
  79. +0
    -6
      common_arm64.h
  80. +2
    -0
      common_interface.h
  81. +4
    -0
      common_level2.h
  82. +0
    -8
      common_linux.h
  83. +6
    -4
      common_macro.h
  84. +2
    -7
      common_mips64.h
  85. +2
    -2
      common_param.h
  86. +4
    -0
      common_power.h
  87. +98
    -0
      common_riscv64.h
  88. +4
    -0
      common_sb.h
  89. +6
    -0
      common_sparc.h
  90. +29
    -62
      cpuid_mips64.c
  91. +113
    -0
      cpuid_riscv64.c
  92. +8
    -8
      cpuid_x86.c
  93. +5
    -0
      ctest.c
  94. +3
    -3
      ctest/Makefile
  95. +15
    -1
      driver/level2/Makefile
  96. +149
    -0
      driver/level2/sbgemv_thread.c
  97. +6
    -6
      driver/level3/Makefile
  98. +3
    -1
      driver/level3/level3.c
  99. +2
    -0
      driver/level3/level3_thread.c
  100. +11
    -5
      driver/others/Makefile

+ 20
- 16
.travis.yml View File

@@ -211,44 +211,48 @@ matrix:


- &test-macos - &test-macos
os: osx os: osx
osx_image: xcode10.1
osx_image: xcode11.5
before_script: before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
- brew update
- brew install gcc@8 # for gfortran
script: script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env: env:
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8"
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-9"


- <<: *test-macos - <<: *test-macos
osx_image: xcode12 osx_image: xcode12
before_script: before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
- brew update - brew update
- brew install gcc@10 # for gfortran
- brew install gcc@10
script: script:
- travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE
env: env:
- BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10" - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10"
- <<: *test-macos
osx_image: xcode10.0
env:
- BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"
# - <<: *test-macos
# osx_image: xcode10
# env:
# - BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1"


- <<: *test-macos - <<: *test-macos
osx_image: xcode10.1
osx_image: xcode11.5
before_script:
- COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32"
- brew update
env: env:
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
# - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
# - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0"
- CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
- CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch arm64 -miphoneos-version-min=10.0"
- BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1" - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1"

- <<: *test-macos - <<: *test-macos
osx_image: xcode10.1
osx_image: xcode11.5
env: env:
- CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
# - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
# - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1"
- CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang"
- CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch armv7 -miphoneos-version-min=5.1"
- BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1" - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1"


- &test-graviton2 - &test-graviton2


+ 1
- 1
CMakeLists.txt View File

@@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5)
project(OpenBLAS C ASM) project(OpenBLAS C ASM)
set(OpenBLAS_MAJOR_VERSION 0) set(OpenBLAS_MAJOR_VERSION 0)
set(OpenBLAS_MINOR_VERSION 3) set(OpenBLAS_MINOR_VERSION 3)
set(OpenBLAS_PATCH_VERSION 12)
set(OpenBLAS_PATCH_VERSION 13)
set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}")


# Adhere to GNU filesystem layout conventions # Adhere to GNU filesystem layout conventions


+ 4
- 1
CONTRIBUTORS.md View File

@@ -190,4 +190,7 @@ In chronological order:
* [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support * [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support


* Danfeng Zhang <https://github.com/craft-zhang> * Danfeng Zhang <https://github.com/craft-zhang>
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53
* [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53

* PingTouGe Semiconductor Co., Ltd.
* [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910

+ 50
- 0
Changelog.txt View File

@@ -1,4 +1,54 @@
OpenBLAS ChangeLog OpenBLAS ChangeLog
====================================================================
Version 0.3.13
12-Dec-2020
common:
* Added a generic bfloat16 SBGEMV kernel
* Fixed a potentially severe memory leak after fork in OpenMP builds
that was introduced in 0.3.12
* Added detection of the Fujitsu Fortran compiler
* Added detection of the (e)gfortran compiler on OpenBSD
* Added support for overriding the default name of the library independently
from symbol suffixing in the gmake builds (already supported in cmake)

RISCV:
* Added a RISC V port optimized for C910V

POWER:
* Added optimized POWER10 kernels for SAXPY, CAXPY, SDOT, DDOT and DGEMV_N
* Improved DGEMM performance on POWER10
* Improved STRSM and DTRSM performance on POWER9 and POWER10
* Fixed segmemtation faults in DYNAMIC_ARCH builds
* Fixed compilation with the PGI compiler

x86:
* Fixed compilation of kernels that require SSE2 intrinsics since 0.3.12
x86_64:
* Added an optimized bfloat16 SBGEMV kernel for SkylakeX and Cooperlake
* Improved the performance of SASUM and DASUM kernels through parallelization
* Improved the performance of SROT and DROT kernels
* Improved the performance of multithreaded xSYRK
* Fixed OpenMP builds that use the LLVM Clang compiler together with GNU gfortran
(where linking of both the LLVM libomp and GNU libgomp could lead to lockups or
wrong results)
* Fixed miscompilations by old gcc 4.6
* Fixed misdetection of AVX2 capability in some Sandybridge cpus
* Fixed lockups in builds combining DYNAMIC_ARCH with TARGET=GENERIC on OpenBSD

ARM64:
* Fixed segmemtation faults in DYNAMIC_ARCH builds

MIPS:
* Improved kernels for Loongson 3R3 ("3A") and 3R4 ("3B") models, including MSA
* Fixed bugs in the MSA kernels for CGEMM, CTRMM, CGEMV and ZGEMV
* Added handling of zero increments in the MSA kernels for SSWAP and DSWAP
* Added DYNAMIC_ARCH support for MIPS64 (currently Loongson3R3/3R4 only)

SPARC:
* Fixed building 32 and 64 bit SPARC kernels with the SolarisStudio compilers

==================================================================== ====================================================================
Version 0.3.12 Version 0.3.12
24-Oct-2020 24-Oct-2020


+ 4
- 0
Makefile View File

@@ -268,7 +268,11 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN)))
-@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1)
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
-@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc


+ 5
- 0
Makefile.arm View File

@@ -12,3 +12,8 @@ ifeq ($(CORE), ARMV6)
CCOMMON_OPT += -mfpu=vfp CCOMMON_OPT += -mfpu=vfp
FCOMMON_OPT += -mfpu=vfp FCOMMON_OPT += -mfpu=vfp
endif endif

ifdef HAVE_NEON
CCOMMON_OPT += -mfpu=neon
FCOMMON_OPT += -mfpu=neon
endif

+ 8
- 8
Makefile.install View File

@@ -9,7 +9,7 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include
OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib
OPENBLAS_BINARY_DIR := $(PREFIX)/bin OPENBLAS_BINARY_DIR := $(PREFIX)/bin
OPENBLAS_BUILD_DIR := $(CURDIR) OPENBLAS_BUILD_DIR := $(CURDIR)
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas
OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/$(LIBSONAMEBASE)
OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake
OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake
OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig
@@ -150,13 +150,13 @@ endif
endif endif


#Generating openblas.pc #Generating openblas.pc
@echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc"
@echo Generating $(LIBSONAMEBASE).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)"
@echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
@echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
@echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
@echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
@echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"
@cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc"




#Generating OpenBLASConfig.cmake #Generating OpenBLASConfig.cmake


+ 4
- 0
Makefile.prebuild View File

@@ -41,6 +41,10 @@ ifeq ($(TARGET), I6500)
TARGET_FLAGS = -mips64r6 TARGET_FLAGS = -mips64r6
endif endif


ifeq ($(TARGET), C910V)
TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v
endif

all: getarch_2nd all: getarch_2nd
./getarch_2nd 0 >> $(TARGET_MAKE) ./getarch_2nd 0 >> $(TARGET_MAKE)
./getarch_2nd 1 >> $(TARGET_CONF) ./getarch_2nd 1 >> $(TARGET_CONF)


+ 4
- 0
Makefile.riscv64 View File

@@ -0,0 +1,4 @@
ifeq ($(CORE), C910V)
CCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v
FCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v -static
endif

+ 1
- 1
Makefile.rule View File

@@ -3,7 +3,7 @@
# #


# This library's version # This library's version
VERSION = 0.3.12
VERSION = 0.3.13


# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library


+ 12
- 4
Makefile.sparc View File

@@ -3,21 +3,29 @@ RANLIB = ranlib


ifdef BINARY64 ifdef BINARY64


ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9 -m64 CCOMMON_OPT += -mcpu=v9 -m64
else
CCOMMON_OPT += -m64
endif
ifeq ($(COMPILER_F77), g77) ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9 -m64 FCOMMON_OPT += -mcpu=v9 -m64
endif endif
ifeq ($(COMPILER_F77), f90)
FCOMMON_OPT += -xarch=v9
ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -m64
endif endif
else else


ifeq ($(C_COMPILER), GCC)
CCOMMON_OPT += -mcpu=v9 CCOMMON_OPT += -mcpu=v9
else
CCOMMON_OPT += -xarch=v9
endif


ifeq ($(COMPILER_F77), g77) ifeq ($(COMPILER_F77), g77)
FCOMMON_OPT += -mcpu=v9 FCOMMON_OPT += -mcpu=v9
endif endif
ifeq ($(COMPILER_F77), f90)
ifeq ($(COMPILER_F77), f95)
FCOMMON_OPT += -xarch=v8plusb FCOMMON_OPT += -xarch=v8plusb
endif endif


@@ -37,4 +45,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \
else else
LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \ LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \
-Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath -Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath
endif
endif

+ 66
- 23
Makefile.system View File

@@ -6,7 +6,7 @@
INCLUDED = 1 INCLUDED = 1


ifndef TOPDIR ifndef TOPDIR
TOPDIR = .
TOPDIR = .
endif endif


# If ARCH is not set, we use the host system's architecture for getarch compile options. # If ARCH is not set, we use the host system's architecture for getarch compile options.
@@ -93,6 +93,12 @@ endif
ifdef TARGET ifdef TARGET
GETARCH_FLAGS := -DFORCE_$(TARGET) GETARCH_FLAGS := -DFORCE_$(TARGET)
GETARCH_FLAGS += -DUSER_TARGET GETARCH_FLAGS += -DUSER_TARGET
ifeq ($(TARGET), GENERIC)
ifeq ($(DYNAMIC_ARCH), 1)
override NO_EXPRECISION=1
export NO_EXPRECiSION
endif
endif
endif endif


# Force fallbacks for 32bit # Force fallbacks for 32bit
@@ -246,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)"
ifndef TARGET_CORE ifndef TARGET_CORE
include $(TOPDIR)/Makefile.conf include $(TOPDIR)/Makefile.conf
else else
HAVE_NEON=
HAVE_VFP=
HAVE_VFPV3=
HAVE_VFPV4=
HAVE_MMX=
HAVE_SSE=
HAVE_SSE2=
HAVE_SSE3=
HAVE_SSSE3=
HAVE_SSE4_1=
HAVE_SSE4_2=
HAVE_SSE4A=
HAVE_SSE5=
HAVE_AVX=
HAVE_AVX2=
HAVE_FMA3=
include $(TOPDIR)/Makefile_kernel.conf include $(TOPDIR)/Makefile_kernel.conf
endif endif


@@ -319,6 +341,7 @@ ifeq ($(GCCVERSIONGTEQ7),1)
else else
GCCDUMPVERSION_PARAM := -dumpversion GCCDUMPVERSION_PARAM := -dumpversion
endif endif
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 1)
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2) GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7) GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7)
endif endif
@@ -602,6 +625,10 @@ DYNAMIC_CORE += EMAG8180
DYNAMIC_CORE += THUNDERX3T110 DYNAMIC_CORE += THUNDERX3T110
endif endif


ifeq ($(ARCH), mips64)
DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4
endif

ifeq ($(ARCH), zarch) ifeq ($(ARCH), zarch)
DYNAMIC_CORE = ZARCH_GENERIC DYNAMIC_CORE = ZARCH_GENERIC


@@ -649,7 +676,7 @@ DYNAMIC_CORE += POWER9
else else
$(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.)
endif endif
LDVERSIONGTEQ35 := $(shell expr `ld --version | head -1 | cut -f2 -d "." | cut -f1 -d "-"` >= 35)
LDVERSIONGTEQ35 := $(shell expr `$(CC) -Wl,--version 2> /dev/null | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35)
ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11) ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11)
DYNAMIC_CORE += POWER10 DYNAMIC_CORE += POWER10
CCOMMON_OPT += -DHAVE_P10_SUPPORT CCOMMON_OPT += -DHAVE_P10_SUPPORT
@@ -728,7 +755,10 @@ endif
endif endif
endif endif



ifeq ($(ARCH), riscv64)
NO_BINARY_MODE = 1
BINARY_DEFINED = 1
endif




# #
@@ -761,14 +791,9 @@ CCOMMON_OPT += -mabi=32
BINARY_DEFINED = 1 BINARY_DEFINED = 1
endif endif


ifeq ($(CORE), LOONGSON3A)
CCOMMON_OPT += -march=mips64
FCOMMON_OPT += -march=mips64
endif

ifeq ($(CORE), LOONGSON3B)
CCOMMON_OPT += -march=mips64
FCOMMON_OPT += -march=mips64
ifeq ($(CORE), $(filter $(CORE),LOONGSON3R3 LOONGSON3R4))
CCOMMON_OPT += -march=loongson3a
FCOMMON_OPT += -march=loongson3a
endif endif


ifeq ($(CORE), MIPS24K) ifeq ($(CORE), MIPS24K)
@@ -810,7 +835,9 @@ endif
ifndef BINARY_DEFINED ifndef BINARY_DEFINED
ifneq ($(OSNAME), AIX) ifneq ($(OSNAME), AIX)
ifdef BINARY64 ifdef BINARY64
ifneq ($(ARCH), riscv64)
CCOMMON_OPT += -m64 CCOMMON_OPT += -m64
endif
else else
CCOMMON_OPT += -m32 CCOMMON_OPT += -m32
endif endif
@@ -855,7 +882,7 @@ CCOMMON_OPT += -DF_INTERFACE_FLANG
FCOMMON_OPT += -Mrecursive -Kieee FCOMMON_OPT += -Mrecursive -Kieee
ifeq ($(OSNAME), Linux) ifeq ($(OSNAME), Linux)
ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`)
FLANG_VENDOR := $(shell `$(FC) --version|cut -f 1 -d "."|head -1`)
ifeq ($(FLANG_VENDOR),AOCC) ifeq ($(FLANG_VENDOR),AOCC)
FCOMMON_OPT += -fno-unroll-loops FCOMMON_OPT += -fno-unroll-loops
endif endif
@@ -931,8 +958,10 @@ endif
else else
ifdef BINARY64 ifdef BINARY64
ifneq ($(OSNAME), AIX) ifneq ($(OSNAME), AIX)
ifneq ($(ARCH), riscv64)
FCOMMON_OPT += -m64 FCOMMON_OPT += -m64
endif endif
endif
ifdef INTERFACE64 ifdef INTERFACE64
ifneq ($(INTERFACE64), 0) ifneq ($(INTERFACE64), 0)
FCOMMON_OPT += -fdefault-integer-8 FCOMMON_OPT += -fdefault-integer-8
@@ -1048,11 +1077,11 @@ FCOMMON_OPT += -n32
else else
FCOMMON_OPT += -n64 FCOMMON_OPT += -n64
endif endif
ifeq ($(CORE), LOONGSON3A)
ifeq ($(CORE), LOONGSON3R3)
FCOMMON_OPT += -loongson3 -static FCOMMON_OPT += -loongson3 -static
endif endif


ifeq ($(CORE), LOONGSON3B)
ifeq ($(CORE), LOONGSON3R4)
FCOMMON_OPT += -loongson3 -static FCOMMON_OPT += -loongson3 -static
endif endif


@@ -1078,11 +1107,11 @@ CCOMMON_OPT += -n32
else else
CCOMMON_OPT += -n64 CCOMMON_OPT += -n64
endif endif
ifeq ($(CORE), LOONGSON3A)
ifeq ($(CORE), LOONGSON3R3)
CCOMMON_OPT += -loongson3 -static CCOMMON_OPT += -loongson3 -static
endif endif


ifeq ($(CORE), LOONGSON3B)
ifeq ($(CORE), LOONGSON3R4)
CCOMMON_OPT += -loongson3 -static CCOMMON_OPT += -loongson3 -static
endif endif


@@ -1101,16 +1130,25 @@ CCOMMON_OPT += -w
ifeq ($(ARCH), x86) ifeq ($(ARCH), x86)
CCOMMON_OPT += -m32 CCOMMON_OPT += -m32
else else
FCOMMON_OPT += -m64
ifdef BINARY64
CCOMMON_OPT += -m64
else
CCOMMON_OPT += -m32
endif
endif endif
endif endif


ifeq ($(F_COMPILER), SUN) ifeq ($(F_COMPILER), SUN)
CCOMMON_OPT += -DF_INTERFACE_SUN CCOMMON_OPT += -DF_INTERFACE_SUN
FCOMMON_OPT += -ftrap=%none -xrecursive
ifeq ($(ARCH), x86) ifeq ($(ARCH), x86)
FCOMMON_OPT += -m32 FCOMMON_OPT += -m32
else else
ifdef BINARY64
FCOMMON_OPT += -m64 FCOMMON_OPT += -m64
else
FCOMMON_OPT += -m32
endif
endif endif
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
FCOMMON_OPT += -xopenmp=parallel FCOMMON_OPT += -xopenmp=parallel
@@ -1184,10 +1222,8 @@ ifdef SMP
CCOMMON_OPT += -DSMP_SERVER CCOMMON_OPT += -DSMP_SERVER


ifeq ($(ARCH), mips64) ifeq ($(ARCH), mips64)
ifneq ($(CORE), LOONGSON3B)
USE_SIMPLE_THREADED_LEVEL3 = 1 USE_SIMPLE_THREADED_LEVEL3 = 1
endif endif
endif


ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
# USE_SIMPLE_THREADED_LEVEL3 = 1 # USE_SIMPLE_THREADED_LEVEL3 = 1
@@ -1262,10 +1298,14 @@ ifndef SYMBOLSUFFIX
SYMBOLSUFFIX = SYMBOLSUFFIX =
endif endif


ifndef LIBSONAMEBASE
LIBSONAMEBASE = openblas
endif

ifndef LIBNAMESUFFIX ifndef LIBNAMESUFFIX
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)
LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX)
else else
LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX)
endif endif


ifeq ($(OSNAME), CYGWIN_NT) ifeq ($(OSNAME), CYGWIN_NT)
@@ -1279,8 +1319,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH)
include $(TOPDIR)/Makefile.$(ARCH) include $(TOPDIR)/Makefile.$(ARCH)


ifneq ($(C_COMPILER), PGI) ifneq ($(C_COMPILER), PGI)
ifneq ($(C_COMPILER), SUN)
CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME
endif endif
endif
CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\" CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\"


ifeq ($(CORE), PPC440) ifeq ($(CORE), PPC440)
@@ -1297,11 +1339,9 @@ endif


ifneq ($(ARCH), x86_64) ifneq ($(ARCH), x86_64)
ifneq ($(ARCH), x86) ifneq ($(ARCH), x86)
ifneq ($(CORE), LOONGSON3B)
NO_AFFINITY = 1 NO_AFFINITY = 1
endif endif
endif endif
endif


ifdef NO_AFFINITY ifdef NO_AFFINITY
ifeq ($(NO_AFFINITY), 0) ifeq ($(NO_AFFINITY), 0)
@@ -1515,6 +1555,8 @@ export HAVE_SSE4_2
export HAVE_SSE4A export HAVE_SSE4A
export HAVE_SSE5 export HAVE_SSE5
export HAVE_AVX export HAVE_AVX
export HAVE_AVX2
export HAVE_FMA3
export HAVE_VFP export HAVE_VFP
export HAVE_VFPV3 export HAVE_VFPV3
export HAVE_VFPV4 export HAVE_VFPV4
@@ -1525,6 +1567,7 @@ export KERNELDIR
export FUNCTION_PROFILE export FUNCTION_PROFILE
export TARGET_CORE export TARGET_CORE
export NO_AVX512 export NO_AVX512
export NO_AVX2
export BUILD_BFLOAT16 export BUILD_BFLOAT16


export SBGEMM_UNROLL_M export SBGEMM_UNROLL_M


+ 9
- 3
Makefile.x86 View File

@@ -1,5 +1,10 @@
# COMPILER_PREFIX = mingw32- # COMPILER_PREFIX = mingw32-


ifdef HAVE_SSE
CCOMMON_OPT += -msse
FCOMMON_OPT += -msse
endif



ifeq ($(OSNAME), Interix) ifeq ($(OSNAME), Interix)
ARFLAGS = -m x86 ARFLAGS = -m x86
@@ -54,9 +59,11 @@ LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm
else else
LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm
endif endif

ifdef HAVE_SSE2
CCOMMON_OPT += -msse2
FCOMMON_OPT += -msse2
endif
ifdef HAVE_SSE3 ifdef HAVE_SSE3
ifndef DYNAMIC_ARCH
CCOMMON_OPT += -msse3 CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3 FCOMMON_OPT += -msse3
ifdef HAVE_SSSE3 ifdef HAVE_SSSE3
@@ -68,5 +75,4 @@ CCOMMON_OPT += -msse4.1
FCOMMON_OPT += -msse4.1 FCOMMON_OPT += -msse4.1
endif endif
endif endif
endif



+ 19
- 10
Makefile.x86_64 View File

@@ -9,9 +9,9 @@ endif
endif endif


ifdef HAVE_SSE3 ifdef HAVE_SSE3
ifndef DYNAMIC_ARCH
CCOMMON_OPT += -msse3 CCOMMON_OPT += -msse3
FCOMMON_OPT += -msse3 FCOMMON_OPT += -msse3
endif
ifdef HAVE_SSSE3 ifdef HAVE_SSSE3
CCOMMON_OPT += -mssse3 CCOMMON_OPT += -mssse3
FCOMMON_OPT += -mssse3 FCOMMON_OPT += -mssse3
@@ -20,6 +20,22 @@ ifdef HAVE_SSE4_1
CCOMMON_OPT += -msse4.1 CCOMMON_OPT += -msse4.1
FCOMMON_OPT += -msse4.1 FCOMMON_OPT += -msse4.1
endif endif
ifndef OLDGCC
ifdef HAVE_AVX
CCOMMON_OPT += -mavx
FCOMMON_OPT += -mavx
endif
endif
ifndef NO_AVX2
ifdef HAVE_AVX2
CCOMMON_OPT += -mavx2
FCOMMON_OPT += -mavx2
endif
endif
ifndef OLDGCC
ifdef HAVE_FMA3
CCOMMON_OPT += -mfma
FCOMMON_OPT += -mfma
endif endif
endif endif


@@ -47,8 +63,6 @@ ifndef DYNAMIC_ARCH
ifndef NO_AVX512 ifndef NO_AVX512
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
# cooperlake support was added in 10.1 # cooperlake support was added in 10.1
GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10)
GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1)
ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11) ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11)
CCOMMON_OPT += -march=cooperlake CCOMMON_OPT += -march=cooperlake
FCOMMON_OPT += -march=cooperlake FCOMMON_OPT += -march=cooperlake
@@ -68,15 +82,11 @@ endif
endif endif
endif endif


ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE))
ifndef DYNAMIC_ARCH
ifdef HAVE_AVX2
ifndef NO_AVX2 ifndef NO_AVX2
ifeq ($(C_COMPILER), GCC) ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0 # AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
CCOMMON_OPT += -mavx2 CCOMMON_OPT += -mavx2
endif endif
@@ -101,7 +111,6 @@ endif
endif endif
endif endif
endif endif
endif








+ 7
- 0
README.md View File

@@ -172,6 +172,13 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th
- **Z13**: Optimized Level-3 BLAS and Level-1,2 - **Z13**: Optimized Level-3 BLAS and Level-1,2
- **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2 - **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2


#### RISC-V

- **C910V**: Optimized Leve-3 BLAS (real) and Level-1,2 by RISC-V Vector extension 0.7.1.
```sh
make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran
```

### Support for multiple targets in a single library ### Support for multiple targets in a single library


OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake. OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifiying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake.


+ 5
- 0
TargetList.txt View File

@@ -104,3 +104,8 @@ VORTEX
ZARCH_GENERIC ZARCH_GENERIC
Z13 Z13
Z14 Z14

10.RISC-V 64:
RISCV64_GENERIC
C910V


+ 56
- 114
benchmark/amax.c View File

@@ -25,125 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef AMAX #undef AMAX
#ifdef COMPLEX #ifdef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define AMAX BLASFUNC(dzamax)
#define AMAX BLASFUNC(dzamax)
#else #else
#define AMAX BLASFUNC(scamax)
#define AMAX BLASFUNC(scamax)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define AMAX BLASFUNC(damax)
#define AMAX BLASFUNC(damax)
#else #else
#define AMAX BLASFUNC(samax)
#define AMAX BLASFUNC(samax)
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
int main(int argc, char *argv[])
{
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x=1;
blasint inc_x = 1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1;
int to = 200;
int step = 1;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
double time1, timeg;
argc--;argv++;
argc--;
argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if (argc > 0)
{
from = atol(*argv);
argc--;
argv++;
}
if (argc > 0)
{
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{
fprintf(stderr, "Out of Memory!!\n");
exit(1);
} }
#ifdef __linux #ifdef __linux
@@ -152,37 +100,31 @@ int main(int argc, char *argv[]){
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
for (m = from; m <= to; m += step)
{ {
timeg=0;
fprintf(stderr, " %6d : ", (int)m);
timeg = 0;
fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++)
{
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
AMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
timeg += time1;
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
begin();
AMAX(&m, x, &inc_x);
end();
timeg += getsec();
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;


+ 56
- 111
benchmark/amin.c View File

@@ -25,124 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef AMIN #undef AMIN
#ifdef COMPLEX #ifdef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define AMIN BLASFUNC(dzamin)
#define AMIN BLASFUNC(dzamin)
#else #else
#define AMIN BLASFUNC(scamin)
#define AMIN BLASFUNC(scamin)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define AMIN BLASFUNC(damin)
#define AMIN BLASFUNC(damin)
#else #else
#define AMIN BLASFUNC(samin)
#endif
#endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#define AMIN BLASFUNC(samin)
#endif #endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif #endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){
int main(int argc, char *argv[])
{
FLOAT *x; FLOAT *x;
blasint m, i; blasint m, i;
blasint inc_x=1;
blasint inc_x = 1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;
int from = 1;
int to = 200;
int step = 1;
int from = 1;
int to = 200;
int step = 1;
struct timeval start, stop;
double time1,timeg;
double time1, timeg;
argc--;argv++;
argc--;
argv++;
if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if (argc > 0)
{
from = atol(*argv);
argc--;
argv++;
}
if (argc > 0)
{
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
}
if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);
if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{
fprintf(stderr, "Out of Memory!!\n");
exit(1);
} }
#ifdef __linux #ifdef __linux
@@ -151,39 +100,35 @@ int main(int argc, char *argv[]){
fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");
for(m = from; m <= to; m += step)
for (m = from; m <= to; m += step)
{ {
timeg=0;
timeg = 0;
fprintf(stderr, " %6d : ", (int)m);
fprintf(stderr, " %6d : ", (int)m);
for (l = 0; l < loops; l++)
{
for (l=0; l<loops; l++)
{
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
AMIN (&m, x, &inc_x);
for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
gettimeofday( &stop, (struct timezone *)0);
begin();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
AMIN(&m, x, &inc_x);
timeg += time1;
end();
timeg += getsec();
} }
timeg /= loops; timeg /= loops;
fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
" %10.2f MFlops %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
} }
return 0; return 0;


+ 55
- 125
benchmark/asum.c View File

@@ -25,178 +25,108 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef ASUM #undef ASUM


#ifdef COMPLEX #ifdef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
#define ASUM BLASFUNC(dzasum)
#define ASUM BLASFUNC(dzasum)
#else #else
#define ASUM BLASFUNC(scasum)
#define ASUM BLASFUNC(scasum)
#endif #endif
#else #else
#ifdef DOUBLE #ifdef DOUBLE
#define ASUM BLASFUNC(dasum)
#define ASUM BLASFUNC(dasum)
#else #else
#define ASUM BLASFUNC(sasum)
#define ASUM BLASFUNC(sasum)
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){
int main(int argc, char *argv[])
{


FLOAT *x; FLOAT *x;
FLOAT result; FLOAT result;
blasint m, i; blasint m, i;
blasint inc_x=1;
blasint inc_x = 1;
int loops = 1; int loops = 1;
int l; int l;
char *p; char *p;


int from = 1;
int to = 200;
int step = 1;

#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop;
double time1,timeg;
#else
struct timespec start = { 0, 0 }, stop = { 0, 0 };
int from = 1;
int to = 200;
int step = 1;
double time1, timeg; double time1, timeg;
#endif


argc--;argv++;
argc--;
argv++;


if (argc > 0) { from = atol(*argv); argc--; argv++;}
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;}
if (argc > 0) { step = atol(*argv); argc--; argv++;}
if (argc > 0)
{
from = atol(*argv);
argc--;
argv++;
}
if (argc > 0)
{
to = MAX(atol(*argv), from);
argc--;
argv++;
}
if (argc > 0)
{
step = atol(*argv);
argc--;
argv++;
}


if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p);
if ((p = getenv("OPENBLAS_LOOPS")))
loops = atoi(p);
if ((p = getenv("OPENBLAS_INCX")))
inc_x = atoi(p);


fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops);
fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops);


if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){
fprintf(stderr,"Out of Memory!!\n");exit(1);
if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL)
{
fprintf(stderr, "Out of Memory!!\n");
exit(1);
} }



#ifdef __linux #ifdef __linux
srandom(getpid()); srandom(getpid());
#endif #endif


fprintf(stderr, " SIZE Flops\n"); fprintf(stderr, " SIZE Flops\n");


for(m = from; m <= to; m += step)
for (m = from; m <= to; m += step)
{ {


timeg=0;

fprintf(stderr, " %6d : ", (int)m);
timeg = 0;


for (l=0; l<loops; l++)
{
fprintf(stderr, " %6d : ", (int)m);


for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &start, (struct timezone *)0);
#else
clock_gettime(CLOCK_REALTIME, &start);
#endif
result = ASUM (&m, x, &inc_x);
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
clock_gettime(CLOCK_REALTIME, &stop);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
#else
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
#endif

timeg += time1;
for (l = 0; l < loops; l++)
{


for (i = 0; i < m * COMPSIZE * abs(inc_x); i++)
{
x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5;
}
begin();
result = ASUM(&m, x, &inc_x);
end();
timeg += getsec();
} }
if (loops >1)
timeg /= loops;
if (loops > 1)
timeg /= loops;


#ifdef COMPLEX #ifdef COMPLEX
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg);
#else #else
fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg); fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg);
#endif #endif

} }


return 0; return 0;


+ 4
- 82
benchmark/axpby.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef AXPBY #undef AXPBY


@@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y; FLOAT *x, *y;
@@ -129,7 +58,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -176,16 +104,10 @@ int main(int argc, char *argv[]){


for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
gettimeofday( &start, (struct timezone *)0);

begin();
AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y ); AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y );

gettimeofday( &stop, (struct timezone *)0);

time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

timeg += time1;

end();
timeg += getsec();
} }


timeg /= loops; timeg /= loops;


+ 4
- 77
benchmark/axpy.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef AXPY #undef AXPY


@@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y; FLOAT *x, *y;
@@ -127,8 +56,6 @@ int main(int argc, char *argv[]){
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;

struct timespec start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -175,13 +102,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
clock_gettime( CLOCK_REALTIME, &start);
begin();


AXPY (&m, alpha, x, &inc_x, y, &inc_y ); AXPY (&m, alpha, x, &inc_x, y, &inc_y );


clock_gettime( CLOCK_REALTIME, &stop);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
time1 = getsec();


timeg += time1; timeg += time1;




+ 104
- 0
benchmark/bench.h View File

@@ -0,0 +1,104 @@
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}


#define malloc huge_malloc

#endif

#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop;
#else
struct timespec start = { 0, 0 }, stop = { 0, 0 };
#endif

double getsec()
{
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
#else
return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9;
#endif
}

void begin() {
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &start, (struct timezone *)0);
#else
clock_gettime(CLOCK_REALTIME, &start);
#endif
}

void end() {
#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
gettimeofday( &stop, (struct timezone *)0);
#else
clock_gettime(CLOCK_REALTIME, &stop);
#endif
}

+ 4
- 46
benchmark/cholesky.c View File

@@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"


double fabs(double); double fabs(double);


@@ -71,41 +66,6 @@ double fabs(double);
#endif #endif
#endif #endif




#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif


static __inline double getmflops(int ratio, int m, double secs){ static __inline double getmflops(int ratio, int m, double secs){


double mm = (double)m; double mm = (double)m;
@@ -145,7 +105,6 @@ int main(int argc, char *argv[]){


FLOAT maxerr; FLOAT maxerr;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -220,20 +179,19 @@ int main(int argc, char *argv[]){


SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);


gettimeofday( &start, (struct timezone *)0);
begin();


POTRF(uplo[uplos], &m, b, &m, &info); POTRF(uplo[uplos], &m, b, &m, &info);


gettimeofday( &stop, (struct timezone *)0);
end();


if (info != 0) { if (info != 0) {
fprintf(stderr, "Info = %d\n", info); fprintf(stderr, "Info = %d\n", info);
exit(1); exit(1);
} }


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


maxerr = 0.;


if (!(uplos & 1)) { if (!(uplos & 1)) {
for (j = 0; j < m; j++) { for (j = 0; j < m; j++) {


+ 4
- 82
benchmark/copy.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef COPY #undef COPY


@@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y; FLOAT *x, *y;
@@ -128,11 +57,9 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1 = 0.0, timeg = 0.0; double time1 = 0.0, timeg = 0.0;
long nanos = 0; long nanos = 0;
time_t seconds = 0; time_t seconds = 0;
struct timespec time_start = { 0, 0 }, time_end = { 0, 0 };


argc--;argv++; argc--;argv++;


@@ -176,15 +103,10 @@ int main(int argc, char *argv[]){


for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
clock_gettime(CLOCK_REALTIME, &time_start);
begin();
COPY (&m, x, &inc_x, y, &inc_y ); COPY (&m, x, &inc_x, y, &inc_y );
clock_gettime(CLOCK_REALTIME, &time_end);

nanos = time_end.tv_nsec - time_start.tv_nsec;
seconds = time_end.tv_sec - time_start.tv_sec;

time1 = seconds + nanos / 1.e9;
timeg += time1;
end();
timeg += getsec();
} }


timeg /= loops; timeg /= loops;


+ 4
- 81
benchmark/dot.c View File

@@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef DOT #undef DOT



#ifdef DOUBLE #ifdef DOUBLE
#define DOT BLASFUNC(ddot) #define DOT BLASFUNC(ddot)
#else #else
#define DOT BLASFUNC(sdot) #define DOT BLASFUNC(sdot)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y; FLOAT *x, *y;
@@ -122,7 +49,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -169,15 +95,12 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();


result = DOT (&m, x, &inc_x, y, &inc_y ); result = DOT (&m, x, &inc_x, y, &inc_y );


gettimeofday( &stop, (struct timezone *)0);

time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

timeg += time1;
end();
timeg += getsec();


} }




+ 4
- 76
benchmark/geev.c View File

@@ -36,13 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef GEEV #undef GEEV


@@ -74,71 +68,6 @@ extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a,
FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info ); FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info );
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork; FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork;
@@ -154,7 +83,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -223,7 +151,7 @@ int main(int argc, char *argv[]){
for(m = from; m <= to; m += step){ for(m = from; m <= to; m += step){


fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
gettimeofday( &start, (struct timezone *)0);
begin();


lwork = -1; lwork = -1;
#ifndef COMPLEX #ifndef COMPLEX
@@ -239,14 +167,14 @@ int main(int argc, char *argv[]){
GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info); GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info);
#endif #endif


gettimeofday( &stop, (struct timezone *)0);
end();


if (info) { if (info) {
fprintf(stderr, "failed to compute eigenvalues .. %d\n", info); fprintf(stderr, "failed to compute eigenvalues .. %d\n", info);
exit(1); exit(1);
} }


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n", " %10.2f MFlops : %10.2f Sec : %d\n",


+ 4
- 76
benchmark/gemm.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef GEMM #undef GEMM


@@ -55,71 +49,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


IFLOAT *a, *b; IFLOAT *a, *b;
@@ -139,7 +68,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1, timeg; double time1, timeg;


argc--;argv++; argc--;argv++;
@@ -228,14 +156,14 @@ int main(int argc, char *argv[]){
ldc = m; ldc = m;


fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k); fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k);
gettimeofday( &start, (struct timezone *)0);
begin();


for (j=0; j<loops; j++) { for (j=0; j<loops; j++) {
GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc);
} }


gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
end();
time1 = getsec();


timeg = time1/loops; timeg = time1/loops;
fprintf(stderr, fprintf(stderr,


+ 4
- 80
benchmark/gemm3m.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef GEMM #undef GEMM


@@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@@ -133,7 +62,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -187,16 +115,12 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m );


gettimeofday( &stop, (struct timezone *)0);

time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

timeg += time1;

end();
timeg += getsec();
} }


timeg /= loops; timeg /= loops;


+ 7
- 79
benchmark/gemv.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"




#undef GEMV #undef GEMV
@@ -52,72 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif


#endif #endif

#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -211,10 +139,10 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
end();
time1 = getsec();
timeg += time1; timeg += time1;


} }
@@ -248,10 +176,10 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
end();
time1 = getsec();
timeg += time1; timeg += time1;


} }


+ 5
- 81
benchmark/ger.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef GER #undef GER


@@ -49,72 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@@ -131,7 +59,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -198,16 +125,13 @@ int main(int argc, char *argv[]){
for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {


gettimeofday( &start, (struct timezone *)0);
begin();


GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m); GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m);


gettimeofday( &stop, (struct timezone *)0);

time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

timeg += time1;

end();
timeg += getsec();
} }


timeg /= loops; timeg /= loops;


+ 4
- 79
benchmark/gesv.c View File

@@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"


double fabs(double); double fabs(double);


@@ -66,71 +61,6 @@ double fabs(double);
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *b; FLOAT *a, *b;
@@ -142,7 +72,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -194,22 +123,18 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


GESV (&m, &m, a, &m, ipiv, b, &m, &info); GESV (&m, &m, a, &m, ipiv, b, &m, &info);


gettimeofday( &stop, (struct timezone *)0);


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

end();


time1 = getsec();


fprintf(stderr, fprintf(stderr,
"%10.2f MFlops %10.6f s\n", "%10.2f MFlops %10.6f s\n",
COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1); COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1);



} }


return 0; return 0;


+ 4
- 75
benchmark/getri.c View File

@@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"


#undef GETRF #undef GETRF
#undef GETRI #undef GETRI
@@ -72,71 +67,6 @@


extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info); extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info);


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a,*work; FLOAT *a,*work;
@@ -148,7 +78,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -205,21 +134,21 @@ int main(int argc, char *argv[]){
exit(1); exit(1);
} }


gettimeofday( &start, (struct timezone *)0);
begin();


lwork = -1; lwork = -1;
GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info); GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info);


lwork = (blasint)wkopt[0]; lwork = (blasint)wkopt[0];
GETRI(&m, a, &m, ipiv, work, &lwork, &info); GETRI(&m, a, &m, ipiv, work, &lwork, &info);
gettimeofday( &stop, (struct timezone *)0);
end();


if (info) { if (info) {
fprintf(stderr, "failed compute inverse matrix .. %d\n", info); fprintf(stderr, "failed compute inverse matrix .. %d\n", info);
exit(1); exit(1);
} }


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops : %10.2f Sec : %d\n", " %10.2f MFlops : %10.2f Sec : %d\n",


+ 4
- 80
benchmark/hbmv.c View File

@@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef HBMV #undef HBMV
#ifdef DOUBLE #ifdef DOUBLE
#define HBMV BLASFUNC(zhbmv) #define HBMV BLASFUNC(zhbmv)
#else #else
#define HBMV BLASFUNC(chbmv) #define HBMV BLASFUNC(chbmv)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz) {
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size) {
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@@ -125,7 +52,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@@ -186,15 +112,13 @@ int main(int argc, char *argv[]){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
end();
timeg += time1;
timeg += getsec();
} }


+ 4
- 77
benchmark/hemm.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef HEMM #undef HEMM


@@ -41,72 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define HEMM BLASFUNC(chemm) #define HEMM BLASFUNC(chemm)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@@ -126,7 +54,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -170,13 +97,13 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",


+ 4
- 78
benchmark/hemv.c View File

@@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef HEMV #undef HEMV



#ifdef DOUBLE #ifdef DOUBLE
#define HEMV BLASFUNC(zhemv) #define HEMV BLASFUNC(zhemv)
#else #else
#define HEMV BLASFUNC(chemv) #define HEMV BLASFUNC(chemv)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@@ -124,7 +51,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -182,13 +108,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();


HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;




+ 4
- 81
benchmark/her.c View File

@@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef HER #undef HER



#ifdef DOUBLE #ifdef DOUBLE
#define HER BLASFUNC(zher) #define HER BLASFUNC(zher)
#else #else
#define HER BLASFUNC(cher) #define HER BLASFUNC(cher)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *x; FLOAT *a, *x;
@@ -126,8 +53,6 @@ int main(int argc, char *argv[]){
int from = 1; int from = 1;
int to = 200; int to = 200;
int step = 1; int step = 1;

struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -166,15 +91,13 @@ int main(int argc, char *argv[]){
x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }


gettimeofday( &start, (struct timezone *)0);
begin();


HER (&uplo, &m, alpha, x, &incx, a, &m ); HER (&uplo, &m, alpha, x, &incx, a, &m );


gettimeofday( &stop, (struct timezone *)0);

time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
end();


gettimeofday( &start, (struct timezone *)0);
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",


+ 4
- 81
benchmark/her2.c View File

@@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef HER2 #undef HER2



#ifdef DOUBLE #ifdef DOUBLE
#define HER2 BLASFUNC(zher2) #define HER2 BLASFUNC(zher2)
#else #else
#define HER2 BLASFUNC(cher2) #define HER2 BLASFUNC(cher2)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@@ -127,7 +54,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -169,16 +95,13 @@ int main(int argc, char *argv[]){
y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }


gettimeofday( &start, (struct timezone *)0);

begin();


HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m ); HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m );


gettimeofday( &stop, (struct timezone *)0);

time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
end();


gettimeofday( &start, (struct timezone *)0);
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",


+ 4
- 77
benchmark/her2k.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef HER2K #undef HER2K
#ifdef DOUBLE #ifdef DOUBLE
@@ -40,72 +34,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define HER2K BLASFUNC(cher2k) #define HER2K BLASFUNC(cher2k)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@@ -125,7 +53,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -169,13 +96,13 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",


+ 4
- 79
benchmark/herk.c View File

@@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef HERK #undef HERK



#ifdef DOUBLE #ifdef DOUBLE
#define HERK BLASFUNC(zherk) #define HERK BLASFUNC(zherk)
#else #else
#define HERK BLASFUNC(cherk) #define HERK BLASFUNC(cherk)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *c; FLOAT *a, *c;
@@ -127,7 +54,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -167,18 +93,17 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6);

} }


return 0; return 0;


+ 4
- 78
benchmark/hpmv.c View File

@@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef HPMV #undef HPMV
#ifdef DOUBLE #ifdef DOUBLE
#define HPMV BLASFUNC(zhpmv) #define HPMV BLASFUNC(zhpmv)
#else #else
#define HPMV BLASFUNC(chpmv) #define HPMV BLASFUNC(chpmv)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz) {
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size) {
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@@ -124,7 +51,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@@ -183,13 +109,13 @@ int main(int argc, char *argv[]){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;


+ 4
- 76
benchmark/iamax.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef IAMAX #undef IAMAX


@@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x; FLOAT *x;
@@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }


gettimeofday( &start, (struct timezone *)0);
begin();


IAMAX (&m, x, &inc_x); IAMAX (&m, x, &inc_x);


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;




+ 4
- 76
benchmark/iamin.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef IAMIN #undef IAMIN
@@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
IAMIN (&m, x, &inc_x); IAMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;


+ 4
- 76
benchmark/imax.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef IMAX #undef IMAX
@@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
IMAX (&m, x, &inc_x); IMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;


+ 4
- 76
benchmark/imin.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef IMIN #undef IMIN
@@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
IMIN (&m, x, &inc_x); IMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;


+ 7
- 78
benchmark/linpack.c View File

@@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"


double fabs(double); double fabs(double);


@@ -72,71 +67,6 @@ double fabs(double);
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *b; FLOAT *a, *b;
@@ -151,7 +81,6 @@ int main(int argc, char *argv[]){


FLOAT maxerr; FLOAT maxerr;


struct timeval start, stop;
double time1, time2; double time1, time2;


argc--;argv++; argc--;argv++;
@@ -198,31 +127,31 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


GETRF (&m, &m, a, &m, ipiv, &info); GETRF (&m, &m, a, &m, ipiv, &info);


gettimeofday( &stop, (struct timezone *)0);
end();


if (info) { if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info); fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1); exit(1);
} }


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


gettimeofday( &start, (struct timezone *)0);
begin();


GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info); GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info);


gettimeofday( &stop, (struct timezone *)0);
end();


if (info) { if (info) {
fprintf(stderr, "Matrix is not singular .. %d\n", info); fprintf(stderr, "Matrix is not singular .. %d\n", info);
exit(1); exit(1);
} }


time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time2 = getsec();


maxerr = 0.; maxerr = 0.;




+ 4
- 76
benchmark/max.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef NAMAX #undef NAMAX
@@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
NAMAX (&m, x, &inc_x); NAMAX (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;


+ 4
- 76
benchmark/min.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef NAMIN #undef NAMIN
@@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *x; FLOAT *x;
@@ -121,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@@ -160,13 +88,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
NAMIN (&m, x, &inc_x); NAMIN (&m, x, &inc_x);
gettimeofday( &stop, (struct timezone *)0);
end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;


+ 4
- 76
benchmark/nrm2.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef NRM2 #undef NRM2


@@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x; FLOAT *x;
@@ -127,7 +56,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -166,13 +94,13 @@ int main(int argc, char *argv[]){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }


gettimeofday( &start, (struct timezone *)0);
begin();


NRM2 (&m, x, &inc_x); NRM2 (&m, x, &inc_x);


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;




+ 10
- 46
benchmark/potrf.c View File

@@ -36,12 +36,7 @@
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"


double fabs(double); double fabs(double);


@@ -86,37 +81,7 @@ double fabs(double);
// extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info); // extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info);
// extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info); // extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info);


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);


tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif


int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


@@ -141,7 +106,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -217,18 +181,18 @@ int main(int argc, char *argv[]){


SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m);


gettimeofday( &start, (struct timezone *)0);
begin();


POTRF(uplo[uplos], &m, b, &m, &info); POTRF(uplo[uplos], &m, b, &m, &info);


gettimeofday( &stop, (struct timezone *)0);
end();


if (info != 0) { if (info != 0) {
fprintf(stderr, "Potrf info = %d\n", info); fprintf(stderr, "Potrf info = %d\n", info);
exit(1); exit(1);
} }


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6; flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6;


if ( btest == 'S' ) if ( btest == 'S' )
@@ -240,17 +204,17 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info); POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info);


gettimeofday( &stop, (struct timezone *)0);
end();


if (info != 0) { if (info != 0) {
fprintf(stderr, "Potrs info = %d\n", info); fprintf(stderr, "Potrs info = %d\n", info);
exit(1); exit(1);
} }
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6; flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6;


} }
@@ -258,18 +222,18 @@ int main(int argc, char *argv[]){
if ( btest == 'I' ) if ( btest == 'I' )
{ {
gettimeofday( &start, (struct timezone *)0);
begin();


POTRI(uplo[uplos], &m, b, &m, &info); POTRI(uplo[uplos], &m, b, &m, &info);


gettimeofday( &stop, (struct timezone *)0);
end();


if (info != 0) { if (info != 0) {
fprintf(stderr, "Potri info = %d\n", info); fprintf(stderr, "Potri info = %d\n", info);
exit(1); exit(1);
} }


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6; flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6;
} }


+ 4
- 75
benchmark/rot.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"


#undef ROT #undef ROT


@@ -52,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y; FLOAT *x, *y;
@@ -133,7 +63,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -179,13 +108,13 @@ int main(int argc, char *argv[]){


for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
gettimeofday( &start, (struct timezone *)0);
begin();


ROT (&m, x, &inc_x, y, &inc_y, c, s); ROT (&m, x, &inc_x, y, &inc_y, c, s);


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;




+ 5
- 77
benchmark/rotm.c View File

@@ -25,12 +25,7 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef ROTM #undef ROTM
@@ -40,72 +35,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ROTM BLASFUNC(srotm) #define ROTM BLASFUNC(srotm)
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz)
{
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv) {
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =
shmget(IPC_PRIVATE, (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT | 0600)) < 0) {
printf("Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1) {
printf("Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
@@ -122,7 +51,7 @@ int main(int argc, char *argv[])
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1, timeg; double time1, timeg;
argc--; argc--;
@@ -188,14 +117,13 @@ int main(int argc, char *argv[])
} }
for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
gettimeofday(&start, (struct timezone *)0);
begin();
ROTM(&m, x, &inc_x, y, &inc_y, param); ROTM(&m, x, &inc_x, y, &inc_y, param);
gettimeofday(&stop, (struct timezone *)0);
end();
time1 = (double)(stop.tv_sec - start.tv_sec) +
(double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;
} }


+ 4
- 76
benchmark/scal.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef SCAL #undef SCAL


@@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y; FLOAT *x, *y;
@@ -128,7 +57,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -174,13 +102,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();


SCAL (&m, alpha, x, &inc_x); SCAL (&m, alpha, x, &inc_x);


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;




+ 4
- 77
benchmark/spmv.c View File

@@ -25,17 +25,10 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/
#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"
#undef SPMV #undef SPMV
#ifndef COMPLEX #ifndef COMPLEX
#ifdef DOUBLE #ifdef DOUBLE
@@ -54,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#if defined(__WIN32__) || defined(__WIN64__)
#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif
int gettimeofday(struct timeval *tv, void *tz){
FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;
if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);
tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;
/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}
return 0;
}
#endif
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0
static void *huge_malloc(BLASLONG size){
int shmid;
void *address;
#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif
if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}
address = shmat(shmid, NULL, SHM_RND);
if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}
shmctl(shmid, IPC_RMID, 0);
return address;
}
#define malloc huge_malloc
#endif
int main(int argc, char *argv[]){ int main(int argc, char *argv[]){
FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@@ -135,7 +63,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1,timeg; double time1,timeg;
argc--;argv++; argc--;argv++;
@@ -193,13 +120,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y );
gettimeofday( &stop, (struct timezone *)0);
end();
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;


+ 4
- 78
benchmark/spr.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef SPR #undef SPR


@@ -41,73 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SPR BLASFUNC(sspr) #define SPR BLASFUNC(sspr)
#endif #endif




#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a,*c; FLOAT *a,*c;
@@ -129,7 +56,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -173,13 +99,13 @@ int main(int argc, char *argv[]){
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }


gettimeofday( &start, (struct timezone *)0);
begin();


SPR (&uplo, &m, alpha, c, &inc_x, a); SPR (&uplo, &m, alpha, c, &inc_x, a);


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;
} }


+ 4
- 76
benchmark/spr2.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"




#undef SPR2 #undef SPR2
@@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif





#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a,*b,*c; FLOAT *a,*b,*c;
@@ -129,7 +58,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -182,13 +110,13 @@ int main(int argc, char *argv[]){
c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }


gettimeofday( &start, (struct timezone *)0);
begin();


SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a); SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a);


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();
timeg += time1; timeg += time1;
} }


+ 4
- 75
benchmark/swap.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"




#undef SWAP #undef SWAP
@@ -49,71 +44,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif
#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y; FLOAT *x, *y;
@@ -128,7 +58,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -175,13 +104,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();


SWAP (&m, x, &inc_x, y, &inc_y ); SWAP (&m, x, &inc_x, y, &inc_y );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;




+ 4
- 76
benchmark/symm.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef SYMM #undef SYMM


@@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -181,13 +109,13 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",


+ 4
- 76
benchmark/symv.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef SYMV #undef SYMV


@@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *x, *y; FLOAT *a, *x, *y;
@@ -134,7 +63,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -192,13 +120,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();


SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;




+ 4
- 76
benchmark/syr.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"




#undef SYR #undef SYR
@@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif #endif





#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x,*a; FLOAT *x,*a;
@@ -124,7 +53,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;
struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -165,13 +93,13 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


SYR (&uplo, &m, alpha, x, &inc_x, a, &m ); SYR (&uplo, &m, alpha, x, &inc_x, a, &m );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",


+ 4
- 77
benchmark/syr2.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef SYR2 #undef SYR2


@@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SYR2 BLASFUNC(ssyr2) #define SYR2 BLASFUNC(ssyr2)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y, *a; FLOAT *x, *y, *a;
@@ -125,7 +53,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -174,13 +101,13 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m ); SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",


+ 4
- 75
benchmark/syr2k.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"




#undef SYR2K #undef SYR2K
@@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *b, *c; FLOAT *a, *b, *c;
@@ -137,7 +67,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -181,13 +110,13 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",


+ 4
- 76
benchmark/syrk.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef SYRK #undef SYRK


@@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *c; FLOAT *a, *c;
@@ -137,7 +66,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -177,13 +105,13 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops\n", " %10.2f MFlops\n",


+ 4
- 44
benchmark/tpmv.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"


#undef TPMV #undef TPMV


@@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1) {
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {


@@ -112,7 +73,6 @@ int main(int argc, char *argv[])
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg; double time1, timeg;


argc--;argv++; argc--;argv++;
@@ -153,11 +113,11 @@ int main(int argc, char *argv[])
} }


for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
clock_gettime(CLOCK_REALTIME, &start);
begin();
TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x); TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x);
clock_gettime(CLOCK_REALTIME, &stop);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
time1 = getsec();
timeg += time1; timeg += time1;
} }




+ 4
- 44
benchmark/tpsv.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"


#undef TPSV #undef TPSV


@@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1) {
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {


@@ -112,7 +73,6 @@ int main(int argc, char *argv[])
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg; double time1, timeg;


argc--;argv++; argc--;argv++;
@@ -153,11 +113,11 @@ int main(int argc, char *argv[])
} }


for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
clock_gettime(CLOCK_REALTIME, &start);
begin();
TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x); TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x);
clock_gettime(CLOCK_REALTIME, &stop);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
time1 = getsec();
timeg += time1; timeg += time1;
} }




+ 4
- 75
benchmark/trmm.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"




#undef TRMM #undef TRMM
@@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *b; FLOAT *a, *b;
@@ -141,7 +71,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -180,13 +109,13 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


fprintf(stderr, fprintf(stderr,
" %10.2f MFlops %10.6f sec\n", " %10.2f MFlops %10.6f sec\n",


+ 4
- 44
benchmark/trmv.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"


#undef TRMV #undef TRMV


@@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size)
{
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1) {
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {


@@ -112,7 +73,6 @@ int main(int argc, char *argv[])
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timespec start = { 0, 0 }, stop = { 0, 0 };
double time1, timeg; double time1, timeg;


argc--;argv++; argc--;argv++;
@@ -153,11 +113,11 @@ int main(int argc, char *argv[])
} }


for (l = 0; l < loops; l++) { for (l = 0; l < loops; l++) {
clock_gettime(CLOCK_REALTIME, &start);
begin();
TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x); TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x);
clock_gettime(CLOCK_REALTIME, &stop);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9;
time1 = getsec();
timeg += time1; timeg += time1;
} }




+ 4
- 75
benchmark/trsm.c View File

@@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"
#include "bench.h"




#undef TRSM #undef TRSM
@@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *b; FLOAT *a, *b;
@@ -151,7 +81,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1; double time1;


argc--;argv++; argc--;argv++;
@@ -196,13 +125,13 @@ int main(int argc, char *argv[]){
} }
} }


gettimeofday( &start, (struct timezone *)0);
begin();


TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m);


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;
} }


+ 4
- 83
benchmark/trsv.c View File

@@ -25,14 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include <time.h>
#include "common.h"

#include "bench.h"


#undef GEMV #undef GEMV
#undef TRSV #undef TRSV
@@ -55,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#endif #endif


#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *a, *x; FLOAT *a, *x;
@@ -133,7 +61,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timespec time_start, time_end;
time_t seconds = 0; time_t seconds = 0;


double time1,timeg; double time1,timeg;
@@ -189,19 +116,13 @@ int main(int argc, char *argv[]){


for(l =0;l< loops;l++){ for(l =0;l< loops;l++){


clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start);

begin();
TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x); TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x);

clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end);
nanos = time_end.tv_nsec - time_start.tv_nsec;
seconds = time_end.tv_sec - time_start.tv_sec;

time1 = seconds + nanos /1.e9;
end();
time1 = getsec();
timeg += time1; timeg += time1;
} }



timeg /= loops; timeg /= loops;
long long muls = n*(n+1)/2.0; long long muls = n*(n+1)/2.0;
long long adds = (n - 1.0)*n/2.0; long long adds = (n - 1.0)*n/2.0;


+ 5
- 78
benchmark/zdot-intel.c View File

@@ -25,90 +25,18 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#define RETURN_BY_STACK 1
#include "common.h"
#include "bench.h"


#define RETURN_BY_STACK 1


#undef DOT #undef DOT



#ifdef DOUBLE #ifdef DOUBLE
#define DOT BLASFUNC(zdotu) #define DOT BLASFUNC(zdotu)
#else #else
#define DOT BLASFUNC(cdotu) #define DOT BLASFUNC(cdotu)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y; FLOAT *x, *y;
@@ -123,7 +51,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -170,13 +97,13 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();


DOT (&result, &m, x, &inc_x, y, &inc_y ); DOT (&result, &m, x, &inc_x, y, &inc_y );


gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;




+ 4
- 77
benchmark/zdot.c View File

@@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/ *****************************************************************************/


#include <stdio.h>
#include <stdlib.h>
#ifdef __CYGWIN32__
#include <sys/time.h>
#endif
#include "common.h"

#include "bench.h"


#undef DOT #undef DOT


@@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define DOT BLASFUNC(cdotu) #define DOT BLASFUNC(cdotu)
#endif #endif



#if defined(__WIN32__) || defined(__WIN64__)

#ifndef DELTA_EPOCH_IN_MICROSECS
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL
#endif

int gettimeofday(struct timeval *tv, void *tz){

FILETIME ft;
unsigned __int64 tmpres = 0;
static int tzflag;

if (NULL != tv)
{
GetSystemTimeAsFileTime(&ft);

tmpres |= ft.dwHighDateTime;
tmpres <<= 32;
tmpres |= ft.dwLowDateTime;

/*converting file time to unix epoch*/
tmpres /= 10; /*convert into microseconds*/
tmpres -= DELTA_EPOCH_IN_MICROSECS;
tv->tv_sec = (long)(tmpres / 1000000UL);
tv->tv_usec = (long)(tmpres % 1000000UL);
}

return 0;
}

#endif

#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0

static void *huge_malloc(BLASLONG size){
int shmid;
void *address;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

if ((shmid =shmget(IPC_PRIVATE,
(size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
SHM_HUGETLB | IPC_CREAT |0600)) < 0) {
printf( "Memory allocation failed(shmget).\n");
exit(1);
}

address = shmat(shmid, NULL, SHM_RND);

if ((BLASLONG)address == -1){
printf( "Memory allocation failed(shmat).\n");
exit(1);
}

shmctl(shmid, IPC_RMID, 0);

return address;
}

#define malloc huge_malloc

#endif

int main(int argc, char *argv[]){ int main(int argc, char *argv[]){


FLOAT *x, *y; FLOAT *x, *y;
@@ -122,7 +50,6 @@ int main(int argc, char *argv[]){
int to = 200; int to = 200;
int step = 1; int step = 1;


struct timeval start, stop;
double time1,timeg; double time1,timeg;


argc--;argv++; argc--;argv++;
@@ -169,15 +96,15 @@ int main(int argc, char *argv[]){
for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
} }
gettimeofday( &start, (struct timezone *)0);
begin();
#ifdef RETURN_BY_STACK #ifdef RETURN_BY_STACK
DOT (&result , &m, x, &inc_x, y, &inc_y ); DOT (&result , &m, x, &inc_x, y, &inc_y );
#else #else
result = DOT (&m, x, &inc_x, y, &inc_y ); result = DOT (&m, x, &inc_x, y, &inc_y );
#endif #endif
gettimeofday( &stop, (struct timezone *)0);
end();


time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;
time1 = getsec();


timeg += time1; timeg += time1;




+ 20
- 2
c_check View File

@@ -6,7 +6,8 @@
# Checking cross compile # Checking cross compile
$hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos);
$hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch);
$hostarch = `uname -p` if ($hostos eq "AIX");
$hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS");
chop($hostarch);
$hostarch = "x86_64" if ($hostarch eq "amd64"); $hostarch = "x86_64" if ($hostarch eq "amd64");
$hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); $hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/);
$hostarch = "arm64" if ($hostarch eq "aarch64"); $hostarch = "arm64" if ($hostarch eq "aarch64");
@@ -92,6 +93,7 @@ $architecture = ia64 if ($data =~ /ARCH_IA64/);
$architecture = arm if ($data =~ /ARCH_ARM/); $architecture = arm if ($data =~ /ARCH_ARM/);
$architecture = arm64 if ($data =~ /ARCH_ARM64/); $architecture = arm64 if ($data =~ /ARCH_ARM64/);
$architecture = zarch if ($data =~ /ARCH_ZARCH/); $architecture = zarch if ($data =~ /ARCH_ZARCH/);
$architecture = riscv64 if ($data =~ /ARCH_RISCV64/);


$defined = 0; $defined = 0;


@@ -136,6 +138,11 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) {
$binary =32; $binary =32;
} }


if ($architecture eq "riscv64") {
$defined = 1;
$binary = 64;
}

if ($compiler eq "PGI") { if ($compiler eq "PGI") {
$compiler_name .= " -tp p7" if ($binary eq "32"); $compiler_name .= " -tp p7" if ($binary eq "32");
$compiler_name .= " -tp p7-64" if ($binary eq "64"); $compiler_name .= " -tp p7-64" if ($binary eq "64");
@@ -192,7 +199,7 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) {
} else { } else {
$tmpf = new File::Temp( SUFFIX => '.c' , UNLINK => 1 ); $tmpf = new File::Temp( SUFFIX => '.c' , UNLINK => 1 );
$code = '"addvi.b $w0, $w1, 1"'; $code = '"addvi.b $w0, $w1, 1"';
$msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs";
$msa_flags = "-mmsa -mfp64 -mload-store-pairs";
print $tmpf "#include <msa.h>\n\n"; print $tmpf "#include <msa.h>\n\n";
print $tmpf "void main(void){ __asm__ volatile($code); }\n"; print $tmpf "void main(void){ __asm__ volatile($code); }\n";


@@ -270,6 +277,15 @@ if ($data =~ /HAVE_C11/) {
} }
} }


if ($compiler eq "GCC" &&( ($architecture eq "x86") || ($architecture eq "x86_64"))) {
$no_avx2 = 0;
$oldgcc = 0;
$data = `$compiler_name -dumpversion`;
if ($data <= 4.6) {
$no_avx2 = 1;
$oldgcc = 1;
}
}


$data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; $data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`;


@@ -362,6 +378,8 @@ print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n";
print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1;
print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1;
print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1; print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1;
print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1;
print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1;


$os =~ tr/[a-z]/[A-Z]/; $os =~ tr/[a-z]/[A-Z]/;
$architecture =~ tr/[a-z]/[A-Z]/; $architecture =~ tr/[a-z]/[A-Z]/;


+ 1
- 0
cblas.h View File

@@ -393,6 +393,7 @@ void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPE
void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout); void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout);
/* dot production of BFLOAT16 input arrays, and output as float */ /* dot production of BFLOAT16 input arrays, and output as float */
float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy); float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy);
void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy);


#ifdef __cplusplus #ifdef __cplusplus
} }


+ 5
- 2
cmake/cc.cmake View File

@@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN")
endif () endif ()
endif () endif ()


if (${CORE} STREQUAL "SKYLAKEX")
if (${CORE} STREQUAL SKYLAKEX)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512) if (NOT NO_AVX512)
set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512")
@@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX")
endif () endif ()
endif () endif ()


if (${CORE} STREQUAL "COOPERLAKE")
if (${CORE} STREQUAL COOPERLAKE)
if (NOT DYNAMIC_ARCH) if (NOT DYNAMIC_ARCH)
if (NOT NO_AVX512) if (NOT NO_AVX512)
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
@@ -124,6 +124,9 @@ if (NOT DYNAMIC_ARCH)
if (HAVE_AVX) if (HAVE_AVX)
set (CCOMMON_OPT "${CCOMMON_OPT} -mavx") set (CCOMMON_OPT "${CCOMMON_OPT} -mavx")
endif () endif ()
if (HAVE_FMA3)
set (CCOMMON_OPT "${CCOMMON_OPT} -mfma")
endif ()
if (HAVE_SSE) if (HAVE_SSE)
set (CCOMMON_OPT "${CCOMMON_OPT} -msse") set (CCOMMON_OPT "${CCOMMON_OPT} -msse")
endif () endif ()


+ 2
- 2
cmake/kernel.cmake View File

@@ -184,8 +184,8 @@ macro(SetDefaultL2)
set(XHEMV_V_KERNEL ../generic/zhemv_k.c) set(XHEMV_V_KERNEL ../generic/zhemv_k.c)
set(XHEMV_M_KERNEL ../generic/zhemv_k.c) set(XHEMV_M_KERNEL ../generic/zhemv_k.c)
if (BUILD_BFLOAT16) if (BUILD_BFLOAT16)
set(SBGEMVNKERNEL ../arm/gemv_n.c)
set(SBGEMVTKERNEL ../arm/gemv_t.c)
set(SBGEMVNKERNEL ../x86_64/sbgemv_n.c)
set(SBGEMVTKERNEL ../x86_64/sbgemv_t.c)
set(SHGERKERNEL ../generic/ger.c) set(SHGERKERNEL ../generic/ger.c)
endif () endif ()
endmacro () endmacro ()


+ 8
- 0
cmake/os.cmake View File

@@ -84,6 +84,14 @@ if (X86)
set(NO_EXPRECISION 1) set(NO_EXPRECISION 1)
endif () endif ()


if (DYNAMIC_ARCH)
if (TARGET)
if (${TARGET} STREQUAL "GENERIC")
set(NO_EXPRECISION 1)
endif ()
endif ()
endif ()

if (UTEST_CHECK) if (UTEST_CHECK)
set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK") set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK")
set(SANITY_CHECK 1) set(SANITY_CHECK 1)


+ 15
- 0
cmake/prebuild.cmake View File

@@ -556,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING)
MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}")
endif () endif ()
endif () endif ()
unset (HAVE_AVX2)
unset (HAVE_AVX)
unset (HAVE_FMA3)
unset (HAVE_MMX)
unset (HAVE_SSE)
unset (HAVE_SSE2)
unset (HAVE_SSE3)
unset (HAVE_SSSE3)
unset (HAVE_SSE4A)
unset (HAVE_SSE4_1)
unset (HAVE_SSE4_2)
unset (HAVE_NEON)
unset (HAVE_VFP)
unset (HAVE_VFPV3)
unset (HAVE_VFPV4)
message(STATUS "Running getarch") message(STATUS "Running getarch")


# use the cmake binary w/ the -E param to run a shell command in a cross-platform way # use the cmake binary w/ the -E param to run a shell command in a cross-platform way


+ 58
- 42
cmake/system.cmake View File

@@ -44,50 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32)
endif () endif ()
endif () endif ()


if (DEFINED TARGET)
if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512)
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
# endif()
endif()
if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
endif()
if (DEFINED HAVE_SSE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
endif()
if (DEFINED HAVE_SSE2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
endif()
if (DEFINED HAVE_SSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (DEFINED HAVE_SSSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
endif()
if (DEFINED HAVE_SSE4_1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
endif()
endif()


if (DEFINED TARGET) if (DEFINED TARGET)
message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --")
message(STATUS "Targeting the ${TARGET} architecture.") message(STATUS "Targeting the ${TARGET} architecture.")
set(GETARCH_FLAGS "-DFORCE_${TARGET}") set(GETARCH_FLAGS "-DFORCE_${TARGET}")
endif () endif ()
@@ -187,6 +146,63 @@ else()
endif () endif ()


include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake")
if (DEFINED TARGET)
if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512)
# if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake")
else()
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
# elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
# set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
# endif()
endif()
if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512")
endif()
if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2)
if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU")
execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION)
if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG")
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
endif()
if (DEFINED HAVE_AVX)
if (NOT NO_AVX)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx")
endif()
endif()
if (DEFINED HAVE_AVX2)
if (NOT NO_AVX2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2")
endif()
endif()
if (DEFINED HAVE_FMA3)
if (NOT NO_AVX2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma")
endif()
endif()
if (DEFINED HAVE_SSE)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse")
endif()
if (DEFINED HAVE_SSE2)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2")
endif()
if (DEFINED HAVE_SSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3")
endif()
if (DEFINED HAVE_SSSE3)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3")
endif()
if (DEFINED HAVE_SSE4_1)
set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1")
endif()
endif()
if (DEFINED BINARY) if (DEFINED BINARY)
message(STATUS "Compiling a ${BINARY}-bit binary.") message(STATUS "Compiling a ${BINARY}-bit binary.")
endif () endif ()


+ 5
- 0
common.h View File

@@ -437,6 +437,11 @@ please https://github.com/xianyi/OpenBLAS/issues/246
#include "common_mips.h" #include "common_mips.h"
#endif #endif


#ifdef ARCH_RISCV64
#include "common_riscv64.h"
#endif

#ifdef ARCH_MIPS64 #ifdef ARCH_MIPS64
#include "common_mips64.h" #include "common_mips64.h"
#endif #endif


+ 0
- 6
common_arm64.h View File

@@ -142,14 +142,8 @@ REALNAME:
#define HUGE_PAGESIZE ( 4 << 20) #define HUGE_PAGESIZE ( 4 << 20)


#ifndef BUFFERSIZE #ifndef BUFFERSIZE
#if defined(CORTEXA57)
#define BUFFER_SIZE (20 << 20)
#elif defined(TSV110) || defined(EMAG8180)
#define BUFFER_SIZE (32 << 20) #define BUFFER_SIZE (32 << 20)
#else #else
#define BUFFER_SIZE (16 << 20)
#endif
#else
#define BUFFER_SIZE (32 << BUFFERSIZE) #define BUFFER_SIZE (32 << BUFFERSIZE)
#endif #endif




+ 2
- 0
common_interface.h View File

@@ -250,6 +250,8 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *, void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *,
xdouble *, blasint *, xdouble *, blasint *); xdouble *, blasint *, xdouble *, blasint *);


void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *,
bfloat16 *, blasint *, float *, float *, blasint *);
void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *, void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *,
float *, blasint *, float *, float *, blasint *); float *, blasint *, float *, float *, blasint *);
void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *, void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *,


+ 4
- 0
common_level2.h View File

@@ -44,6 +44,10 @@
extern "C" { extern "C" {
#endif #endif


int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
int sbgemv_thread_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int);
int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *);
int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *); int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *);


+ 0
- 8
common_linux.h View File

@@ -75,18 +75,10 @@ static inline int my_mbind(void *addr, unsigned long len, int mode,
// https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482 // https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482
return 0; return 0;
#else #else
#if defined (LOONGSON3B)
#if defined (__64BIT__)
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
#else
return 0; //NULL Implementation on Loongson 3B 32bit.
#endif
#else
//Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34 //Fixed randomly SEGFAULT when nodemask==NULL with above Linux 2.6.34
// unsigned long null_nodemask=0; // unsigned long null_nodemask=0;
return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags); return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags);
#endif #endif
#endif
} }


static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) { static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) {


+ 6
- 4
common_macro.h View File

@@ -646,10 +646,12 @@


#elif defined(BFLOAT16) #elif defined(BFLOAT16)


#define D_TO_BF16_K SBDTOBF16_K
#define D_BF16_TO_K DBF16TOD_K
#define S_TO_BF16_K SBSTOBF16_K
#define S_BF16_TO_K SBF16TOS_K
#define D_TO_BF16_K SBDTOBF16_K
#define D_BF16_TO_K DBF16TOD_K
#define S_TO_BF16_K SBSTOBF16_K
#define S_BF16_TO_K SBF16TOS_K
#define SBGEMV_N SBGEMV_N_K
#define SBGEMV_T SBGEMV_T_K


#define AMAX_K SAMAX_K #define AMAX_K SAMAX_K
#define AMIN_K SAMIN_K #define AMIN_K SAMIN_K


+ 2
- 7
common_mips64.h View File

@@ -229,12 +229,7 @@ REALNAME: ;\


#define BUFFER_SIZE ( 32 << 21) #define BUFFER_SIZE ( 32 << 21)


#if defined(LOONGSON3A)
#define PAGESIZE (16UL << 10)
#define FIXED_PAGESIZE (16UL << 10)
#endif

#if defined(LOONGSON3B)
#if defined(LOONGSON3R3) || defined(LOONGSON3R4)
#define PAGESIZE (16UL << 10) #define PAGESIZE (16UL << 10)
#define FIXED_PAGESIZE (16UL << 10) #define FIXED_PAGESIZE (16UL << 10)
#endif #endif
@@ -250,7 +245,7 @@ REALNAME: ;\
#define MAP_ANONYMOUS MAP_ANON #define MAP_ANONYMOUS MAP_ANON
#endif #endif


#if defined(LOONGSON3A) || defined(LOONGSON3B)
#if defined(LOONGSON3R3) || defined(LOONGSON3R4)
#define PREFETCHD_(x) ld $0, x #define PREFETCHD_(x) ld $0, x
#define PREFETCHD(x) PREFETCHD_(x) #define PREFETCHD(x) PREFETCHD_(x)
#else #else


+ 2
- 2
common_param.h View File

@@ -78,8 +78,8 @@ BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG);
int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);


int (*sbgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*sbgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);
int (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG);
int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);


int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *);


+ 4
- 0
common_power.h View File

@@ -849,6 +849,10 @@ Lmcount$lazy_ptr:
#else #else
#define BUFFER_SIZE ( 16 << 20) #define BUFFER_SIZE ( 16 << 20)
#endif #endif
#ifdef DYNAMIC_ARCH
#undef BUFFER_SIZE
#define BUFFER_SIZE (64 << 22)
#endif


#ifndef PAGESIZE #ifndef PAGESIZE
#define PAGESIZE ( 4 << 10) #define PAGESIZE ( 4 << 10)


+ 98
- 0
common_riscv64.h View File

@@ -0,0 +1,98 @@
/*****************************************************************************
Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************************/

/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#ifndef COMMON_RISCV64
#define COMMON_RISCV64

#define MB __sync_synchronize()
#define WMB __sync_synchronize()
#define RMB __sync_synchronize()

#define INLINE inline

#ifndef ASSEMBLER


static inline int blas_quickdivide(blasint x, blasint y){
return x / y;
}

#endif



#define BUFFER_SIZE ( 32 << 20)
#define SEEK_ADDRESS

#if defined(C910V)
#include <riscv-vector.h>
#endif

#endif

+ 4
- 0
common_sb.h View File

@@ -8,6 +8,8 @@
#define SBDTOBF16_K sbdtobf16_k #define SBDTOBF16_K sbdtobf16_k
#define SBF16TOS_K sbf16tos_k #define SBF16TOS_K sbf16tos_k
#define DBF16TOD_K dbf16tod_k #define DBF16TOD_K dbf16tod_k
#define SBGEMV_N_K sbgemv_n
#define SBGEMV_T_K sbgemv_t


#define SBGEMM_ONCOPY sbgemm_oncopy #define SBGEMM_ONCOPY sbgemm_oncopy
#define SBGEMM_OTCOPY sbgemm_otcopy #define SBGEMM_OTCOPY sbgemm_otcopy
@@ -29,6 +31,8 @@
#define SBDTOBF16_K gotoblas -> sbdtobf16_k #define SBDTOBF16_K gotoblas -> sbdtobf16_k
#define SBF16TOS_K gotoblas -> sbf16tos_k #define SBF16TOS_K gotoblas -> sbf16tos_k
#define DBF16TOD_K gotoblas -> dbf16tod_k #define DBF16TOD_K gotoblas -> dbf16tod_k
#define SBGEMV_N_K gotoblas -> sbgemv_n
#define SBGEMV_T_K gotoblas -> sbgemv_t


#define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy #define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy
#define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy #define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy


+ 6
- 0
common_sparc.h View File

@@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){
#define __BIG_ENDIAN__ #define __BIG_ENDIAN__
#endif #endif


#ifdef C_SUN
#ifndef __64BIT
#define RETURN_BY_STACK
#endif
#endif

#ifdef DOUBLE #ifdef DOUBLE
#define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory") #define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory")
#else #else


+ 29
- 62
cpuid_mips64.c View File

@@ -70,19 +70,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* or implied, of The University of Texas at Austin. */ /* or implied, of The University of Texas at Austin. */
/*********************************************************************/ /*********************************************************************/


#define CPU_UNKNOWN 0
#define CPU_SICORTEX 1
#define CPU_LOONGSON3A 2
#define CPU_LOONGSON3B 3
#define CPU_I6400 4
#define CPU_P6600 5
#define CPU_I6500 6
#define CPU_UNKNOWN 0
#define CPU_SICORTEX 1
#define CPU_LOONGSON3R3 2
#define CPU_LOONGSON3R4 3
#define CPU_I6400 4
#define CPU_P6600 5
#define CPU_I6500 6


static char *cpuname[] = { static char *cpuname[] = {
"UNKNOWN", "UNKNOWN",
"SICORTEX", "SICORTEX",
"LOONGSON3A",
"LOONGSON3B",
"LOONGSON3R3",
"LOONGSON3R4",
"I6400", "I6400",
"P6600", "P6600",
"I6500" "I6500"
@@ -90,48 +90,13 @@ static char *cpuname[] = {


int detect(void){ int detect(void){


#ifdef __linux
#ifdef linux
FILE *infile; FILE *infile;
char buffer[512], *p; char buffer[512], *p;


p = (char *)NULL; p = (char *)NULL;
infile = fopen("/proc/cpuinfo", "r");
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("cpu", buffer, 3)){
p = strchr(buffer, ':') + 2;
#if 0
fprintf(stderr, "%s\n", p);
#endif
break;
}
}

fclose(infile);

if(p != NULL){
if (strstr(p, "Loongson-3A")){
return CPU_LOONGSON3A;
}else if(strstr(p, "Loongson-3B")){
return CPU_LOONGSON3B;
}else if (strstr(p, "Loongson-3")){
infile = fopen("/proc/cpuinfo", "r");
p = (char *)NULL;
while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("system type", buffer, 11)){
p = strchr(buffer, ':') + 2;
break;
}
}
fclose(infile);
if (strstr(p, "loongson3a"))
return CPU_LOONGSON3A;
}else{
return CPU_SICORTEX;
}
}
//Check model name for Loongson3 //Check model name for Loongson3
infile = fopen("/proc/cpuinfo", "r"); infile = fopen("/proc/cpuinfo", "r");
p = (char *)NULL;
while (fgets(buffer, sizeof(buffer), infile)){ while (fgets(buffer, sizeof(buffer), infile)){
if (!strncmp("model name", buffer, 10)){ if (!strncmp("model name", buffer, 10)){
p = strchr(buffer, ':') + 2; p = strchr(buffer, ':') + 2;
@@ -140,14 +105,16 @@ int detect(void){
} }
fclose(infile); fclose(infile);
if(p != NULL){ if(p != NULL){
if (strstr(p, "Loongson-3A")){
return CPU_LOONGSON3A;
}else if(strstr(p, "Loongson-3B")){
return CPU_LOONGSON3B;
}
if (strstr(p, "Loongson-3A3000") || strstr(p, "Loongson-3B3000")){
return CPU_LOONGSON3R3;
}else if(strstr(p, "Loongson-3A4000") || strstr(p, "Loongson-3B4000")){
return CPU_LOONGSON3R4;
} else{
return CPU_SICORTEX;
} }
#endif #endif
return CPU_UNKNOWN; return CPU_UNKNOWN;
}
} }


char *get_corename(void){ char *get_corename(void){
@@ -159,10 +126,10 @@ void get_architecture(void){
} }


void get_subarchitecture(void){ void get_subarchitecture(void){
if(detect()==CPU_LOONGSON3A) {
printf("LOONGSON3A");
}else if(detect()==CPU_LOONGSON3B){
printf("LOONGSON3B");
if(detect()==CPU_LOONGSON3R3) {
printf("LOONGSON3R3");
}else if(detect()==CPU_LOONGSON3R4){
printf("LOONGSON3R4");
}else if(detect()==CPU_I6400){ }else if(detect()==CPU_I6400){
printf("I6400"); printf("I6400");
}else if(detect()==CPU_P6600){ }else if(detect()==CPU_P6600){
@@ -179,8 +146,8 @@ void get_subdirname(void){
} }


void get_cpuconfig(void){ void get_cpuconfig(void){
if(detect()==CPU_LOONGSON3A) {
printf("#define LOONGSON3A\n");
if(detect()==CPU_LOONGSON3R3) {
printf("#define LOONGSON3R3\n");
printf("#define L1_DATA_SIZE 65536\n"); printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n"); printf("#define L2_SIZE 512488\n");
@@ -188,8 +155,8 @@ void get_cpuconfig(void){
printf("#define DTB_DEFAULT_ENTRIES 64\n"); printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n"); printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n"); printf("#define L2_ASSOCIATIVE 4\n");
}else if(detect()==CPU_LOONGSON3B){
printf("#define LOONGSON3B\n");
}else if(detect()==CPU_LOONGSON3R4){
printf("#define LOONGSON3R4\n");
printf("#define L1_DATA_SIZE 65536\n"); printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n"); printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n"); printf("#define L2_SIZE 512488\n");
@@ -237,10 +204,10 @@ void get_cpuconfig(void){
} }


void get_libname(void){ void get_libname(void){
if(detect()==CPU_LOONGSON3A) {
printf("loongson3a\n");
}else if(detect()==CPU_LOONGSON3B) {
printf("loongson3b\n");
if(detect()==CPU_LOONGSON3R3) {
printf("loongson3r3\n");
}else if(detect()==CPU_LOONGSON3R4) {
printf("loongson3r4\n");
}else if(detect()==CPU_I6400) { }else if(detect()==CPU_I6400) {
printf("i6400\n"); printf("i6400\n");
}else if(detect()==CPU_P6600) { }else if(detect()==CPU_P6600) {


+ 113
- 0
cpuid_riscv64.c View File

@@ -0,0 +1,113 @@
/*****************************************************************************
Copyright (c) 2011-2014, The OpenBLAS Project
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written
permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

**********************************************************************************/


/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#define CPU_UNKNOWN 0
#define CPU_C910V 1

static char *cpuname[] = {
"UNKOWN",
"C910V"
};

int detect(void){
return CPU_UNKNOWN;
}

char *get_corename(void){
return cpuname[detect()];
}

void get_architecture(void){
printf("RISCV64");
}

void get_subarchitecture(void){
}

void get_subdirname(void){
printf("riscv64");
}

void get_cpuconfig(void){
printf("#define UNKNOWN\n");
printf("#define L1_DATA_SIZE 65536\n");
printf("#define L1_DATA_LINESIZE 32\n");
printf("#define L2_SIZE 512488\n");
printf("#define L2_LINESIZE 32\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");
printf("#define L2_ASSOCIATIVE 4\n");
}

void get_libname(void){
printf("riscv64\n");
}

+ 8
- 8
cpuid_x86.c View File

@@ -202,7 +202,7 @@ int support_avx(){
if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){ if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){
xgetbv(0, &eax, &edx); xgetbv(0, &eax, &edx);
if((eax & 6) == 6){ if((eax & 6) == 6){
ret=1; //OS support AVX
ret=1; //OS supports saving xmm and ymm registers (6 = (1<<1) | (1<<2))
} }
} }
return ret; return ret;
@@ -219,8 +219,8 @@ int support_avx2(){
if (!support_avx()) if (!support_avx())
return 0; return 0;
cpuid(7, &eax, &ebx, &ecx, &edx); cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & (1<<7)) != 0)
ret=1; //OS supports AVX2
if((ebx & (1<<5)) != 0)
ret=1; //CPU supports AVX2
return ret; return ret;
#else #else
return 0; return 0;
@@ -235,14 +235,14 @@ int support_avx512(){
if (!support_avx()) if (!support_avx())
return 0; return 0;
cpuid(7, &eax, &ebx, &ecx, &edx); cpuid(7, &eax, &ebx, &ecx, &edx);
if((ebx & 32) != 32){
ret=0; //OS does not even support AVX2
if((ebx & (1<<5)) == 0){
ret=0; //cpu does not have avx2 flag
} }
if((ebx & (1<<31)) != 0){
if((ebx & (1<<31)) != 0){ //AVX512VL flag
xgetbv(0, &eax, &edx); xgetbv(0, &eax, &edx);
if((eax & 0xe0) == 0xe0) if((eax & 0xe0) == 0xe0)
ret=1; //OS supports AVX512VL
}
ret=1; //OS supports saving zmm registers
}
return ret; return ret;
#else #else
return 0; return 0;


+ 5
- 0
ctest.c View File

@@ -153,6 +153,11 @@ ARCH_ARM
ARCH_ARM64 ARCH_ARM64
#endif #endif


#if defined(__riscv)
ARCH_RISCV64
#endif

#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
HAVE_C11 HAVE_C11
#endif #endif


+ 3
- 3
ctest/Makefile View File

@@ -61,7 +61,7 @@ endif


all1: $(all1targets) all1: $(all1targets)


ifndef CROSS
ifneq ($(CROSS), 1)
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1) ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./xscblat1 OMP_NUM_THREADS=2 ./xscblat1
@@ -106,7 +106,7 @@ endif


all2: $(all2targets) all2: $(all2targets)


ifndef CROSS
ifneq ($(CROSS), 1)
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1) ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./xscblat2 < sin2 OMP_NUM_THREADS=2 ./xscblat2 < sin2
@@ -152,7 +152,7 @@ endif


all3: $(all3targets) all3: $(all3targets)


ifndef CROSS
ifneq ($(CROSS), 1)
ifeq ($(USE_OPENMP), 1) ifeq ($(USE_OPENMP), 1)
ifeq ($(BUILD_SINGLE),1) ifeq ($(BUILD_SINGLE),1)
OMP_NUM_THREADS=2 ./xscblat3 < sin3 OMP_NUM_THREADS=2 ./xscblat3 < sin3


+ 15
- 1
driver/level2/Makefile View File

@@ -413,7 +413,13 @@ XBLASOBJS += \
xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUN.$(SUFFIX) \ xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUN.$(SUFFIX) \
xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLN.$(SUFFIX) \ xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLN.$(SUFFIX) \
xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \ xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX) \
xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX)

ifeq ($(BUILD_BFLOAT16),1)
SBBLASOBJS += \
sbgemv_thread_n$(TSUFFIX).$(SUFFIX) \
sbgemv_thread_t$(TSUFFIX).$(SUFFIX)
endif


endif endif


@@ -3693,4 +3699,12 @@ xtrsv_CUU.$(SUFFIX) xtrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h
xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h
$(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F) $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F)


ifeq ($(BUILD_BFLOAT16),1)
sbgemv_thread_n.$(SUFFIX) sbgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F)
sbgemv_thread_t.$(SUFFIX) sbgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F)
endif


include ../../Makefile.tail include ../../Makefile.tail

+ 149
- 0
driver/level2/sbgemv_thread.c View File

@@ -0,0 +1,149 @@
/*********************************************************************/
/* Copyright 2009, 2010 The University of Texas at Austin. */
/* All rights reserved. */
/* */
/* Redistribution and use in source and binary forms, with or */
/* without modification, are permitted provided that the following */
/* conditions are met: */
/* */
/* 1. Redistributions of source code must retain the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer. */
/* */
/* 2. Redistributions in binary form must reproduce the above */
/* copyright notice, this list of conditions and the following */
/* disclaimer in the documentation and/or other materials */
/* provided with the distribution. */
/* */
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
/* POSSIBILITY OF SUCH DAMAGE. */
/* */
/* The views and conclusions contained in the software and */
/* documentation are those of the authors and should not be */
/* interpreted as representing official policies, either expressed */
/* or implied, of The University of Texas at Austin. */
/*********************************************************************/

#include <stdio.h>
#include <stdlib.h>
#include "common.h"

#ifndef TRANSA
#define SBGEMV SBGEMV_N
#else
#define SBGEMV SBGEMV_T
#endif

static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *dummy2, BLASLONG dummy3){

bfloat16 *a, *x;
float *y;
BLASLONG lda, incx, incy;
BLASLONG m_from, m_to, n_from, n_to;

a = (bfloat16 *)args->a;
x = (bfloat16 *)args->b;
y = (float *)args->c;

lda = args->lda;
incx = args->ldb;
incy = args->ldc;
#ifndef TRANSA // N
m_from = *(range_m + 0);
m_to = *(range_m + 1);
n_from = 0;
n_to = args -> n;
a += m_from;
y += m_from * incy;
#else // T
m_from = 0;
m_to = args->m;
n_from = *(range_n + 0);
n_to = *(range_n + 1);
a += n_from * lda;
y += n_from * incy;
#endif

SBGEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy);

return 0;
}

int CNAME(BLASLONG m, BLASLONG n, float alpha, bfloat16 *a, BLASLONG lda, bfloat16 *x, BLASLONG incx, float beta, float *y, BLASLONG incy, int threads)
{
blas_arg_t args;
blas_queue_t queue[MAX_CPU_NUMBER];
BLASLONG range[MAX_CPU_NUMBER + 1];

#ifndef TRANSA
BLASLONG width_for_split = m;
#else
BLASLONG width_for_split = n;
#endif

BLASLONG BLOCK_WIDTH = width_for_split/threads;

int mode = BLAS_BFLOAT16 | BLAS_REAL;

args.m = m;
args.n = n;
args.a = (void *)a;
args.b = (void *)x;
args.c = (void *)y;
args.lda = lda;
args.ldb = incx;
args.ldc = incy;
args.alpha = (void *)&alpha;
args.beta = (void *)&beta;

range[0] = 0;

int thread_idx;

for (thread_idx=0; thread_idx<threads; thread_idx++) {
if (thread_idx != threads-1) {
range[thread_idx + 1] = range[thread_idx] + BLOCK_WIDTH;
} else {
range[thread_idx + 1] = range[thread_idx] + width_for_split;
}

queue[thread_idx].mode = mode;
queue[thread_idx].routine = sbgemv_kernel;
queue[thread_idx].args = &args;
#ifndef TRANSA
queue[thread_idx].range_m = &range[thread_idx];
queue[thread_idx].range_n = NULL;
#else
queue[thread_idx].range_m = NULL;
queue[thread_idx].range_n = &range[thread_idx];
#endif
queue[thread_idx].sa = NULL;
queue[thread_idx].sb = NULL;
queue[thread_idx].next = &queue[thread_idx + 1];

width_for_split -= BLOCK_WIDTH;
}

if (thread_idx) {
queue[0].sa = NULL;
queue[0].sb = NULL;
queue[thread_idx - 1].next = NULL;

exec_blas(thread_idx, queue);
}

return 0;
}

+ 6
- 6
driver/level3/Makefile View File

@@ -206,7 +206,7 @@ ifdef SMP
COMMONOBJS += gemm_thread_m.$(SUFFIX) gemm_thread_n.$(SUFFIX) gemm_thread_mn.$(SUFFIX) gemm_thread_variable.$(SUFFIX) COMMONOBJS += gemm_thread_m.$(SUFFIX) gemm_thread_n.$(SUFFIX) gemm_thread_mn.$(SUFFIX) gemm_thread_variable.$(SUFFIX)
COMMONOBJS += syrk_thread.$(SUFFIX) COMMONOBJS += syrk_thread.$(SUFFIX)


ifndef USE_SIMPLE_THREADED_LEVEL3
ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1)
ifeq ($(BUILD_BFLOAT16),1) ifeq ($(BUILD_BFLOAT16),1)
SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX)
endif endif
@@ -282,7 +282,7 @@ HPLOBJS = \
dtrsm_RNUU.$(SUFFIX) dtrsm_RNUN.$(SUFFIX) dtrsm_RNLU.$(SUFFIX) dtrsm_RNLN.$(SUFFIX) \ dtrsm_RNUU.$(SUFFIX) dtrsm_RNUN.$(SUFFIX) dtrsm_RNLU.$(SUFFIX) dtrsm_RNLN.$(SUFFIX) \
dtrsm_RTUU.$(SUFFIX) dtrsm_RTUN.$(SUFFIX) dtrsm_RTLU.$(SUFFIX) dtrsm_RTLN.$(SUFFIX) dtrsm_RTUU.$(SUFFIX) dtrsm_RTUN.$(SUFFIX) dtrsm_RTLU.$(SUFFIX) dtrsm_RTLN.$(SUFFIX)


ifndef USE_SIMPLE_THREADED_LEVEL3
ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1)
HPLOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) \ HPLOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) \
dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
endif endif
@@ -297,13 +297,13 @@ ifeq ($(BUILD_DOUBLE),1)
strsm_RTUU.$(SUFFIX) strsm_RTUN.$(SUFFIX) strsm_RTLU.$(SUFFIX) strsm_RTLN.$(SUFFIX) \ strsm_RTUU.$(SUFFIX) strsm_RTUN.$(SUFFIX) strsm_RTLU.$(SUFFIX) strsm_RTLN.$(SUFFIX) \
ssyrk_UN.$(SUFFIX) ssyrk_UT.$(SUFFIX) ssyrk_LN.$(SUFFIX) ssyrk_LT.$(SUFFIX) \ ssyrk_UN.$(SUFFIX) ssyrk_UT.$(SUFFIX) ssyrk_LN.$(SUFFIX) ssyrk_LT.$(SUFFIX) \
ssyrk_kernel_U.$(SUFFIX) ssyrk_kernel_L.$(SUFFIX) ssyrk_kernel_U.$(SUFFIX) ssyrk_kernel_L.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1)
SBLASOBJS += ssyrk_thread_UN.$(SUFFIX) ssyrk_thread_UT.$(SUFFIX) ssyrk_thread_LN.$(SUFFIX) ssyrk_thread_LT.$(SUFFIX) SBLASOBJS += ssyrk_thread_UN.$(SUFFIX) ssyrk_thread_UT.$(SUFFIX) ssyrk_thread_LN.$(SUFFIX) ssyrk_thread_LT.$(SUFFIX)
endif endif
endif endif
ifeq ($(BUILD_COMPLEX),1) ifeq ($(BUILD_COMPLEX),1)
SBLASOBJS = sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) SBLASOBJS = sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1)
SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX)
endif endif
endif endif
@@ -312,7 +312,7 @@ ifneq ($(BUILD_DOUBLE),1)
DBLASOBJS= DBLASOBJS=
ifeq ($(BUILD_COMPLEX16),1) ifeq ($(BUILD_COMPLEX16),1)
DBLASOBJS = dgemm_nn.$(SUFFIX) dgemm_nt.$(SUFFIX) dgemm_tn.$(SUFFIX) dgemm_tt.$(SUFFIX) DBLASOBJS = dgemm_nn.$(SUFFIX) dgemm_nt.$(SUFFIX) dgemm_tn.$(SUFFIX) dgemm_tt.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1)
DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX)
endif endif
endif endif
@@ -332,7 +332,7 @@ ifeq ($(BUILD_COMPLEX16),1)
ctrsm_RTUU.$(SUFFIX) ctrsm_RTUN.$(SUFFIX) ctrsm_RTLU.$(SUFFIX) ctrsm_RTLN.$(SUFFIX) \ ctrsm_RTUU.$(SUFFIX) ctrsm_RTUN.$(SUFFIX) ctrsm_RTLU.$(SUFFIX) ctrsm_RTLN.$(SUFFIX) \
ctrsm_RRUU.$(SUFFIX) ctrsm_RRUN.$(SUFFIX) ctrsm_RRLU.$(SUFFIX) ctrsm_RRLN.$(SUFFIX) \ ctrsm_RRUU.$(SUFFIX) ctrsm_RRUN.$(SUFFIX) ctrsm_RRLU.$(SUFFIX) ctrsm_RRLN.$(SUFFIX) \
ctrsm_RCUU.$(SUFFIX) ctrsm_RCUN.$(SUFFIX) ctrsm_RCLU.$(SUFFIX) ctrsm_RCLN.$(SUFFIX) ctrsm_RCUU.$(SUFFIX) ctrsm_RCUN.$(SUFFIX) ctrsm_RCLU.$(SUFFIX) ctrsm_RCLN.$(SUFFIX)
ifndef USE_SIMPLE_THREADED_LEVEL3
ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1)
CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread_LN.$(SUFFIX) cherk_thread_LC.$(SUFFIX) CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread_LN.$(SUFFIX) cherk_thread_LC.$(SUFFIX)
endif endif
endif endif


+ 3
- 1
driver/level3/level3.c View File

@@ -339,8 +339,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#else #else
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
else else
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
/*
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
else else
*/
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
#endif #endif




+ 2
- 0
driver/level3/level3_thread.c View File

@@ -373,8 +373,10 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
#else #else
if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N;
else else
/*
if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N;
else else
*/
if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N;
#endif #endif
/* Copy part of local region of B into workspace */ /* Copy part of local region of B into workspace */


+ 11
- 5
driver/others/Makefile View File

@@ -7,7 +7,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX)


ifdef SMP ifdef SMP
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX)
ifndef NO_AFFINITY
ifneq ($(NO_AFFINITY), 1)
COMMONOBJS += init.$(SUFFIX) COMMONOBJS += init.$(SUFFIX)
endif endif
endif endif
@@ -24,19 +24,23 @@ else
ifeq ($(ARCH),zarch) ifeq ($(ARCH),zarch)
COMMONOBJS += dynamic_zarch.$(SUFFIX) COMMONOBJS += dynamic_zarch.$(SUFFIX)
else else
ifeq ($(ARCH),mips64)
COMMONOBJS += dynamic_mips64.$(SUFFIX)
else
COMMONOBJS += dynamic.$(SUFFIX) COMMONOBJS += dynamic.$(SUFFIX)
endif endif
endif endif
endif endif
endif
else else
COMMONOBJS += parameter.$(SUFFIX) COMMONOBJS += parameter.$(SUFFIX)
endif endif


ifdef EXPRECISION
ifeq ($(EXPRECISION), 1)
COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX) COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX)
endif endif


ifdef QUAD_PRECISION
ifeq ($(QUAD_PRECISION), 1)
COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX) COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX)
endif endif


@@ -46,11 +50,9 @@ ifeq ($(C_COMPILER), PGI)
endif endif
endif endif


ifdef USE_CUDA
ifeq ($(USE_CUDA), 1) ifeq ($(USE_CUDA), 1)
COMMONOBJS += cuda_init.$(SUFFIX) COMMONOBJS += cuda_init.$(SUFFIX)
endif endif
endif


ifdef FUNCTION_PROFILE ifdef FUNCTION_PROFILE
COMMONOBJS += profile.$(SUFFIX) COMMONOBJS += profile.$(SUFFIX)
@@ -94,10 +96,14 @@ else
ifeq ($(ARCH),zarch) ifeq ($(ARCH),zarch)
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX) HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX)
else else
ifeq ($(ARCH),mips64)
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_mips64.$(SUFFIX)
else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX)
endif endif
endif endif
endif endif
endif
else else
HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX)
endif endif


Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save