Update branch from develop to release 0.3.13 (tags/v0.3.13)
| @@ -211,44 +211,48 @@ matrix: | |||||
| - &test-macos | - &test-macos | ||||
| os: osx | os: osx | ||||
| osx_image: xcode10.1 | |||||
| osx_image: xcode11.5 | |||||
| before_script: | before_script: | ||||
| - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" | - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" | ||||
| - brew update | |||||
| - brew install gcc@8 # for gfortran | |||||
| script: | script: | ||||
| - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | ||||
| env: | env: | ||||
| - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-8" | |||||
| - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-9" | |||||
| - <<: *test-macos | - <<: *test-macos | ||||
| osx_image: xcode12 | osx_image: xcode12 | ||||
| before_script: | before_script: | ||||
| - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" | - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" | ||||
| - brew update | - brew update | ||||
| - brew install gcc@10 # for gfortran | |||||
| - brew install gcc@10 | |||||
| script: | script: | ||||
| - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | - travis_wait 45 make QUIET_MAKE=1 $COMMON_FLAGS $BTYPE | ||||
| env: | env: | ||||
| - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10" | - BTYPE="TARGET=NEHALEM BINARY=64 INTERFACE64=1 FC=gfortran-10" | ||||
| - <<: *test-macos | |||||
| osx_image: xcode10.0 | |||||
| env: | |||||
| - BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1" | |||||
| # - <<: *test-macos | |||||
| # osx_image: xcode10 | |||||
| # env: | |||||
| # - BTYPE="TARGET=NEHALEM BINARY=32 NOFORTRAN=1" | |||||
| - <<: *test-macos | - <<: *test-macos | ||||
| osx_image: xcode10.1 | |||||
| osx_image: xcode11.5 | |||||
| before_script: | |||||
| - COMMON_FLAGS="DYNAMIC_ARCH=1 NUM_THREADS=32" | |||||
| - brew update | |||||
| env: | env: | ||||
| - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||||
| - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0" | |||||
| # - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||||
| # - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch arm64 -miphoneos-version-min=10.0" | |||||
| - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||||
| - CFLAGS="-O2 -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch arm64 -miphoneos-version-min=10.0" | |||||
| - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1" | - BTYPE="TARGET=ARMV8 BINARY=64 HOSTCC=clang NOFORTRAN=1" | ||||
| - <<: *test-macos | - <<: *test-macos | ||||
| osx_image: xcode10.1 | |||||
| osx_image: xcode11.5 | |||||
| env: | env: | ||||
| - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||||
| - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1" | |||||
| # - CC="/Applications/Xcode-10.1.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||||
| # - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-10.1.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS12.1.sdk -arch armv7 -miphoneos-version-min=5.1" | |||||
| - CC="/Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Toolchains/XcodeDefault.xctoolchain/usr/bin/clang" | |||||
| - CFLAGS="-O2 -mno-thumb -Wno-macro-redefined -isysroot /Applications/Xcode-11.5.GM.Seed.app/Contents/Developer/Platforms/iPhoneOS.platform/Developer/SDKs/iPhoneOS13.5.sdk -arch armv7 -miphoneos-version-min=5.1" | |||||
| - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1" | - BTYPE="TARGET=ARMV7 HOSTCC=clang NOFORTRAN=1" | ||||
| - &test-graviton2 | - &test-graviton2 | ||||
| @@ -6,7 +6,7 @@ cmake_minimum_required(VERSION 2.8.5) | |||||
| project(OpenBLAS C ASM) | project(OpenBLAS C ASM) | ||||
| set(OpenBLAS_MAJOR_VERSION 0) | set(OpenBLAS_MAJOR_VERSION 0) | ||||
| set(OpenBLAS_MINOR_VERSION 3) | set(OpenBLAS_MINOR_VERSION 3) | ||||
| set(OpenBLAS_PATCH_VERSION 12) | |||||
| set(OpenBLAS_PATCH_VERSION 13) | |||||
| set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | set(OpenBLAS_VERSION "${OpenBLAS_MAJOR_VERSION}.${OpenBLAS_MINOR_VERSION}.${OpenBLAS_PATCH_VERSION}") | ||||
| # Adhere to GNU filesystem layout conventions | # Adhere to GNU filesystem layout conventions | ||||
| @@ -190,4 +190,7 @@ In chronological order: | |||||
| * [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support | * [2020-09-07] Fix builds with clang on IBM z, including dynamic architecture support | ||||
| * Danfeng Zhang <https://github.com/craft-zhang> | * Danfeng Zhang <https://github.com/craft-zhang> | ||||
| * [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53 | |||||
| * [2020-05-20] Improve performance of SGEMM and STRMM on Arm Cortex-A53 | |||||
| * PingTouGe Semiconductor Co., Ltd. | |||||
| * [2020-10] Add RISC-V Vector (0.7.1) support. Optimize BLAS kernels for Xuantie C910 | |||||
| @@ -1,4 +1,54 @@ | |||||
| OpenBLAS ChangeLog | OpenBLAS ChangeLog | ||||
| ==================================================================== | |||||
| Version 0.3.13 | |||||
| 12-Dec-2020 | |||||
| common: | |||||
| * Added a generic bfloat16 SBGEMV kernel | |||||
| * Fixed a potentially severe memory leak after fork in OpenMP builds | |||||
| that was introduced in 0.3.12 | |||||
| * Added detection of the Fujitsu Fortran compiler | |||||
| * Added detection of the (e)gfortran compiler on OpenBSD | |||||
| * Added support for overriding the default name of the library independently | |||||
| from symbol suffixing in the gmake builds (already supported in cmake) | |||||
| RISCV: | |||||
| * Added a RISC-V port optimized for C910V | |||||
| POWER: | |||||
| * Added optimized POWER10 kernels for SAXPY, CAXPY, SDOT, DDOT and DGEMV_N | |||||
| * Improved DGEMM performance on POWER10 | |||||
| * Improved STRSM and DTRSM performance on POWER9 and POWER10 | |||||
| * Fixed segmentation faults in DYNAMIC_ARCH builds | |||||
| * Fixed compilation with the PGI compiler | |||||
| x86: | |||||
| * Fixed compilation of kernels that require SSE2 intrinsics since 0.3.12 | |||||
| x86_64: | |||||
| * Added an optimized bfloat16 SBGEMV kernel for SkylakeX and Cooperlake | |||||
| * Improved the performance of SASUM and DASUM kernels through parallelization | |||||
| * Improved the performance of SROT and DROT kernels | |||||
| * Improved the performance of multithreaded xSYRK | |||||
| * Fixed OpenMP builds that use the LLVM Clang compiler together with GNU gfortran | |||||
| (where linking of both the LLVM libomp and GNU libgomp could lead to lockups or | |||||
| wrong results) | |||||
| * Fixed miscompilations by old gcc 4.6 | |||||
| * Fixed misdetection of AVX2 capability in some Sandybridge cpus | |||||
| * Fixed lockups in builds combining DYNAMIC_ARCH with TARGET=GENERIC on OpenBSD | |||||
| ARM64: | |||||
| * Fixed segmentation faults in DYNAMIC_ARCH builds | |||||
| MIPS: | |||||
| * Improved kernels for Loongson 3R3 ("3A") and 3R4 ("3B") models, including MSA | |||||
| * Fixed bugs in the MSA kernels for CGEMM, CTRMM, CGEMV and ZGEMV | |||||
| * Added handling of zero increments in the MSA kernels for SSWAP and DSWAP | |||||
| * Added DYNAMIC_ARCH support for MIPS64 (currently Loongson3R3/3R4 only) | |||||
| SPARC: | |||||
| * Fixed building 32 and 64 bit SPARC kernels with the SolarisStudio compilers | |||||
| ==================================================================== | ==================================================================== | ||||
| Version 0.3.12 | Version 0.3.12 | ||||
| 24-Oct-2020 | 24-Oct-2020 | ||||
| @@ -268,7 +268,11 @@ ifeq ($(NOFORTRAN), $(filter 0,$(NOFORTRAN))) | |||||
| -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "POPTS = $(LAPACK_FPFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "FFLAGS_NOOPT = -O0 $(LAPACK_NOOPT)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "PNOOPT = $(LAPACK_FPFLAGS) -O0" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| ifeq ($(C_COMPILER)$(F_COMPILER)$(USE_OPENMP), CLANGGFORTRAN1) | |||||
| -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB) -lomp" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
| else | |||||
| -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "LDFLAGS = $(FFLAGS) $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| endif | |||||
| -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "CC = $(CC)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "override CFLAGS = $(LAPACK_CFLAGS)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "AR = $(AR)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
| @@ -12,3 +12,8 @@ ifeq ($(CORE), ARMV6) | |||||
| CCOMMON_OPT += -mfpu=vfp | CCOMMON_OPT += -mfpu=vfp | ||||
| FCOMMON_OPT += -mfpu=vfp | FCOMMON_OPT += -mfpu=vfp | ||||
| endif | endif | ||||
| ifdef HAVE_NEON | |||||
| CCOMMON_OPT += -mfpu=neon | |||||
| FCOMMON_OPT += -mfpu=neon | |||||
| endif | |||||
| @@ -9,7 +9,7 @@ OPENBLAS_INCLUDE_DIR := $(PREFIX)/include | |||||
| OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib | OPENBLAS_LIBRARY_DIR := $(PREFIX)/lib | ||||
| OPENBLAS_BINARY_DIR := $(PREFIX)/bin | OPENBLAS_BINARY_DIR := $(PREFIX)/bin | ||||
| OPENBLAS_BUILD_DIR := $(CURDIR) | OPENBLAS_BUILD_DIR := $(CURDIR) | ||||
| OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/openblas | |||||
| OPENBLAS_CMAKE_DIR := $(OPENBLAS_LIBRARY_DIR)/cmake/$(LIBSONAMEBASE) | |||||
| OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake | OPENBLAS_CMAKE_CONFIG := OpenBLASConfig.cmake | ||||
| OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake | OPENBLAS_CMAKE_CONFIG_VERSION := OpenBLASConfigVersion.cmake | ||||
| OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig | OPENBLAS_PKGCONFIG_DIR := $(OPENBLAS_LIBRARY_DIR)/pkgconfig | ||||
| @@ -150,13 +150,13 @@ endif | |||||
| endif | endif | ||||
| #Generating openblas.pc | #Generating openblas.pc | ||||
| @echo Generating openblas.pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" | |||||
| @echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" | |||||
| @echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" | |||||
| @echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" | |||||
| @echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" | |||||
| @echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" | |||||
| @cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/openblas.pc" | |||||
| @echo Generating $(LIBSONAMEBASE).pc in "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)" | |||||
| @echo 'libdir='$(OPENBLAS_LIBRARY_DIR) > "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" | |||||
| @echo 'includedir='$(OPENBLAS_INCLUDE_DIR) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" | |||||
| @echo 'openblas_config= USE_64BITINT='$(USE_64BITINT) 'DYNAMIC_ARCH='$(DYNAMIC_ARCH) 'DYNAMIC_OLDER='$(DYNAMIC_OLDER) 'NO_CBLAS='$(NO_CBLAS) 'NO_LAPACK='$(NO_LAPACK) 'NO_LAPACKE='$(NO_LAPACKE) 'NO_AFFINITY='$(NO_AFFINITY) 'USE_OPENMP='$(USE_OPENMP) $(CORE) 'MAX_THREADS='$(NUM_THREADS)>> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" | |||||
| @echo 'version='$(VERSION) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" | |||||
| @echo 'extralib='$(PKG_EXTRALIB) >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" | |||||
| @cat openblas.pc.in >> "$(DESTDIR)$(OPENBLAS_PKGCONFIG_DIR)/$(LIBSONAMEBASE).pc" | |||||
| #Generating OpenBLASConfig.cmake | #Generating OpenBLASConfig.cmake | ||||
| @@ -41,6 +41,10 @@ ifeq ($(TARGET), I6500) | |||||
| TARGET_FLAGS = -mips64r6 | TARGET_FLAGS = -mips64r6 | ||||
| endif | endif | ||||
| ifeq ($(TARGET), C910V) | |||||
| TARGET_FLAGS = -march=rv64gcvxthead -mabi=lp64v | |||||
| endif | |||||
| all: getarch_2nd | all: getarch_2nd | ||||
| ./getarch_2nd 0 >> $(TARGET_MAKE) | ./getarch_2nd 0 >> $(TARGET_MAKE) | ||||
| ./getarch_2nd 1 >> $(TARGET_CONF) | ./getarch_2nd 1 >> $(TARGET_CONF) | ||||
| @@ -0,0 +1,4 @@ | |||||
| ifeq ($(CORE), C910V) | |||||
| CCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v | |||||
| FCOMMON_OPT += -march=rv64gcvxthead -mabi=lp64v -static | |||||
| endif | |||||
| @@ -3,7 +3,7 @@ | |||||
| # | # | ||||
| # This library's version | # This library's version | ||||
| VERSION = 0.3.12 | |||||
| VERSION = 0.3.13 | |||||
| # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | ||||
| # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | ||||
| @@ -3,21 +3,29 @@ RANLIB = ranlib | |||||
| ifdef BINARY64 | ifdef BINARY64 | ||||
| ifeq ($(C_COMPILER), GCC) | |||||
| CCOMMON_OPT += -mcpu=v9 -m64 | CCOMMON_OPT += -mcpu=v9 -m64 | ||||
| else | |||||
| CCOMMON_OPT += -m64 | |||||
| endif | |||||
| ifeq ($(COMPILER_F77), g77) | ifeq ($(COMPILER_F77), g77) | ||||
| FCOMMON_OPT += -mcpu=v9 -m64 | FCOMMON_OPT += -mcpu=v9 -m64 | ||||
| endif | endif | ||||
| ifeq ($(COMPILER_F77), f90) | |||||
| FCOMMON_OPT += -xarch=v9 | |||||
| ifeq ($(COMPILER_F77), f95) | |||||
| FCOMMON_OPT += -m64 | |||||
| endif | endif | ||||
| else | else | ||||
| ifeq ($(C_COMPILER), GCC) | |||||
| CCOMMON_OPT += -mcpu=v9 | CCOMMON_OPT += -mcpu=v9 | ||||
| else | |||||
| CCOMMON_OPT += -xarch=v9 | |||||
| endif | |||||
| ifeq ($(COMPILER_F77), g77) | ifeq ($(COMPILER_F77), g77) | ||||
| FCOMMON_OPT += -mcpu=v9 | FCOMMON_OPT += -mcpu=v9 | ||||
| endif | endif | ||||
| ifeq ($(COMPILER_F77), f90) | |||||
| ifeq ($(COMPILER_F77), f95) | |||||
| FCOMMON_OPT += -xarch=v8plusb | FCOMMON_OPT += -xarch=v8plusb | ||||
| endif | endif | ||||
| @@ -37,4 +45,4 @@ LIBSUNPERF = -L/opt/SUNWspro/lib/v9 -L/opt/SUNWspro/prod/lib/v9 \ | |||||
| else | else | ||||
| LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \ | LIBSUNPERF = -L/opt/SUNWspro/lib -L/opt/SUNWspro/prod/lib \ | ||||
| -Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath | -Wl,-R,/opt/SUNWspro/lib -lsunperf -lompstubs -lfui -lfsu -lsunmath | ||||
| endif | |||||
| endif | |||||
| @@ -6,7 +6,7 @@ | |||||
| INCLUDED = 1 | INCLUDED = 1 | ||||
| ifndef TOPDIR | ifndef TOPDIR | ||||
| TOPDIR = . | |||||
| TOPDIR = . | |||||
| endif | endif | ||||
| # If ARCH is not set, we use the host system's architecture for getarch compile options. | # If ARCH is not set, we use the host system's architecture for getarch compile options. | ||||
| @@ -93,6 +93,12 @@ endif | |||||
| ifdef TARGET | ifdef TARGET | ||||
| GETARCH_FLAGS := -DFORCE_$(TARGET) | GETARCH_FLAGS := -DFORCE_$(TARGET) | ||||
| GETARCH_FLAGS += -DUSER_TARGET | GETARCH_FLAGS += -DUSER_TARGET | ||||
| ifeq ($(TARGET), GENERIC) | |||||
| ifeq ($(DYNAMIC_ARCH), 1) | |||||
| # A user-forced GENERIC target combined with DYNAMIC_ARCH=1 forces NO_EXPRECISION=1 | |||||
| # and exports it so that recursive make invocations see the same setting. | |||||
| # Make variable names are case-sensitive: the exported name must match the | |||||
| # assigned one exactly, otherwise the override never reaches sub-makes. | |||||
| override NO_EXPRECISION=1 | |||||
| export NO_EXPRECISION | |||||
| endif | |||||
| endif | |||||
| endif | endif | ||||
| # Force fallbacks for 32bit | # Force fallbacks for 32bit | ||||
| @@ -246,6 +252,22 @@ DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" | |||||
| ifndef TARGET_CORE | ifndef TARGET_CORE | ||||
| include $(TOPDIR)/Makefile.conf | include $(TOPDIR)/Makefile.conf | ||||
| else | else | ||||
| HAVE_NEON= | |||||
| HAVE_VFP= | |||||
| HAVE_VFPV3= | |||||
| HAVE_VFPV4= | |||||
| HAVE_MMX= | |||||
| HAVE_SSE= | |||||
| HAVE_SSE2= | |||||
| HAVE_SSE3= | |||||
| HAVE_SSSE3= | |||||
| HAVE_SSE4_1= | |||||
| HAVE_SSE4_2= | |||||
| HAVE_SSE4A= | |||||
| HAVE_SSE5= | |||||
| HAVE_AVX= | |||||
| HAVE_AVX2= | |||||
| HAVE_FMA3= | |||||
| include $(TOPDIR)/Makefile_kernel.conf | include $(TOPDIR)/Makefile_kernel.conf | ||||
| endif | endif | ||||
| @@ -319,6 +341,7 @@ ifeq ($(GCCVERSIONGTEQ7),1) | |||||
| else | else | ||||
| GCCDUMPVERSION_PARAM := -dumpversion | GCCDUMPVERSION_PARAM := -dumpversion | ||||
| endif | endif | ||||
| GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 1) | |||||
| GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2) | GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 2) | ||||
| GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7) | GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) $(GCCDUMPVERSION_PARAM) | cut -f2 -d.` \>= 7) | ||||
| endif | endif | ||||
| @@ -602,6 +625,10 @@ DYNAMIC_CORE += EMAG8180 | |||||
| DYNAMIC_CORE += THUNDERX3T110 | DYNAMIC_CORE += THUNDERX3T110 | ||||
| endif | endif | ||||
| ifeq ($(ARCH), mips64) | |||||
| DYNAMIC_CORE = LOONGSON3R3 LOONGSON3R4 | |||||
| endif | |||||
| ifeq ($(ARCH), zarch) | ifeq ($(ARCH), zarch) | ||||
| DYNAMIC_CORE = ZARCH_GENERIC | DYNAMIC_CORE = ZARCH_GENERIC | ||||
| @@ -649,7 +676,7 @@ DYNAMIC_CORE += POWER9 | |||||
| else | else | ||||
| $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) | $(info, OpenBLAS: Your gcc version is too old to build the POWER9 kernels.) | ||||
| endif | endif | ||||
| LDVERSIONGTEQ35 := $(shell expr `ld --version | head -1 | cut -f2 -d "." | cut -f1 -d "-"` >= 35) | |||||
| LDVERSIONGTEQ35 := $(shell expr `$(CC) -Wl,--version 2> /dev/null | head -1 | cut -f2 -d "." | cut -f1 -d "-"` \>= 35) | |||||
| ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11) | ifeq ($(GCCVERSIONGTEQ11)$(LDVERSIONGTEQ35), 11) | ||||
| DYNAMIC_CORE += POWER10 | DYNAMIC_CORE += POWER10 | ||||
| CCOMMON_OPT += -DHAVE_P10_SUPPORT | CCOMMON_OPT += -DHAVE_P10_SUPPORT | ||||
| @@ -728,7 +755,10 @@ endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(ARCH), riscv64) | |||||
| NO_BINARY_MODE = 1 | |||||
| BINARY_DEFINED = 1 | |||||
| endif | |||||
| # | # | ||||
| @@ -761,14 +791,9 @@ CCOMMON_OPT += -mabi=32 | |||||
| BINARY_DEFINED = 1 | BINARY_DEFINED = 1 | ||||
| endif | endif | ||||
| ifeq ($(CORE), LOONGSON3A) | |||||
| CCOMMON_OPT += -march=mips64 | |||||
| FCOMMON_OPT += -march=mips64 | |||||
| endif | |||||
| ifeq ($(CORE), LOONGSON3B) | |||||
| CCOMMON_OPT += -march=mips64 | |||||
| FCOMMON_OPT += -march=mips64 | |||||
| ifeq ($(CORE), $(filter $(CORE),LOONGSON3R3 LOONGSON3R4)) | |||||
| CCOMMON_OPT += -march=loongson3a | |||||
| FCOMMON_OPT += -march=loongson3a | |||||
| endif | endif | ||||
| ifeq ($(CORE), MIPS24K) | ifeq ($(CORE), MIPS24K) | ||||
| @@ -810,7 +835,9 @@ endif | |||||
| ifndef BINARY_DEFINED | ifndef BINARY_DEFINED | ||||
| ifneq ($(OSNAME), AIX) | ifneq ($(OSNAME), AIX) | ||||
| ifdef BINARY64 | ifdef BINARY64 | ||||
| ifneq ($(ARCH), riscv64) | |||||
| CCOMMON_OPT += -m64 | CCOMMON_OPT += -m64 | ||||
| endif | |||||
| else | else | ||||
| CCOMMON_OPT += -m32 | CCOMMON_OPT += -m32 | ||||
| endif | endif | ||||
| @@ -855,7 +882,7 @@ CCOMMON_OPT += -DF_INTERFACE_FLANG | |||||
| FCOMMON_OPT += -Mrecursive -Kieee | FCOMMON_OPT += -Mrecursive -Kieee | ||||
| ifeq ($(OSNAME), Linux) | ifeq ($(OSNAME), Linux) | ||||
| ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
| FLANG_VENDOR := $(shell expr `$(FC) --version|cut -f 1 -d "."|head -1`) | |||||
| # Capture the text before the first "." on the first line of the Fortran compiler's | |||||
| # version banner. $(shell ...) already returns the command's stdout; wrapping the | |||||
| # pipeline in backticks would instead make the shell execute that banner as a command. | |||||
| # NOTE(review): confirm the captured text can actually equal the vendor string tested below. | |||||
| FLANG_VENDOR := $(shell $(FC) --version|cut -f 1 -d "."|head -1) | |||||
| ifeq ($(FLANG_VENDOR),AOCC) | ifeq ($(FLANG_VENDOR),AOCC) | ||||
| FCOMMON_OPT += -fno-unroll-loops | FCOMMON_OPT += -fno-unroll-loops | ||||
| endif | endif | ||||
| @@ -931,8 +958,10 @@ endif | |||||
| else | else | ||||
| ifdef BINARY64 | ifdef BINARY64 | ||||
| ifneq ($(OSNAME), AIX) | ifneq ($(OSNAME), AIX) | ||||
| ifneq ($(ARCH), riscv64) | |||||
| FCOMMON_OPT += -m64 | FCOMMON_OPT += -m64 | ||||
| endif | endif | ||||
| endif | |||||
| ifdef INTERFACE64 | ifdef INTERFACE64 | ||||
| ifneq ($(INTERFACE64), 0) | ifneq ($(INTERFACE64), 0) | ||||
| FCOMMON_OPT += -fdefault-integer-8 | FCOMMON_OPT += -fdefault-integer-8 | ||||
| @@ -1048,11 +1077,11 @@ FCOMMON_OPT += -n32 | |||||
| else | else | ||||
| FCOMMON_OPT += -n64 | FCOMMON_OPT += -n64 | ||||
| endif | endif | ||||
| ifeq ($(CORE), LOONGSON3A) | |||||
| ifeq ($(CORE), LOONGSON3R3) | |||||
| FCOMMON_OPT += -loongson3 -static | FCOMMON_OPT += -loongson3 -static | ||||
| endif | endif | ||||
| ifeq ($(CORE), LOONGSON3B) | |||||
| ifeq ($(CORE), LOONGSON3R4) | |||||
| FCOMMON_OPT += -loongson3 -static | FCOMMON_OPT += -loongson3 -static | ||||
| endif | endif | ||||
| @@ -1078,11 +1107,11 @@ CCOMMON_OPT += -n32 | |||||
| else | else | ||||
| CCOMMON_OPT += -n64 | CCOMMON_OPT += -n64 | ||||
| endif | endif | ||||
| ifeq ($(CORE), LOONGSON3A) | |||||
| ifeq ($(CORE), LOONGSON3R3) | |||||
| CCOMMON_OPT += -loongson3 -static | CCOMMON_OPT += -loongson3 -static | ||||
| endif | endif | ||||
| ifeq ($(CORE), LOONGSON3B) | |||||
| ifeq ($(CORE), LOONGSON3R4) | |||||
| CCOMMON_OPT += -loongson3 -static | CCOMMON_OPT += -loongson3 -static | ||||
| endif | endif | ||||
| @@ -1101,16 +1130,25 @@ CCOMMON_OPT += -w | |||||
| ifeq ($(ARCH), x86) | ifeq ($(ARCH), x86) | ||||
| CCOMMON_OPT += -m32 | CCOMMON_OPT += -m32 | ||||
| else | else | ||||
| FCOMMON_OPT += -m64 | |||||
| ifdef BINARY64 | |||||
| CCOMMON_OPT += -m64 | |||||
| else | |||||
| CCOMMON_OPT += -m32 | |||||
| endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(F_COMPILER), SUN) | ifeq ($(F_COMPILER), SUN) | ||||
| CCOMMON_OPT += -DF_INTERFACE_SUN | CCOMMON_OPT += -DF_INTERFACE_SUN | ||||
| FCOMMON_OPT += -ftrap=%none -xrecursive | |||||
| ifeq ($(ARCH), x86) | ifeq ($(ARCH), x86) | ||||
| FCOMMON_OPT += -m32 | FCOMMON_OPT += -m32 | ||||
| else | else | ||||
| ifdef BINARY64 | |||||
| FCOMMON_OPT += -m64 | FCOMMON_OPT += -m64 | ||||
| else | |||||
| FCOMMON_OPT += -m32 | |||||
| endif | |||||
| endif | endif | ||||
| ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
| FCOMMON_OPT += -xopenmp=parallel | FCOMMON_OPT += -xopenmp=parallel | ||||
| @@ -1184,10 +1222,8 @@ ifdef SMP | |||||
| CCOMMON_OPT += -DSMP_SERVER | CCOMMON_OPT += -DSMP_SERVER | ||||
| ifeq ($(ARCH), mips64) | ifeq ($(ARCH), mips64) | ||||
| ifneq ($(CORE), LOONGSON3B) | |||||
| USE_SIMPLE_THREADED_LEVEL3 = 1 | USE_SIMPLE_THREADED_LEVEL3 = 1 | ||||
| endif | endif | ||||
| endif | |||||
| ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
| # USE_SIMPLE_THREADED_LEVEL3 = 1 | # USE_SIMPLE_THREADED_LEVEL3 = 1 | ||||
| @@ -1262,10 +1298,14 @@ ifndef SYMBOLSUFFIX | |||||
| SYMBOLSUFFIX = | SYMBOLSUFFIX = | ||||
| endif | endif | ||||
| ifndef LIBSONAMEBASE | |||||
| LIBSONAMEBASE = openblas | |||||
| endif | |||||
| ifndef LIBNAMESUFFIX | ifndef LIBNAMESUFFIX | ||||
| LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX) | |||||
| LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX) | |||||
| else | else | ||||
| LIBNAMEBASE = $(SYMBOLPREFIX)openblas$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX) | |||||
| LIBNAMEBASE = $(SYMBOLPREFIX)$(LIBSONAMEBASE)$(SYMBOLSUFFIX)_$(LIBNAMESUFFIX) | |||||
| endif | endif | ||||
| ifeq ($(OSNAME), CYGWIN_NT) | ifeq ($(OSNAME), CYGWIN_NT) | ||||
| @@ -1279,8 +1319,10 @@ KERNELDIR = $(TOPDIR)/kernel/$(ARCH) | |||||
| include $(TOPDIR)/Makefile.$(ARCH) | include $(TOPDIR)/Makefile.$(ARCH) | ||||
| ifneq ($(C_COMPILER), PGI) | ifneq ($(C_COMPILER), PGI) | ||||
| ifneq ($(C_COMPILER), SUN) | |||||
| CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME | CCOMMON_OPT += -UASMNAME -UASMFNAME -UNAME -UCNAME -UCHAR_NAME -UCHAR_CNAME | ||||
| endif | endif | ||||
| endif | |||||
| CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\" | CCOMMON_OPT += -DASMNAME=$(FU)$(*F) -DASMFNAME=$(FU)$(*F)$(BU) -DNAME=$(*F)$(BU) -DCNAME=$(*F) -DCHAR_NAME=\"$(*F)$(BU)\" -DCHAR_CNAME=\"$(*F)\" | ||||
| ifeq ($(CORE), PPC440) | ifeq ($(CORE), PPC440) | ||||
| @@ -1297,11 +1339,9 @@ endif | |||||
| ifneq ($(ARCH), x86_64) | ifneq ($(ARCH), x86_64) | ||||
| ifneq ($(ARCH), x86) | ifneq ($(ARCH), x86) | ||||
| ifneq ($(CORE), LOONGSON3B) | |||||
| NO_AFFINITY = 1 | NO_AFFINITY = 1 | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| ifdef NO_AFFINITY | ifdef NO_AFFINITY | ||||
| ifeq ($(NO_AFFINITY), 0) | ifeq ($(NO_AFFINITY), 0) | ||||
| @@ -1515,6 +1555,8 @@ export HAVE_SSE4_2 | |||||
| export HAVE_SSE4A | export HAVE_SSE4A | ||||
| export HAVE_SSE5 | export HAVE_SSE5 | ||||
| export HAVE_AVX | export HAVE_AVX | ||||
| export HAVE_AVX2 | |||||
| export HAVE_FMA3 | |||||
| export HAVE_VFP | export HAVE_VFP | ||||
| export HAVE_VFPV3 | export HAVE_VFPV3 | ||||
| export HAVE_VFPV4 | export HAVE_VFPV4 | ||||
| @@ -1525,6 +1567,7 @@ export KERNELDIR | |||||
| export FUNCTION_PROFILE | export FUNCTION_PROFILE | ||||
| export TARGET_CORE | export TARGET_CORE | ||||
| export NO_AVX512 | export NO_AVX512 | ||||
| export NO_AVX2 | |||||
| export BUILD_BFLOAT16 | export BUILD_BFLOAT16 | ||||
| export SBGEMM_UNROLL_M | export SBGEMM_UNROLL_M | ||||
| @@ -1,5 +1,10 @@ | |||||
| # COMPILER_PREFIX = mingw32- | # COMPILER_PREFIX = mingw32- | ||||
| ifdef HAVE_SSE | |||||
| CCOMMON_OPT += -msse | |||||
| FCOMMON_OPT += -msse | |||||
| endif | |||||
| ifeq ($(OSNAME), Interix) | ifeq ($(OSNAME), Interix) | ||||
| ARFLAGS = -m x86 | ARFLAGS = -m x86 | ||||
| @@ -54,9 +59,11 @@ LIBATLAS = -L$(ATLASPATH)/32 -lcblas -lf77blas -latlas -lm | |||||
| else | else | ||||
| LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm | LIBATLAS = -L$(ATLASPATH)/32 -lptf77blas -lptatlas -lpthread -lm | ||||
| endif | endif | ||||
| ifdef HAVE_SSE2 | |||||
| CCOMMON_OPT += -msse2 | |||||
| FCOMMON_OPT += -msse2 | |||||
| endif | |||||
| ifdef HAVE_SSE3 | ifdef HAVE_SSE3 | ||||
| ifndef DYNAMIC_ARCH | |||||
| CCOMMON_OPT += -msse3 | CCOMMON_OPT += -msse3 | ||||
| FCOMMON_OPT += -msse3 | FCOMMON_OPT += -msse3 | ||||
| ifdef HAVE_SSSE3 | ifdef HAVE_SSSE3 | ||||
| @@ -68,5 +75,4 @@ CCOMMON_OPT += -msse4.1 | |||||
| FCOMMON_OPT += -msse4.1 | FCOMMON_OPT += -msse4.1 | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| @@ -9,9 +9,9 @@ endif | |||||
| endif | endif | ||||
| ifdef HAVE_SSE3 | ifdef HAVE_SSE3 | ||||
| ifndef DYNAMIC_ARCH | |||||
| CCOMMON_OPT += -msse3 | CCOMMON_OPT += -msse3 | ||||
| FCOMMON_OPT += -msse3 | FCOMMON_OPT += -msse3 | ||||
| endif | |||||
| ifdef HAVE_SSSE3 | ifdef HAVE_SSSE3 | ||||
| CCOMMON_OPT += -mssse3 | CCOMMON_OPT += -mssse3 | ||||
| FCOMMON_OPT += -mssse3 | FCOMMON_OPT += -mssse3 | ||||
| @@ -20,6 +20,22 @@ ifdef HAVE_SSE4_1 | |||||
| CCOMMON_OPT += -msse4.1 | CCOMMON_OPT += -msse4.1 | ||||
| FCOMMON_OPT += -msse4.1 | FCOMMON_OPT += -msse4.1 | ||||
| endif | endif | ||||
| ifndef OLDGCC | |||||
| ifdef HAVE_AVX | |||||
| CCOMMON_OPT += -mavx | |||||
| FCOMMON_OPT += -mavx | |||||
| endif | |||||
| endif | |||||
| ifndef NO_AVX2 | |||||
| ifdef HAVE_AVX2 | |||||
| CCOMMON_OPT += -mavx2 | |||||
| FCOMMON_OPT += -mavx2 | |||||
| endif | |||||
| endif | |||||
| ifndef OLDGCC | |||||
| ifdef HAVE_FMA3 | |||||
| CCOMMON_OPT += -mfma | |||||
| FCOMMON_OPT += -mfma | |||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -47,8 +63,6 @@ ifndef DYNAMIC_ARCH | |||||
| ifndef NO_AVX512 | ifndef NO_AVX512 | ||||
| ifeq ($(C_COMPILER), GCC) | ifeq ($(C_COMPILER), GCC) | ||||
| # cooperlake support was added in 10.1 | # cooperlake support was added in 10.1 | ||||
| GCCVERSIONGTEQ10 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 10) | |||||
| GCCMINORVERSIONGTEQ1 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 1) | |||||
| ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11) | ifeq ($(GCCVERSIONGTEQ10)$(GCCMINORVERSIONGTEQ1), 11) | ||||
| CCOMMON_OPT += -march=cooperlake | CCOMMON_OPT += -march=cooperlake | ||||
| FCOMMON_OPT += -march=cooperlake | FCOMMON_OPT += -march=cooperlake | ||||
| @@ -68,15 +82,11 @@ endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(CORE), $(filter $(CORE), HASWELL ZEN SKYLAKEX COOPERLAKE)) | |||||
| ifndef DYNAMIC_ARCH | |||||
| ifdef HAVE_AVX2 | |||||
| ifndef NO_AVX2 | ifndef NO_AVX2 | ||||
| ifeq ($(C_COMPILER), GCC) | ifeq ($(C_COMPILER), GCC) | ||||
| # AVX2 support was added in 4.7.0 | # AVX2 support was added in 4.7.0 | ||||
| GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4) | |||||
| GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5) | |||||
| GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7) | |||||
| GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7) | |||||
| GCCVERSIONCHECK := $(GCCVERSIONGT4)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7) | |||||
| ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) | ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111)) | ||||
| CCOMMON_OPT += -mavx2 | CCOMMON_OPT += -mavx2 | ||||
| endif | endif | ||||
| @@ -101,7 +111,6 @@ endif | |||||
| endif | endif | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| @@ -172,6 +172,13 @@ Please read `GotoBLAS_01Readme.txt` for older CPU models already supported by th | |||||
| - **Z13**: Optimized Level-3 BLAS and Level-1,2 | - **Z13**: Optimized Level-3 BLAS and Level-1,2 | ||||
| - **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2 | - **Z14**: Optimized Level-3 BLAS and (single precision) Level-1,2 | ||||
| #### RISC-V | |||||
| - **C910V**: Optimized Level-3 BLAS (real) and Level-1,2 by RISC-V Vector extension 0.7.1. | |||||
| ```sh | |||||
| make HOSTCC=gcc TARGET=C910V CC=riscv64-unknown-linux-gnu-gcc FC=riscv64-unknown-linux-gnu-gfortran | |||||
| ``` | |||||
| ### Support for multiple targets in a single library | ### Support for multiple targets in a single library | ||||
| OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake. | OpenBLAS can be built for multiple targets with runtime detection of the target cpu by specifying `DYNAMIC_ARCH=1` in Makefile.rule, on the gmake command line or as `-DDYNAMIC_ARCH=TRUE` in cmake. | ||||
| @@ -104,3 +104,8 @@ VORTEX | |||||
| ZARCH_GENERIC | ZARCH_GENERIC | ||||
| Z13 | Z13 | ||||
| Z14 | Z14 | ||||
| 10.RISC-V 64: | |||||
| RISCV64_GENERIC | |||||
| C910V | |||||
| @@ -25,125 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef AMAX | #undef AMAX | ||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define AMAX BLASFUNC(dzamax) | |||||
| #define AMAX BLASFUNC(dzamax) | |||||
| #else | #else | ||||
| #define AMAX BLASFUNC(scamax) | |||||
| #define AMAX BLASFUNC(scamax) | |||||
| #endif | #endif | ||||
| #else | #else | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define AMAX BLASFUNC(damax) | |||||
| #define AMAX BLASFUNC(damax) | |||||
| #else | #else | ||||
| #define AMAX BLASFUNC(samax) | |||||
| #define AMAX BLASFUNC(samax) | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | |||||
| int main(int argc, char *argv[]) | |||||
| { | |||||
| FLOAT *x; | FLOAT *x; | ||||
| blasint m, i; | blasint m, i; | ||||
| blasint inc_x=1; | |||||
| blasint inc_x = 1; | |||||
| int loops = 1; | int loops = 1; | ||||
| int l; | int l; | ||||
| char *p; | char *p; | ||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| struct timeval start, stop; | |||||
| double time1,timeg; | |||||
| double time1, timeg; | |||||
| argc--;argv++; | |||||
| argc--; | |||||
| argv++; | |||||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) | |||||
| { | |||||
| from = atol(*argv); | |||||
| argc--; | |||||
| argv++; | |||||
| } | |||||
| if (argc > 0) | |||||
| { | |||||
| to = MAX(atol(*argv), from); | |||||
| argc--; | |||||
| argv++; | |||||
| } | |||||
| if (argc > 0) | |||||
| { | |||||
| step = atol(*argv); | |||||
| argc--; | |||||
| argv++; | |||||
| } | |||||
| if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
| if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
| if ((p = getenv("OPENBLAS_LOOPS"))) | |||||
| loops = atoi(p); | |||||
| if ((p = getenv("OPENBLAS_INCX"))) | |||||
| inc_x = atoi(p); | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops); | |||||
| if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) | |||||
| { | |||||
| fprintf(stderr, "Out of Memory!!\n"); | |||||
| exit(1); | |||||
| } | } | ||||
| #ifdef __linux | #ifdef __linux | ||||
| @@ -152,37 +100,31 @@ int main(int argc, char *argv[]){ | |||||
| fprintf(stderr, " SIZE Flops\n"); | fprintf(stderr, " SIZE Flops\n"); | ||||
| for(m = from; m <= to; m += step) | |||||
| for (m = from; m <= to; m += step) | |||||
| { | { | ||||
| timeg=0; | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| timeg = 0; | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| for (l = 0; l < loops; l++) | |||||
| { | |||||
| for (l=0; l<loops; l++) | |||||
| { | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| AMAX (&m, x, &inc_x); | |||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| timeg += time1; | |||||
| for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) | |||||
| { | |||||
| x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; | |||||
| } | |||||
| begin(); | |||||
| AMAX(&m, x, &inc_x); | |||||
| end(); | |||||
| timeg += getsec(); | |||||
| } | } | ||||
| timeg /= loops; | timeg /= loops; | ||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops %10.6f sec\n", | |||||
| COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||||
| " %10.2f MFlops %10.6f sec\n", | |||||
| COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -25,124 +25,73 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef AMIN | #undef AMIN | ||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define AMIN BLASFUNC(dzamin) | |||||
| #define AMIN BLASFUNC(dzamin) | |||||
| #else | #else | ||||
| #define AMIN BLASFUNC(scamin) | |||||
| #define AMIN BLASFUNC(scamin) | |||||
| #endif | #endif | ||||
| #else | #else | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define AMIN BLASFUNC(damin) | |||||
| #define AMIN BLASFUNC(damin) | |||||
| #else | #else | ||||
| #define AMIN BLASFUNC(samin) | |||||
| #endif | |||||
| #endif | |||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #define AMIN BLASFUNC(samin) | |||||
| #endif | #endif | ||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | #endif | ||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | |||||
| int main(int argc, char *argv[]) | |||||
| { | |||||
| FLOAT *x; | FLOAT *x; | ||||
| blasint m, i; | blasint m, i; | ||||
| blasint inc_x=1; | |||||
| blasint inc_x = 1; | |||||
| int loops = 1; | int loops = 1; | ||||
| int l; | int l; | ||||
| char *p; | char *p; | ||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| struct timeval start, stop; | |||||
| double time1,timeg; | |||||
| double time1, timeg; | |||||
| argc--;argv++; | |||||
| argc--; | |||||
| argv++; | |||||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) | |||||
| { | |||||
| from = atol(*argv); | |||||
| argc--; | |||||
| argv++; | |||||
| } | |||||
| if (argc > 0) | |||||
| { | |||||
| to = MAX(atol(*argv), from); | |||||
| argc--; | |||||
| argv++; | |||||
| } | |||||
| if (argc > 0) | |||||
| { | |||||
| step = atol(*argv); | |||||
| argc--; | |||||
| argv++; | |||||
| } | |||||
| if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
| if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
| if ((p = getenv("OPENBLAS_LOOPS"))) | |||||
| loops = atoi(p); | |||||
| if ((p = getenv("OPENBLAS_INCX"))) | |||||
| inc_x = atoi(p); | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops); | |||||
| if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) | |||||
| { | |||||
| fprintf(stderr, "Out of Memory!!\n"); | |||||
| exit(1); | |||||
| } | } | ||||
| #ifdef __linux | #ifdef __linux | ||||
| @@ -151,39 +100,35 @@ int main(int argc, char *argv[]){ | |||||
| fprintf(stderr, " SIZE Flops\n"); | fprintf(stderr, " SIZE Flops\n"); | ||||
| for(m = from; m <= to; m += step) | |||||
| for (m = from; m <= to; m += step) | |||||
| { | { | ||||
| timeg=0; | |||||
| timeg = 0; | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| for (l = 0; l < loops; l++) | |||||
| { | |||||
| for (l=0; l<loops; l++) | |||||
| { | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| AMIN (&m, x, &inc_x); | |||||
| for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) | |||||
| { | |||||
| x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; | |||||
| } | |||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| begin(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| AMIN(&m, x, &inc_x); | |||||
| timeg += time1; | |||||
| end(); | |||||
| timeg += getsec(); | |||||
| } | } | ||||
| timeg /= loops; | timeg /= loops; | ||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops %10.6f sec\n", | |||||
| COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||||
| " %10.2f MFlops %10.6f sec\n", | |||||
| COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -25,178 +25,108 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef ASUM | #undef ASUM | ||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define ASUM BLASFUNC(dzasum) | |||||
| #define ASUM BLASFUNC(dzasum) | |||||
| #else | #else | ||||
| #define ASUM BLASFUNC(scasum) | |||||
| #define ASUM BLASFUNC(scasum) | |||||
| #endif | #endif | ||||
| #else | #else | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define ASUM BLASFUNC(dasum) | |||||
| #define ASUM BLASFUNC(dasum) | |||||
| #else | #else | ||||
| #define ASUM BLASFUNC(sasum) | |||||
| #define ASUM BLASFUNC(sasum) | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | |||||
| int main(int argc, char *argv[]) | |||||
| { | |||||
| FLOAT *x; | FLOAT *x; | ||||
| FLOAT result; | FLOAT result; | ||||
| blasint m, i; | blasint m, i; | ||||
| blasint inc_x=1; | |||||
| blasint inc_x = 1; | |||||
| int loops = 1; | int loops = 1; | ||||
| int l; | int l; | ||||
| char *p; | char *p; | ||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||||
| struct timeval start, stop; | |||||
| double time1,timeg; | |||||
| #else | |||||
| struct timespec start = { 0, 0 }, stop = { 0, 0 }; | |||||
| int from = 1; | |||||
| int to = 200; | |||||
| int step = 1; | |||||
| double time1, timeg; | double time1, timeg; | ||||
| #endif | |||||
| argc--;argv++; | |||||
| argc--; | |||||
| argv++; | |||||
| if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
| if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
| if (argc > 0) | |||||
| { | |||||
| from = atol(*argv); | |||||
| argc--; | |||||
| argv++; | |||||
| } | |||||
| if (argc > 0) | |||||
| { | |||||
| to = MAX(atol(*argv), from); | |||||
| argc--; | |||||
| argv++; | |||||
| } | |||||
| if (argc > 0) | |||||
| { | |||||
| step = atol(*argv); | |||||
| argc--; | |||||
| argv++; | |||||
| } | |||||
| if ((p = getenv("OPENBLAS_LOOPS"))) loops = atoi(p); | |||||
| if ((p = getenv("OPENBLAS_INCX"))) inc_x = atoi(p); | |||||
| if ((p = getenv("OPENBLAS_LOOPS"))) | |||||
| loops = atoi(p); | |||||
| if ((p = getenv("OPENBLAS_INCX"))) | |||||
| inc_x = atoi(p); | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step,inc_x,loops); | |||||
| fprintf(stderr, "From : %3d To : %3d Step = %3d Inc_x = %d Loops = %d\n", from, to, step, inc_x, loops); | |||||
| if (( x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL){ | |||||
| fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
| if ((x = (FLOAT *)malloc(sizeof(FLOAT) * to * abs(inc_x) * COMPSIZE)) == NULL) | |||||
| { | |||||
| fprintf(stderr, "Out of Memory!!\n"); | |||||
| exit(1); | |||||
| } | } | ||||
| #ifdef __linux | #ifdef __linux | ||||
| srandom(getpid()); | srandom(getpid()); | ||||
| #endif | #endif | ||||
| fprintf(stderr, " SIZE Flops\n"); | fprintf(stderr, " SIZE Flops\n"); | ||||
| for(m = from; m <= to; m += step) | |||||
| for (m = from; m <= to; m += step) | |||||
| { | { | ||||
| timeg=0; | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| timeg = 0; | |||||
| for (l=0; l<loops; l++) | |||||
| { | |||||
| fprintf(stderr, " %6d : ", (int)m); | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| #else | |||||
| clock_gettime(CLOCK_REALTIME, &start); | |||||
| #endif | |||||
| result = ASUM (&m, x, &inc_x); | |||||
| #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||||
| clock_gettime(CLOCK_REALTIME, &stop); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| #else | |||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9; | |||||
| #endif | |||||
| timeg += time1; | |||||
| for (l = 0; l < loops; l++) | |||||
| { | |||||
| for (i = 0; i < m * COMPSIZE * abs(inc_x); i++) | |||||
| { | |||||
| x[i] = ((FLOAT)rand() / (FLOAT)RAND_MAX) - 0.5; | |||||
| } | |||||
| begin(); | |||||
| result = ASUM(&m, x, &inc_x); | |||||
| end(); | |||||
| timeg += getsec(); | |||||
| } | } | ||||
| if (loops >1) | |||||
| timeg /= loops; | |||||
| if (loops > 1) | |||||
| timeg /= loops; | |||||
| #ifdef COMPLEX | #ifdef COMPLEX | ||||
| fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg); | fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 4. * (double)m / timeg * 1.e-6, timeg); | ||||
| #else | #else | ||||
| fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg); | fprintf(stderr, " %10.2f MFlops %10.6f sec\n", 2. * (double)m / timeg * 1.e-6, timeg); | ||||
| #endif | #endif | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef AXPBY | #undef AXPBY | ||||
| @@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y; | FLOAT *x, *y; | ||||
| @@ -129,7 +58,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -176,16 +104,10 @@ int main(int argc, char *argv[]){ | |||||
| for (l=0; l<loops; l++) | for (l=0; l<loops; l++) | ||||
| { | { | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y ); | AXPBY (&m, alpha, x, &inc_x, beta, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| timeg += time1; | |||||
| end(); | |||||
| timeg += getsec(); | |||||
| } | } | ||||
| timeg /= loops; | timeg /= loops; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef AXPY | #undef AXPY | ||||
| @@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y; | FLOAT *x, *y; | ||||
| @@ -127,8 +56,6 @@ int main(int argc, char *argv[]){ | |||||
| int from = 1; | int from = 1; | ||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timespec start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -175,13 +102,13 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| clock_gettime( CLOCK_REALTIME, &start); | |||||
| begin(); | |||||
| AXPY (&m, alpha, x, &inc_x, y, &inc_y ); | AXPY (&m, alpha, x, &inc_x, y, &inc_y ); | ||||
| clock_gettime( CLOCK_REALTIME, &stop); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -0,0 +1,104 @@ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #include <time.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||||
| struct timeval start, stop; | |||||
| #else | |||||
| struct timespec start = { 0, 0 }, stop = { 0, 0 }; | |||||
| #endif | |||||
| double getsec() | |||||
| { | |||||
| #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||||
| return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| #else | |||||
| return (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) * 1.e-9; | |||||
| #endif | |||||
| } | |||||
| void begin() { | |||||
| #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| #else | |||||
| clock_gettime(CLOCK_REALTIME, &start); | |||||
| #endif | |||||
| } | |||||
| void end() { | |||||
| #if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS) | |||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| #else | |||||
| clock_gettime(CLOCK_REALTIME, &stop); | |||||
| #endif | |||||
| } | |||||
| @@ -36,12 +36,7 @@ | |||||
| /* or implied, of The University of Texas at Austin. */ | /* or implied, of The University of Texas at Austin. */ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| double fabs(double); | double fabs(double); | ||||
| @@ -71,41 +66,6 @@ double fabs(double); | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| static __inline double getmflops(int ratio, int m, double secs){ | static __inline double getmflops(int ratio, int m, double secs){ | ||||
| double mm = (double)m; | double mm = (double)m; | ||||
| @@ -145,7 +105,6 @@ int main(int argc, char *argv[]){ | |||||
| FLOAT maxerr; | FLOAT maxerr; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -220,20 +179,19 @@ int main(int argc, char *argv[]){ | |||||
| SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); | SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| POTRF(uplo[uplos], &m, b, &m, &info); | POTRF(uplo[uplos], &m, b, &m, &info); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| if (info != 0) { | if (info != 0) { | ||||
| fprintf(stderr, "Info = %d\n", info); | fprintf(stderr, "Info = %d\n", info); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| maxerr = 0.; | |||||
| if (!(uplos & 1)) { | if (!(uplos & 1)) { | ||||
| for (j = 0; j < m; j++) { | for (j = 0; j < m; j++) { | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef COPY | #undef COPY | ||||
| @@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y; | FLOAT *x, *y; | ||||
| @@ -128,11 +57,9 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1 = 0.0, timeg = 0.0; | double time1 = 0.0, timeg = 0.0; | ||||
| long nanos = 0; | long nanos = 0; | ||||
| time_t seconds = 0; | time_t seconds = 0; | ||||
| struct timespec time_start = { 0, 0 }, time_end = { 0, 0 }; | |||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -176,15 +103,10 @@ int main(int argc, char *argv[]){ | |||||
| for (l=0; l<loops; l++) | for (l=0; l<loops; l++) | ||||
| { | { | ||||
| clock_gettime(CLOCK_REALTIME, &time_start); | |||||
| begin(); | |||||
| COPY (&m, x, &inc_x, y, &inc_y ); | COPY (&m, x, &inc_x, y, &inc_y ); | ||||
| clock_gettime(CLOCK_REALTIME, &time_end); | |||||
| nanos = time_end.tv_nsec - time_start.tv_nsec; | |||||
| seconds = time_end.tv_sec - time_start.tv_sec; | |||||
| time1 = seconds + nanos / 1.e9; | |||||
| timeg += time1; | |||||
| end(); | |||||
| timeg += getsec(); | |||||
| } | } | ||||
| timeg /= loops; | timeg /= loops; | ||||
| @@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef DOT | #undef DOT | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define DOT BLASFUNC(ddot) | #define DOT BLASFUNC(ddot) | ||||
| #else | #else | ||||
| #define DOT BLASFUNC(sdot) | #define DOT BLASFUNC(sdot) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y; | FLOAT *x, *y; | ||||
| @@ -122,7 +49,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -169,15 +95,12 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| result = DOT (&m, x, &inc_x, y, &inc_y ); | result = DOT (&m, x, &inc_x, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| timeg += time1; | |||||
| end(); | |||||
| timeg += getsec(); | |||||
| } | } | ||||
| @@ -36,13 +36,7 @@ | |||||
| /* or implied, of The University of Texas at Austin. */ | /* or implied, of The University of Texas at Austin. */ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef GEEV | #undef GEEV | ||||
| @@ -74,71 +68,6 @@ extern void GEEV( char* jobvl, char* jobvr, blasint* n, FLOAT* a, | |||||
| FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info ); | FLOAT* vr, blasint* ldvr, FLOAT* work, blasint* lwork, FLOAT *rwork, blasint* info ); | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork; | FLOAT *a,*vl,*vr,*wi,*wr,*work,*rwork; | ||||
| @@ -154,7 +83,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -223,7 +151,7 @@ int main(int argc, char *argv[]){ | |||||
| for(m = from; m <= to; m += step){ | for(m = from; m <= to; m += step){ | ||||
| fprintf(stderr, " %6d : ", (int)m); | fprintf(stderr, " %6d : ", (int)m); | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| lwork = -1; | lwork = -1; | ||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| @@ -239,14 +167,14 @@ int main(int argc, char *argv[]){ | |||||
| GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info); | GEEV (&job, &jobr, &m, a, &m, wr, vl, &m, vr, &m, work, &lwork,rwork, &info); | ||||
| #endif | #endif | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| if (info) { | if (info) { | ||||
| fprintf(stderr, "failed to compute eigenvalues .. %d\n", info); | fprintf(stderr, "failed to compute eigenvalues .. %d\n", info); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops : %10.2f Sec : %d\n", | " %10.2f MFlops : %10.2f Sec : %d\n", | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef GEMM | #undef GEMM | ||||
| @@ -55,71 +49,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| IFLOAT *a, *b; | IFLOAT *a, *b; | ||||
| @@ -139,7 +68,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1, timeg; | double time1, timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -228,14 +156,14 @@ int main(int argc, char *argv[]){ | |||||
| ldc = m; | ldc = m; | ||||
| fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k); | fprintf(stderr, " M=%4d, N=%4d, K=%4d : ", (int)m, (int)n, (int)k); | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| for (j=0; j<loops; j++) { | for (j=0; j<loops; j++) { | ||||
| GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); | GEMM (&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); | ||||
| } | } | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| end(); | |||||
| time1 = getsec(); | |||||
| timeg = time1/loops; | timeg = time1/loops; | ||||
| fprintf(stderr, | fprintf(stderr, | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef GEMM | #undef GEMM | ||||
| @@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *b, *c; | FLOAT *a, *b, *c; | ||||
| @@ -133,7 +62,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -187,16 +115,12 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | GEMM (&trans, &trans, &m, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| timeg += time1; | |||||
| end(); | |||||
| timeg += getsec(); | |||||
| } | } | ||||
| timeg /= loops; | timeg /= loops; | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef GEMV | #undef GEMV | ||||
| @@ -52,72 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| @@ -137,7 +66,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -211,10 +139,10 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| end(); | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| @@ -248,10 +176,10 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | GEMV (&trans, &m, &n, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| end(); | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef GER | #undef GER | ||||
| @@ -49,72 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| @@ -131,7 +59,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -198,16 +125,13 @@ int main(int argc, char *argv[]){ | |||||
| for (l=0; l<loops; l++) | for (l=0; l<loops; l++) | ||||
| { | { | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m); | GER (&m, &n, alpha, x, &inc_x, y, &inc_y, a , &m); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| timeg += time1; | |||||
| end(); | |||||
| timeg += getsec(); | |||||
| } | } | ||||
| timeg /= loops; | timeg /= loops; | ||||
| @@ -36,12 +36,7 @@ | |||||
| /* or implied, of The University of Texas at Austin. */ | /* or implied, of The University of Texas at Austin. */ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| double fabs(double); | double fabs(double); | ||||
| @@ -66,71 +61,6 @@ double fabs(double); | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *b; | FLOAT *a, *b; | ||||
| @@ -142,7 +72,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -194,22 +123,18 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| GESV (&m, &m, a, &m, ipiv, b, &m, &info); | GESV (&m, &m, a, &m, ipiv, b, &m, &info); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| end(); | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| "%10.2f MFlops %10.6f s\n", | "%10.2f MFlops %10.6f s\n", | ||||
| COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1); | COMPSIZE * COMPSIZE * (2. / 3. * (double)m * (double)m * (double)m + 2. * (double)m * (double)m * (double)m ) / (time1) * 1.e-6 , time1); | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -36,12 +36,7 @@ | |||||
| /* or implied, of The University of Texas at Austin. */ | /* or implied, of The University of Texas at Austin. */ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef GETRF | #undef GETRF | ||||
| #undef GETRI | #undef GETRI | ||||
| @@ -72,71 +67,6 @@ | |||||
| extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info); | extern void GETRI(blasint *m, FLOAT *a, blasint *lda, blasint *ipiv, FLOAT *work, blasint *lwork, blasint *info); | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a,*work; | FLOAT *a,*work; | ||||
| @@ -148,7 +78,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -205,21 +134,21 @@ int main(int argc, char *argv[]){ | |||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| lwork = -1; | lwork = -1; | ||||
| GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info); | GETRI(&m, a, &m, ipiv, wkopt, &lwork, &info); | ||||
| lwork = (blasint)wkopt[0]; | lwork = (blasint)wkopt[0]; | ||||
| GETRI(&m, a, &m, ipiv, work, &lwork, &info); | GETRI(&m, a, &m, ipiv, work, &lwork, &info); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| if (info) { | if (info) { | ||||
| fprintf(stderr, "failed compute inverse matrix .. %d\n", info); | fprintf(stderr, "failed compute inverse matrix .. %d\n", info); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops : %10.2f Sec : %d\n", | " %10.2f MFlops : %10.2f Sec : %d\n", | ||||
| @@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef HBMV | #undef HBMV | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define HBMV BLASFUNC(zhbmv) | #define HBMV BLASFUNC(zhbmv) | ||||
| #else | #else | ||||
| #define HBMV BLASFUNC(chbmv) | #define HBMV BLASFUNC(chbmv) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz) { | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size) { | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| @@ -125,7 +52,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -186,15 +112,13 @@ int main(int argc, char *argv[]){ | |||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | HBMV (&uplo, &m, &k, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| end(); | |||||
| timeg += time1; | |||||
| timeg += getsec(); | |||||
| } | } | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef HEMM | #undef HEMM | ||||
| @@ -41,72 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define HEMM BLASFUNC(chemm) | #define HEMM BLASFUNC(chemm) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *b, *c; | FLOAT *a, *b, *c; | ||||
| @@ -126,7 +54,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -170,13 +97,13 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | HEMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| @@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef HEMV | #undef HEMV | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define HEMV BLASFUNC(zhemv) | #define HEMV BLASFUNC(zhemv) | ||||
| #else | #else | ||||
| #define HEMV BLASFUNC(chemv) | #define HEMV BLASFUNC(chemv) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| @@ -124,7 +51,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -182,13 +108,13 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | HEMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef HER | #undef HER | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define HER BLASFUNC(zher) | #define HER BLASFUNC(zher) | ||||
| #else | #else | ||||
| #define HER BLASFUNC(cher) | #define HER BLASFUNC(cher) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x; | FLOAT *a, *x; | ||||
| @@ -126,8 +53,6 @@ int main(int argc, char *argv[]){ | |||||
| int from = 1; | int from = 1; | ||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -166,15 +91,13 @@ int main(int argc, char *argv[]){ | |||||
| x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | x[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| HER (&uplo, &m, alpha, x, &incx, a, &m ); | HER (&uplo, &m, alpha, x, &incx, a, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| end(); | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| @@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef HER2 | #undef HER2 | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define HER2 BLASFUNC(zher2) | #define HER2 BLASFUNC(zher2) | ||||
| #else | #else | ||||
| #define HER2 BLASFUNC(cher2) | #define HER2 BLASFUNC(cher2) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| @@ -127,7 +54,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -169,16 +95,13 @@ int main(int argc, char *argv[]){ | |||||
| y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[ (long)j * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m ); | HER2 (&uplo, &m, alpha, x, &inc, y, &inc, a, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| end(); | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef HER2K | #undef HER2K | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| @@ -40,72 +34,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define HER2K BLASFUNC(cher2k) | #define HER2K BLASFUNC(cher2k) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *b, *c; | FLOAT *a, *b, *c; | ||||
| @@ -125,7 +53,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -169,13 +96,13 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | HER2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| @@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef HERK | #undef HERK | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define HERK BLASFUNC(zherk) | #define HERK BLASFUNC(zherk) | ||||
| #else | #else | ||||
| #define HERK BLASFUNC(cherk) | #define HERK BLASFUNC(cherk) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *c; | FLOAT *a, *c; | ||||
| @@ -127,7 +54,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -167,18 +93,17 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); | HERK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | ||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -25,89 +25,16 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef HPMV | #undef HPMV | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define HPMV BLASFUNC(zhpmv) | #define HPMV BLASFUNC(zhpmv) | ||||
| #else | #else | ||||
| #define HPMV BLASFUNC(chpmv) | #define HPMV BLASFUNC(chpmv) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz) { | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size) { | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| @@ -124,7 +51,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -183,13 +109,13 @@ int main(int argc, char *argv[]){ | |||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); | HPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef IAMAX | #undef IAMAX | ||||
| @@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x; | FLOAT *x; | ||||
| @@ -127,7 +56,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -166,13 +94,13 @@ int main(int argc, char *argv[]){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| IAMAX (&m, x, &inc_x); | IAMAX (&m, x, &inc_x); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef IAMIN | #undef IAMIN | ||||
| @@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x; | FLOAT *x; | ||||
| @@ -127,7 +56,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -166,13 +94,13 @@ int main(int argc, char *argv[]){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| IAMIN (&m, x, &inc_x); | IAMIN (&m, x, &inc_x); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef IMAX | #undef IMAX | ||||
| @@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x; | FLOAT *x; | ||||
| @@ -121,7 +50,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -160,13 +88,13 @@ int main(int argc, char *argv[]){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| IMAX (&m, x, &inc_x); | IMAX (&m, x, &inc_x); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef IMIN | #undef IMIN | ||||
| @@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x; | FLOAT *x; | ||||
| @@ -121,7 +50,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -160,13 +88,13 @@ int main(int argc, char *argv[]){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| IMIN (&m, x, &inc_x); | IMIN (&m, x, &inc_x); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -36,12 +36,7 @@ | |||||
| /* or implied, of The University of Texas at Austin. */ | /* or implied, of The University of Texas at Austin. */ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| double fabs(double); | double fabs(double); | ||||
| @@ -72,71 +67,6 @@ double fabs(double); | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *b; | FLOAT *a, *b; | ||||
| @@ -151,7 +81,6 @@ int main(int argc, char *argv[]){ | |||||
| FLOAT maxerr; | FLOAT maxerr; | ||||
| struct timeval start, stop; | |||||
| double time1, time2; | double time1, time2; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -198,31 +127,31 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| GETRF (&m, &m, a, &m, ipiv, &info); | GETRF (&m, &m, a, &m, ipiv, &info); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| if (info) { | if (info) { | ||||
| fprintf(stderr, "Matrix is not singular .. %d\n", info); | fprintf(stderr, "Matrix is not singular .. %d\n", info); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info); | GETRS("N", &m, &unit, a, &m, ipiv, b, &m, &info); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| if (info) { | if (info) { | ||||
| fprintf(stderr, "Matrix is not singular .. %d\n", info); | fprintf(stderr, "Matrix is not singular .. %d\n", info); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| time2 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time2 = getsec(); | |||||
| maxerr = 0.; | maxerr = 0.; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef NAMAX | #undef NAMAX | ||||
| @@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x; | FLOAT *x; | ||||
| @@ -121,7 +50,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -160,13 +88,13 @@ int main(int argc, char *argv[]){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| NAMAX (&m, x, &inc_x); | NAMAX (&m, x, &inc_x); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef NAMIN | #undef NAMIN | ||||
| @@ -43,71 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x; | FLOAT *x; | ||||
| @@ -121,7 +50,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -160,13 +88,13 @@ int main(int argc, char *argv[]){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| NAMIN (&m, x, &inc_x); | NAMIN (&m, x, &inc_x); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef NRM2 | #undef NRM2 | ||||
| @@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x; | FLOAT *x; | ||||
| @@ -127,7 +56,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -166,13 +94,13 @@ int main(int argc, char *argv[]){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| NRM2 (&m, x, &inc_x); | NRM2 (&m, x, &inc_x); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -36,12 +36,7 @@ | |||||
| /* or implied, of The University of Texas at Austin. */ | /* or implied, of The University of Texas at Austin. */ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| double fabs(double); | double fabs(double); | ||||
| @@ -86,37 +81,7 @@ double fabs(double); | |||||
| // extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info); | // extern void POTRI(char *uplo, blasint *m, FLOAT *a, blasint *lda, blasint *info); | ||||
| // extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info); | // extern void POTRS(char *uplo, blasint *m, blasint *n, FLOAT *a, blasint *lda, FLOAT *b, blasint *ldb, blasint *info); | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| @@ -141,7 +106,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -217,18 +181,18 @@ int main(int argc, char *argv[]){ | |||||
| SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); | SYRK(uplo[uplos], trans[uplos], &m, &m, alpha, a, &m, beta, b, &m); | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| POTRF(uplo[uplos], &m, b, &m, &info); | POTRF(uplo[uplos], &m, b, &m, &info); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| if (info != 0) { | if (info != 0) { | ||||
| fprintf(stderr, "Potrf info = %d\n", info); | fprintf(stderr, "Potrf info = %d\n", info); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6; | flops = COMPSIZE * COMPSIZE * (1.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 1.0/6.0* (double)m) / time1 * 1.e-6; | ||||
| if ( btest == 'S' ) | if ( btest == 'S' ) | ||||
| @@ -240,17 +204,17 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info); | POTRS(uplo[uplos], &m, &m, b, &m, a, &m, &info); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| if (info != 0) { | if (info != 0) { | ||||
| fprintf(stderr, "Potrs info = %d\n", info); | fprintf(stderr, "Potrs info = %d\n", info); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6; | flops = COMPSIZE * COMPSIZE * (2.0 * (double)m * (double)m *(double)m ) / time1 * 1.e-6; | ||||
| } | } | ||||
| @@ -258,18 +222,18 @@ int main(int argc, char *argv[]){ | |||||
| if ( btest == 'I' ) | if ( btest == 'I' ) | ||||
| { | { | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| POTRI(uplo[uplos], &m, b, &m, &info); | POTRI(uplo[uplos], &m, b, &m, &info); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| if (info != 0) { | if (info != 0) { | ||||
| fprintf(stderr, "Potri info = %d\n", info); | fprintf(stderr, "Potri info = %d\n", info); | ||||
| exit(1); | exit(1); | ||||
| } | } | ||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6; | flops = COMPSIZE * COMPSIZE * (2.0/3.0 * (double)m * (double)m *(double)m +1.0/2.0* (double)m *(double)m + 5.0/6.0* (double)m) / time1 * 1.e-6; | ||||
| } | } | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef ROT | #undef ROT | ||||
| @@ -52,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y; | FLOAT *x, *y; | ||||
| @@ -133,7 +63,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -179,13 +108,13 @@ int main(int argc, char *argv[]){ | |||||
| for (l=0; l<loops; l++) | for (l=0; l<loops; l++) | ||||
| { | { | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| ROT (&m, x, &inc_x, y, &inc_y, c, s); | ROT (&m, x, &inc_x, y, &inc_y, c, s); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,12 +25,7 @@ LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF | |||||
| THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef ROTM | #undef ROTM | ||||
| @@ -40,72 +35,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ROTM BLASFUNC(srotm) | #define ROTM BLASFUNC(srotm) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz) | |||||
| { | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size) | |||||
| { | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid = | |||||
| shmget(IPC_PRIVATE, (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT | 0600)) < 0) { | |||||
| printf("Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1) { | |||||
| printf("Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]) | int main(int argc, char *argv[]) | ||||
| { | { | ||||
| @@ -122,7 +51,7 @@ int main(int argc, char *argv[]) | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1, timeg; | double time1, timeg; | ||||
| argc--; | argc--; | ||||
| @@ -188,14 +117,13 @@ int main(int argc, char *argv[]) | |||||
| } | } | ||||
| for (l = 0; l < loops; l++) { | for (l = 0; l < loops; l++) { | ||||
| gettimeofday(&start, (struct timezone *)0); | |||||
| begin(); | |||||
| ROTM(&m, x, &inc_x, y, &inc_y, param); | ROTM(&m, x, &inc_x, y, &inc_y, param); | ||||
| gettimeofday(&stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + | |||||
| (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SCAL | #undef SCAL | ||||
| @@ -49,71 +43,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y; | FLOAT *x, *y; | ||||
| @@ -128,7 +57,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -174,13 +102,13 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SCAL (&m, alpha, x, &inc_x); | SCAL (&m, alpha, x, &inc_x); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,17 +25,10 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SPMV | #undef SPMV | ||||
| #ifndef COMPLEX | #ifndef COMPLEX | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| @@ -54,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| @@ -135,7 +63,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -193,13 +120,13 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); | SPMV (&uplo, &m, alpha, a, x, &inc_x, beta, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SPR | #undef SPR | ||||
| @@ -41,73 +35,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SPR BLASFUNC(sspr) | #define SPR BLASFUNC(sspr) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a,*c; | FLOAT *a,*c; | ||||
| @@ -129,7 +56,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -173,13 +99,13 @@ int main(int argc, char *argv[]){ | |||||
| c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SPR (&uplo, &m, alpha, c, &inc_x, a); | SPR (&uplo, &m, alpha, c, &inc_x, a); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SPR2 | #undef SPR2 | ||||
| @@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a,*b,*c; | FLOAT *a,*b,*c; | ||||
| @@ -129,7 +58,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -182,13 +110,13 @@ int main(int argc, char *argv[]){ | |||||
| c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | c[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a); | SPR2 (&uplo, &m, alpha, c, &inc_x, b, &inc_y, a); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SWAP | #undef SWAP | ||||
| @@ -49,71 +44,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y; | FLOAT *x, *y; | ||||
| @@ -128,7 +58,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -175,13 +104,13 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SWAP (&m, x, &inc_x, y, &inc_y ); | SWAP (&m, x, &inc_x, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SYMM | #undef SYMM | ||||
| @@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *b, *c; | FLOAT *a, *b, *c; | ||||
| @@ -137,7 +66,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -181,13 +109,13 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | SYMM (&side, &uplo, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SYMV | #undef SYMV | ||||
| @@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x, *y; | FLOAT *a, *x, *y; | ||||
| @@ -134,7 +63,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -192,13 +120,13 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | SYMV (&uplo, &m, alpha, a, &m, x, &inc_x, beta, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SYR | #undef SYR | ||||
| @@ -42,72 +37,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x,*a; | FLOAT *x,*a; | ||||
| @@ -124,7 +53,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -165,13 +93,13 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SYR (&uplo, &m, alpha, x, &inc_x, a, &m ); | SYR (&uplo, &m, alpha, x, &inc_x, a, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SYR2 | #undef SYR2 | ||||
| @@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SYR2 BLASFUNC(ssyr2) | #define SYR2 BLASFUNC(ssyr2) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y, *a; | FLOAT *x, *y, *a; | ||||
| @@ -125,7 +53,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -174,13 +101,13 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m ); | SYR2 (&uplo, &m, alpha, x, &inc_x, y, &inc_y, a, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SYR2K | #undef SYR2K | ||||
| @@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *b, *c; | FLOAT *a, *b, *c; | ||||
| @@ -137,7 +67,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -181,13 +110,13 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | SYR2K (&uplo, &trans, &m, &m, alpha, a, &m, b, &m, beta, c, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef SYRK | #undef SYRK | ||||
| @@ -53,71 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *c; | FLOAT *a, *c; | ||||
| @@ -137,7 +66,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -177,13 +105,13 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); | SYRK (&uplo, &trans, &m, &m, alpha, a, &m, beta, c, &m ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops\n", | " %10.2f MFlops\n", | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef TPMV | #undef TPMV | ||||
| @@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size) | |||||
| { | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1) { | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]) | int main(int argc, char *argv[]) | ||||
| { | { | ||||
| @@ -112,7 +73,6 @@ int main(int argc, char *argv[]) | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timespec start = { 0, 0 }, stop = { 0, 0 }; | |||||
| double time1, timeg; | double time1, timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -153,11 +113,11 @@ int main(int argc, char *argv[]) | |||||
| } | } | ||||
| for (l = 0; l < loops; l++) { | for (l = 0; l < loops; l++) { | ||||
| clock_gettime(CLOCK_REALTIME, &start); | |||||
| begin(); | |||||
| TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x); | TPMV (&uplo, &trans, &diag, &n, a, x, &inc_x); | ||||
| clock_gettime(CLOCK_REALTIME, &stop); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef TPSV | #undef TPSV | ||||
| @@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size) | |||||
| { | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1) { | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]) | int main(int argc, char *argv[]) | ||||
| { | { | ||||
| @@ -112,7 +73,6 @@ int main(int argc, char *argv[]) | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timespec start = { 0, 0 }, stop = { 0, 0 }; | |||||
| double time1, timeg; | double time1, timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -153,11 +113,11 @@ int main(int argc, char *argv[]) | |||||
| } | } | ||||
| for (l = 0; l < loops; l++) { | for (l = 0; l < loops; l++) { | ||||
| clock_gettime(CLOCK_REALTIME, &start); | |||||
| begin(); | |||||
| TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x); | TPSV (&uplo, &trans, &diag, &n, a, x, &inc_x); | ||||
| clock_gettime(CLOCK_REALTIME, &stop); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef TRMM | #undef TRMM | ||||
| @@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *b; | FLOAT *a, *b; | ||||
| @@ -141,7 +71,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -180,13 +109,13 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | TRMM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| fprintf(stderr, | fprintf(stderr, | ||||
| " %10.2f MFlops %10.6f sec\n", | " %10.2f MFlops %10.6f sec\n", | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef TRMV | #undef TRMV | ||||
| @@ -52,40 +47,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size) | |||||
| { | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1) { | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]) | int main(int argc, char *argv[]) | ||||
| { | { | ||||
| @@ -112,7 +73,6 @@ int main(int argc, char *argv[]) | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timespec start = { 0, 0 }, stop = { 0, 0 }; | |||||
| double time1, timeg; | double time1, timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -153,11 +113,11 @@ int main(int argc, char *argv[]) | |||||
| } | } | ||||
| for (l = 0; l < loops; l++) { | for (l = 0; l < loops; l++) { | ||||
| clock_gettime(CLOCK_REALTIME, &start); | |||||
| begin(); | |||||
| TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x); | TRMV (&uplo, &trans, &diag, &n, a, &n, x, &inc_x); | ||||
| clock_gettime(CLOCK_REALTIME, &stop); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_nsec - start.tv_nsec)) / 1.e9; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| @@ -25,12 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef TRSM | #undef TRSM | ||||
| @@ -53,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *b; | FLOAT *a, *b; | ||||
| @@ -151,7 +81,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1; | double time1; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -196,13 +125,13 @@ int main(int argc, char *argv[]){ | |||||
| } | } | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| @@ -25,14 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include <time.h> | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef GEMV | #undef GEMV | ||||
| #undef TRSV | #undef TRSV | ||||
| @@ -55,71 +48,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *a, *x; | FLOAT *a, *x; | ||||
| @@ -133,7 +61,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timespec time_start, time_end; | |||||
| time_t seconds = 0; | time_t seconds = 0; | ||||
| double time1,timeg; | double time1,timeg; | ||||
| @@ -189,19 +116,13 @@ int main(int argc, char *argv[]){ | |||||
| for(l =0;l< loops;l++){ | for(l =0;l< loops;l++){ | ||||
| clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_start); | |||||
| begin(); | |||||
| TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x); | TRSV(&uplo,&transa,&diag,&n,a,&n,x,&inc_x); | ||||
| clock_gettime(CLOCK_PROCESS_CPUTIME_ID,&time_end); | |||||
| nanos = time_end.tv_nsec - time_start.tv_nsec; | |||||
| seconds = time_end.tv_sec - time_start.tv_sec; | |||||
| time1 = seconds + nanos /1.e9; | |||||
| end(); | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| } | } | ||||
| timeg /= loops; | timeg /= loops; | ||||
| long long muls = n*(n+1)/2.0; | long long muls = n*(n+1)/2.0; | ||||
| long long adds = (n - 1.0)*n/2.0; | long long adds = (n - 1.0)*n/2.0; | ||||
| @@ -25,90 +25,18 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #define RETURN_BY_STACK 1 | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #define RETURN_BY_STACK 1 | |||||
| #undef DOT | #undef DOT | ||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define DOT BLASFUNC(zdotu) | #define DOT BLASFUNC(zdotu) | ||||
| #else | #else | ||||
| #define DOT BLASFUNC(cdotu) | #define DOT BLASFUNC(cdotu) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y; | FLOAT *x, *y; | ||||
| @@ -123,7 +51,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -170,13 +97,13 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| DOT (&result, &m, x, &inc_x, y, &inc_y ); | DOT (&result, &m, x, &inc_x, y, &inc_y ); | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -25,13 +25,7 @@ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||||
| *****************************************************************************/ | *****************************************************************************/ | ||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #ifdef __CYGWIN32__ | |||||
| #include <sys/time.h> | |||||
| #endif | |||||
| #include "common.h" | |||||
| #include "bench.h" | |||||
| #undef DOT | #undef DOT | ||||
| @@ -42,72 +36,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define DOT BLASFUNC(cdotu) | #define DOT BLASFUNC(cdotu) | ||||
| #endif | #endif | ||||
| #if defined(__WIN32__) || defined(__WIN64__) | |||||
| #ifndef DELTA_EPOCH_IN_MICROSECS | |||||
| #define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
| #endif | |||||
| int gettimeofday(struct timeval *tv, void *tz){ | |||||
| FILETIME ft; | |||||
| unsigned __int64 tmpres = 0; | |||||
| static int tzflag; | |||||
| if (NULL != tv) | |||||
| { | |||||
| GetSystemTimeAsFileTime(&ft); | |||||
| tmpres |= ft.dwHighDateTime; | |||||
| tmpres <<= 32; | |||||
| tmpres |= ft.dwLowDateTime; | |||||
| /*converting file time to unix epoch*/ | |||||
| tmpres /= 10; /*convert into microseconds*/ | |||||
| tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
| tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
| tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| #endif | |||||
| #if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
| static void *huge_malloc(BLASLONG size){ | |||||
| int shmid; | |||||
| void *address; | |||||
| #ifndef SHM_HUGETLB | |||||
| #define SHM_HUGETLB 04000 | |||||
| #endif | |||||
| if ((shmid =shmget(IPC_PRIVATE, | |||||
| (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1), | |||||
| SHM_HUGETLB | IPC_CREAT |0600)) < 0) { | |||||
| printf( "Memory allocation failed(shmget).\n"); | |||||
| exit(1); | |||||
| } | |||||
| address = shmat(shmid, NULL, SHM_RND); | |||||
| if ((BLASLONG)address == -1){ | |||||
| printf( "Memory allocation failed(shmat).\n"); | |||||
| exit(1); | |||||
| } | |||||
| shmctl(shmid, IPC_RMID, 0); | |||||
| return address; | |||||
| } | |||||
| #define malloc huge_malloc | |||||
| #endif | |||||
| int main(int argc, char *argv[]){ | int main(int argc, char *argv[]){ | ||||
| FLOAT *x, *y; | FLOAT *x, *y; | ||||
| @@ -122,7 +50,6 @@ int main(int argc, char *argv[]){ | |||||
| int to = 200; | int to = 200; | ||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | |||||
| double time1,timeg; | double time1,timeg; | ||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -169,15 +96,15 @@ int main(int argc, char *argv[]){ | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | ||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | ||||
| } | } | ||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| begin(); | |||||
| #ifdef RETURN_BY_STACK | #ifdef RETURN_BY_STACK | ||||
| DOT (&result , &m, x, &inc_x, y, &inc_y ); | DOT (&result , &m, x, &inc_x, y, &inc_y ); | ||||
| #else | #else | ||||
| result = DOT (&m, x, &inc_x, y, &inc_y ); | result = DOT (&m, x, &inc_x, y, &inc_y ); | ||||
| #endif | #endif | ||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| end(); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| time1 = getsec(); | |||||
| timeg += time1; | timeg += time1; | ||||
| @@ -6,7 +6,8 @@ | |||||
| # Checking cross compile | # Checking cross compile | ||||
| $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | $hostos = `uname -s | sed -e s/\-.*//`; chop($hostos); | ||||
| $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); | $hostarch = `uname -m | sed -e s/i.86/x86/`;chop($hostarch); | ||||
| $hostarch = `uname -p` if ($hostos eq "AIX"); | |||||
| $hostarch = `uname -p` if ($hostos eq "AIX" || $hostos eq "SunOS"); | |||||
| chop($hostarch); | |||||
| $hostarch = "x86_64" if ($hostarch eq "amd64"); | $hostarch = "x86_64" if ($hostarch eq "amd64"); | ||||
| $hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); | $hostarch = "arm" if ($hostarch ne "arm64" && $hostarch =~ /^arm.*/); | ||||
| $hostarch = "arm64" if ($hostarch eq "aarch64"); | $hostarch = "arm64" if ($hostarch eq "aarch64"); | ||||
| @@ -92,6 +93,7 @@ $architecture = ia64 if ($data =~ /ARCH_IA64/); | |||||
| $architecture = arm if ($data =~ /ARCH_ARM/); | $architecture = arm if ($data =~ /ARCH_ARM/); | ||||
| $architecture = arm64 if ($data =~ /ARCH_ARM64/); | $architecture = arm64 if ($data =~ /ARCH_ARM64/); | ||||
| $architecture = zarch if ($data =~ /ARCH_ZARCH/); | $architecture = zarch if ($data =~ /ARCH_ZARCH/); | ||||
| $architecture = riscv64 if ($data =~ /ARCH_RISCV64/); | |||||
| $defined = 0; | $defined = 0; | ||||
| @@ -136,6 +138,11 @@ if (($architecture eq "x86") && ($os ne Darwin) && ($os ne SunOS)) { | |||||
| $binary =32; | $binary =32; | ||||
| } | } | ||||
| if ($architecture eq "riscv64") { | |||||
| $defined = 1; | |||||
| $binary = 64; | |||||
| } | |||||
| if ($compiler eq "PGI") { | if ($compiler eq "PGI") { | ||||
| $compiler_name .= " -tp p7" if ($binary eq "32"); | $compiler_name .= " -tp p7" if ($binary eq "32"); | ||||
| $compiler_name .= " -tp p7-64" if ($binary eq "64"); | $compiler_name .= " -tp p7-64" if ($binary eq "64"); | ||||
| @@ -192,7 +199,7 @@ if (($architecture eq "mips") || ($architecture eq "mips64")) { | |||||
| } else { | } else { | ||||
| $tmpf = new File::Temp( SUFFIX => '.c' , UNLINK => 1 ); | $tmpf = new File::Temp( SUFFIX => '.c' , UNLINK => 1 ); | ||||
| $code = '"addvi.b $w0, $w1, 1"'; | $code = '"addvi.b $w0, $w1, 1"'; | ||||
| $msa_flags = "-mmsa -mfp64 -msched-weight -mload-store-pairs"; | |||||
| $msa_flags = "-mmsa -mfp64 -mload-store-pairs"; | |||||
| print $tmpf "#include <msa.h>\n\n"; | print $tmpf "#include <msa.h>\n\n"; | ||||
| print $tmpf "void main(void){ __asm__ volatile($code); }\n"; | print $tmpf "void main(void){ __asm__ volatile($code); }\n"; | ||||
| @@ -270,6 +277,15 @@ if ($data =~ /HAVE_C11/) { | |||||
| } | } | ||||
| } | } | ||||
| if ($compiler eq "GCC" &&( ($architecture eq "x86") || ($architecture eq "x86_64"))) { | |||||
| $no_avx2 = 0; | |||||
| $oldgcc = 0; | |||||
| $data = `$compiler_name -dumpversion`; | |||||
| if ($data <= 4.6) { | |||||
| $no_avx2 = 1; | |||||
| $oldgcc = 1; | |||||
| } | |||||
| } | |||||
| $data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; | $data = `$compiler_name $flags -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; | ||||
| @@ -362,6 +378,8 @@ print MAKEFILE "CEXTRALIB=$linker_L $linker_l $linker_a\n"; | |||||
| print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; | print MAKEFILE "HAVE_MSA=1\n" if $have_msa eq 1; | ||||
| print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; | print MAKEFILE "MSA_FLAGS=$msa_flags\n" if $have_msa eq 1; | ||||
| print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1; | print MAKEFILE "NO_AVX512=1\n" if $no_avx512 eq 1; | ||||
| print MAKEFILE "NO_AVX2=1\n" if $no_avx2 eq 1; | |||||
| print MAKEFILE "OLDGCC=1\n" if $oldgcc eq 1; | |||||
| $os =~ tr/[a-z]/[A-Z]/; | $os =~ tr/[a-z]/[A-Z]/; | ||||
| $architecture =~ tr/[a-z]/[A-Z]/; | $architecture =~ tr/[a-z]/[A-Z]/; | ||||
| @@ -393,6 +393,7 @@ void cblas_sbf16tos(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPE | |||||
| void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout); | void cblas_dbf16tod(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *in, OPENBLAS_CONST blasint incin, double *out, OPENBLAS_CONST blasint incout); | ||||
| /* dot product of BFLOAT16 input arrays, with the result returned as float */ | /* dot product of BFLOAT16 input arrays, with the result returned as float */ | ||||
| float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy); | float cblas_sbdot(OPENBLAS_CONST blasint n, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST bfloat16 *y, OPENBLAS_CONST blasint incy); | ||||
| void cblas_sbgemv(OPENBLAS_CONST enum CBLAS_ORDER order, OPENBLAS_CONST enum CBLAS_TRANSPOSE trans, OPENBLAS_CONST blasint m, OPENBLAS_CONST blasint n, OPENBLAS_CONST float alpha, OPENBLAS_CONST bfloat16 *a, OPENBLAS_CONST blasint lda, OPENBLAS_CONST bfloat16 *x, OPENBLAS_CONST blasint incx, OPENBLAS_CONST float beta, float *y, OPENBLAS_CONST blasint incy); | |||||
| #ifdef __cplusplus | #ifdef __cplusplus | ||||
| } | } | ||||
| @@ -96,7 +96,7 @@ if (${CMAKE_C_COMPILER_ID} STREQUAL "SUN") | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (${CORE} STREQUAL "SKYLAKEX") | |||||
| if (${CORE} STREQUAL SKYLAKEX) | |||||
| if (NOT DYNAMIC_ARCH) | if (NOT DYNAMIC_ARCH) | ||||
| if (NOT NO_AVX512) | if (NOT NO_AVX512) | ||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") | set (CCOMMON_OPT "${CCOMMON_OPT} -march=skylake-avx512") | ||||
| @@ -104,7 +104,7 @@ if (${CORE} STREQUAL "SKYLAKEX") | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (${CORE} STREQUAL "COOPERLAKE") | |||||
| if (${CORE} STREQUAL COOPERLAKE) | |||||
| if (NOT DYNAMIC_ARCH) | if (NOT DYNAMIC_ARCH) | ||||
| if (NOT NO_AVX512) | if (NOT NO_AVX512) | ||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | ||||
| @@ -124,6 +124,9 @@ if (NOT DYNAMIC_ARCH) | |||||
| if (HAVE_AVX) | if (HAVE_AVX) | ||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -mavx") | set (CCOMMON_OPT "${CCOMMON_OPT} -mavx") | ||||
| endif () | endif () | ||||
| if (HAVE_FMA3) | |||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -mfma") | |||||
| endif () | |||||
| if (HAVE_SSE) | if (HAVE_SSE) | ||||
| set (CCOMMON_OPT "${CCOMMON_OPT} -msse") | set (CCOMMON_OPT "${CCOMMON_OPT} -msse") | ||||
| endif () | endif () | ||||
| @@ -184,8 +184,8 @@ macro(SetDefaultL2) | |||||
| set(XHEMV_V_KERNEL ../generic/zhemv_k.c) | set(XHEMV_V_KERNEL ../generic/zhemv_k.c) | ||||
| set(XHEMV_M_KERNEL ../generic/zhemv_k.c) | set(XHEMV_M_KERNEL ../generic/zhemv_k.c) | ||||
| if (BUILD_BFLOAT16) | if (BUILD_BFLOAT16) | ||||
| set(SBGEMVNKERNEL ../arm/gemv_n.c) | |||||
| set(SBGEMVTKERNEL ../arm/gemv_t.c) | |||||
| set(SBGEMVNKERNEL ../x86_64/sbgemv_n.c) | |||||
| set(SBGEMVTKERNEL ../x86_64/sbgemv_t.c) | |||||
| set(SHGERKERNEL ../generic/ger.c) | set(SHGERKERNEL ../generic/ger.c) | ||||
| endif () | endif () | ||||
| endmacro () | endmacro () | ||||
| @@ -84,6 +84,14 @@ if (X86) | |||||
| set(NO_EXPRECISION 1) | set(NO_EXPRECISION 1) | ||||
| endif () | endif () | ||||
| if (DYNAMIC_ARCH) | |||||
| if (TARGET) | |||||
| if (${TARGET} STREQUAL "GENERIC") | |||||
| set(NO_EXPRECISION 1) | |||||
| endif () | |||||
| endif () | |||||
| endif () | |||||
| if (UTEST_CHECK) | if (UTEST_CHECK) | ||||
| set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK") | set(CCOMMON_OPT "${CCOMMON_OPT} -DUTEST_CHECK") | ||||
| set(SANITY_CHECK 1) | set(SANITY_CHECK 1) | ||||
| @@ -556,6 +556,21 @@ else(NOT CMAKE_CROSSCOMPILING) | |||||
| MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") | MESSAGE(FATAL_ERROR "Compiling getarch failed ${GETARCH_LOG}") | ||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| unset (HAVE_AVX2) | |||||
| unset (HAVE_AVX) | |||||
| unset (HAVE_FMA3) | |||||
| unset (HAVE_MMX) | |||||
| unset (HAVE_SSE) | |||||
| unset (HAVE_SSE2) | |||||
| unset (HAVE_SSE3) | |||||
| unset (HAVE_SSSE3) | |||||
| unset (HAVE_SSE4A) | |||||
| unset (HAVE_SSE4_1) | |||||
| unset (HAVE_SSE4_2) | |||||
| unset (HAVE_NEON) | |||||
| unset (HAVE_VFP) | |||||
| unset (HAVE_VFPV3) | |||||
| unset (HAVE_VFPV4) | |||||
| message(STATUS "Running getarch") | message(STATUS "Running getarch") | ||||
| # use the cmake binary w/ the -E param to run a shell command in a cross-platform way | # use the cmake binary w/ the -E param to run a shell command in a cross-platform way | ||||
| @@ -44,50 +44,9 @@ if (DEFINED BINARY AND DEFINED TARGET AND BINARY EQUAL 32) | |||||
| endif () | endif () | ||||
| endif () | endif () | ||||
| if (DEFINED TARGET) | |||||
| if (${TARGET} STREQUAL "COOPERLAKE" AND NOT NO_AVX512) | |||||
| # if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||||
| if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") | |||||
| else() | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||||
| endif() | |||||
| # elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") | |||||
| # set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| # endif() | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "SKYLAKEX" AND NOT NO_AVX512) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL "HASWELL" AND NOT NO_AVX2) | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||||
| if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| endif() | |||||
| elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE3) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSSE3) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE4_1) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED TARGET) | if (DEFINED TARGET) | ||||
| message(STATUS "-- -- -- -- -- -- -- -- -- -- -- -- --") | |||||
| message(STATUS "Targeting the ${TARGET} architecture.") | message(STATUS "Targeting the ${TARGET} architecture.") | ||||
| set(GETARCH_FLAGS "-DFORCE_${TARGET}") | set(GETARCH_FLAGS "-DFORCE_${TARGET}") | ||||
| endif () | endif () | ||||
| @@ -187,6 +146,63 @@ else() | |||||
| endif () | endif () | ||||
| include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") | include("${PROJECT_SOURCE_DIR}/cmake/prebuild.cmake") | ||||
| if (DEFINED TARGET) | |||||
| if (${TARGET} STREQUAL COOPERLAKE AND NOT NO_AVX512) | |||||
| # if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||||
| if (${GCC_VERSION} VERSION_GREATER 10.1 OR ${GCC_VERSION} VERSION_EQUAL 10.1) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=cooperlake") | |||||
| else() | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||||
| endif() | |||||
| # elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") | |||||
| # set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| # endif() | |||||
| endif() | |||||
| if (${TARGET} STREQUAL SKYLAKEX AND NOT NO_AVX512) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -march=skylake-avx512") | |||||
| endif() | |||||
| if (${TARGET} STREQUAL HASWELL AND NOT NO_AVX2) | |||||
| if (${CMAKE_C_COMPILER_ID} STREQUAL "GNU") | |||||
| execute_process(COMMAND ${CMAKE_C_COMPILER} -dumpversion OUTPUT_VARIABLE GCC_VERSION) | |||||
| if (${GCC_VERSION} VERSION_GREATER 4.7 OR ${GCC_VERSION} VERSION_EQUAL 4.7) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| endif() | |||||
| elseif (${CMAKE_C_COMPILER_ID} STREQUAL "CLANG") | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED HAVE_AVX) | |||||
| if (NOT NO_AVX) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED HAVE_AVX2) | |||||
| if (NOT NO_AVX2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mavx2") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED HAVE_FMA3) | |||||
| if (NOT NO_AVX2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mfma") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE2) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse2") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE3) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse3") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSSE3) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -mssse3") | |||||
| endif() | |||||
| if (DEFINED HAVE_SSE4_1) | |||||
| set (KERNEL_DEFINITIONS "${KERNEL_DEFINITIONS} -msse4.1") | |||||
| endif() | |||||
| endif() | |||||
| if (DEFINED BINARY) | if (DEFINED BINARY) | ||||
| message(STATUS "Compiling a ${BINARY}-bit binary.") | message(STATUS "Compiling a ${BINARY}-bit binary.") | ||||
| endif () | endif () | ||||
| @@ -437,6 +437,11 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||||
| #include "common_mips.h" | #include "common_mips.h" | ||||
| #endif | #endif | ||||
| #ifdef ARCH_RISCV64 | |||||
| #include "common_riscv64.h" | |||||
| #endif | |||||
| #ifdef ARCH_MIPS64 | #ifdef ARCH_MIPS64 | ||||
| #include "common_mips64.h" | #include "common_mips64.h" | ||||
| #endif | #endif | ||||
| @@ -142,14 +142,8 @@ REALNAME: | |||||
| #define HUGE_PAGESIZE ( 4 << 20) | #define HUGE_PAGESIZE ( 4 << 20) | ||||
| #ifndef BUFFERSIZE | #ifndef BUFFERSIZE | ||||
| #if defined(CORTEXA57) | |||||
| #define BUFFER_SIZE (20 << 20) | |||||
| #elif defined(TSV110) || defined(EMAG8180) | |||||
| #define BUFFER_SIZE (32 << 20) | #define BUFFER_SIZE (32 << 20) | ||||
| #else | #else | ||||
| #define BUFFER_SIZE (16 << 20) | |||||
| #endif | |||||
| #else | |||||
| #define BUFFER_SIZE (32 << BUFFERSIZE) | #define BUFFER_SIZE (32 << BUFFERSIZE) | ||||
| #endif | #endif | ||||
| @@ -250,6 +250,8 @@ void BLASFUNC(xgeru)(blasint *, blasint *, xdouble *, xdouble *, blasint *, | |||||
| void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *, | void BLASFUNC(xgerc)(blasint *, blasint *, xdouble *, xdouble *, blasint *, | ||||
| xdouble *, blasint *, xdouble *, blasint *); | xdouble *, blasint *, xdouble *, blasint *); | ||||
| void BLASFUNC(sbgemv)(char *, blasint *, blasint *, float *, bfloat16 *, blasint *, | |||||
| bfloat16 *, blasint *, float *, float *, blasint *); | |||||
| void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *, | void BLASFUNC(sgemv)(char *, blasint *, blasint *, float *, float *, blasint *, | ||||
| float *, blasint *, float *, float *, blasint *); | float *, blasint *, float *, float *, blasint *); | ||||
| void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *, | void BLASFUNC(dgemv)(char *, blasint *, blasint *, double *, double *, blasint *, | ||||
| @@ -44,6 +44,10 @@ | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| int sbgemv_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG); | |||||
| int sbgemv_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG); | |||||
| int sbgemv_thread_n(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int); | |||||
| int sbgemv_thread_t(BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG, int); | |||||
| int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | int sger_k (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | ||||
| int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | int dger_k (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | ||||
| int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *); | int qger_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *); | ||||
| @@ -75,18 +75,10 @@ static inline int my_mbind(void *addr, unsigned long len, int mode, | |||||
| // https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482 | // https://lsbbugs.linuxfoundation.org/show_bug.cgi?id=3482 | ||||
| return 0; | return 0; | ||||
| #else | #else | ||||
| #if defined (LOONGSON3B) | |||||
| #if defined (__64BIT__) | |||||
| return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags); | |||||
| #else | |||||
| return 0; //NULL Implementation on Loongson 3B 32bit. | |||||
| #endif | |||||
| #else | |||||
| // Fixes a random SEGFAULT when nodemask==NULL on Linux 2.6.34 and above | // Fixes a random SEGFAULT when nodemask==NULL on Linux 2.6.34 and above | ||||
| // unsigned long null_nodemask=0; | // unsigned long null_nodemask=0; | ||||
| return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags); | return syscall(SYS_mbind, addr, len, mode, nodemask, maxnode, flags); | ||||
| #endif | #endif | ||||
| #endif | |||||
| } | } | ||||
| static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) { | static inline int my_set_mempolicy(int mode, const unsigned long *addr, unsigned long flag) { | ||||
| @@ -646,10 +646,12 @@ | |||||
| #elif defined(BFLOAT16) | #elif defined(BFLOAT16) | ||||
| #define D_TO_BF16_K SBDTOBF16_K | |||||
| #define D_BF16_TO_K DBF16TOD_K | |||||
| #define S_TO_BF16_K SBSTOBF16_K | |||||
| #define S_BF16_TO_K SBF16TOS_K | |||||
| #define D_TO_BF16_K SBDTOBF16_K | |||||
| #define D_BF16_TO_K DBF16TOD_K | |||||
| #define S_TO_BF16_K SBSTOBF16_K | |||||
| #define S_BF16_TO_K SBF16TOS_K | |||||
| #define SBGEMV_N SBGEMV_N_K | |||||
| #define SBGEMV_T SBGEMV_T_K | |||||
| #define AMAX_K SAMAX_K | #define AMAX_K SAMAX_K | ||||
| #define AMIN_K SAMIN_K | #define AMIN_K SAMIN_K | ||||
| @@ -229,12 +229,7 @@ REALNAME: ;\ | |||||
| #define BUFFER_SIZE ( 32 << 21) | #define BUFFER_SIZE ( 32 << 21) | ||||
| #if defined(LOONGSON3A) | |||||
| #define PAGESIZE (16UL << 10) | |||||
| #define FIXED_PAGESIZE (16UL << 10) | |||||
| #endif | |||||
| #if defined(LOONGSON3B) | |||||
| #if defined(LOONGSON3R3) || defined(LOONGSON3R4) | |||||
| #define PAGESIZE (16UL << 10) | #define PAGESIZE (16UL << 10) | ||||
| #define FIXED_PAGESIZE (16UL << 10) | #define FIXED_PAGESIZE (16UL << 10) | ||||
| #endif | #endif | ||||
| @@ -250,7 +245,7 @@ REALNAME: ;\ | |||||
| #define MAP_ANONYMOUS MAP_ANON | #define MAP_ANONYMOUS MAP_ANON | ||||
| #endif | #endif | ||||
| #if defined(LOONGSON3A) || defined(LOONGSON3B) | |||||
| #if defined(LOONGSON3R3) || defined(LOONGSON3R4) | |||||
| #define PREFETCHD_(x) ld $0, x | #define PREFETCHD_(x) ld $0, x | ||||
| #define PREFETCHD(x) PREFETCHD_(x) | #define PREFETCHD(x) PREFETCHD_(x) | ||||
| #else | #else | ||||
| @@ -78,8 +78,8 @@ BLASLONG (*isbmin_k) (BLASLONG, float *, BLASLONG); | |||||
| int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | int (*sbscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | ||||
| int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | int (*sbswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | ||||
| int (*sbgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||||
| int (*sbgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||||
| int (*sbgemv_n) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG); | |||||
| int (*sbgemv_t) (BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float, float *, BLASLONG); | |||||
| int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | int (*sbger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | ||||
| int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | int (*sbsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | ||||
| @@ -849,6 +849,10 @@ Lmcount$lazy_ptr: | |||||
| #else | #else | ||||
| #define BUFFER_SIZE ( 16 << 20) | #define BUFFER_SIZE ( 16 << 20) | ||||
| #endif | #endif | ||||
| #ifdef DYNAMIC_ARCH | |||||
| #undef BUFFER_SIZE | |||||
| #define BUFFER_SIZE (64 << 22) | |||||
| #endif | |||||
| #ifndef PAGESIZE | #ifndef PAGESIZE | ||||
| #define PAGESIZE ( 4 << 10) | #define PAGESIZE ( 4 << 10) | ||||
| @@ -0,0 +1,98 @@ | |||||
| /***************************************************************************** | |||||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written | |||||
| permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| **********************************************************************************/ | |||||
| /*********************************************************************/ | |||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
| /* All rights reserved. */ | |||||
| /* */ | |||||
| /* Redistribution and use in source and binary forms, with or */ | |||||
| /* without modification, are permitted provided that the following */ | |||||
| /* conditions are met: */ | |||||
| /* */ | |||||
| /* 1. Redistributions of source code must retain the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer. */ | |||||
| /* */ | |||||
| /* 2. Redistributions in binary form must reproduce the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer in the documentation and/or other materials */ | |||||
| /* provided with the distribution. */ | |||||
| /* */ | |||||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||||
| /* */ | |||||
| /* The views and conclusions contained in the software and */ | |||||
| /* documentation are those of the authors and should not be */ | |||||
| /* interpreted as representing official policies, either expressed */ | |||||
| /* or implied, of The University of Texas at Austin. */ | |||||
| /*********************************************************************/ | |||||
| #ifndef COMMON_RISCV64 | |||||
| #define COMMON_RISCV64 | |||||
| #define MB __sync_synchronize() | |||||
| #define WMB __sync_synchronize() | |||||
| #define RMB __sync_synchronize() | |||||
| #define INLINE inline | |||||
| #ifndef ASSEMBLER | |||||
| static inline int blas_quickdivide(blasint x, blasint y){ | |||||
| return x / y; | |||||
| } | |||||
| #endif | |||||
| #define BUFFER_SIZE ( 32 << 20) | |||||
| #define SEEK_ADDRESS | |||||
| #if defined(C910V) | |||||
| #include <riscv-vector.h> | |||||
| #endif | |||||
| #endif | |||||
| @@ -8,6 +8,8 @@ | |||||
| #define SBDTOBF16_K sbdtobf16_k | #define SBDTOBF16_K sbdtobf16_k | ||||
| #define SBF16TOS_K sbf16tos_k | #define SBF16TOS_K sbf16tos_k | ||||
| #define DBF16TOD_K dbf16tod_k | #define DBF16TOD_K dbf16tod_k | ||||
| #define SBGEMV_N_K sbgemv_n | |||||
| #define SBGEMV_T_K sbgemv_t | |||||
| #define SBGEMM_ONCOPY sbgemm_oncopy | #define SBGEMM_ONCOPY sbgemm_oncopy | ||||
| #define SBGEMM_OTCOPY sbgemm_otcopy | #define SBGEMM_OTCOPY sbgemm_otcopy | ||||
| @@ -29,6 +31,8 @@ | |||||
| #define SBDTOBF16_K gotoblas -> sbdtobf16_k | #define SBDTOBF16_K gotoblas -> sbdtobf16_k | ||||
| #define SBF16TOS_K gotoblas -> sbf16tos_k | #define SBF16TOS_K gotoblas -> sbf16tos_k | ||||
| #define DBF16TOD_K gotoblas -> dbf16tod_k | #define DBF16TOD_K gotoblas -> dbf16tod_k | ||||
| #define SBGEMV_N_K gotoblas -> sbgemv_n | |||||
| #define SBGEMV_T_K gotoblas -> sbgemv_t | |||||
| #define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy | #define SBGEMM_ONCOPY gotoblas -> sbgemm_oncopy | ||||
| #define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy | #define SBGEMM_OTCOPY gotoblas -> sbgemm_otcopy | ||||
| @@ -78,6 +78,12 @@ static __inline unsigned long rpcc(void){ | |||||
| #define __BIG_ENDIAN__ | #define __BIG_ENDIAN__ | ||||
| #endif | #endif | ||||
| #ifdef C_SUN | |||||
| #ifndef __64BIT | |||||
| #define RETURN_BY_STACK | |||||
| #endif | |||||
| #endif | |||||
| #ifdef DOUBLE | #ifdef DOUBLE | ||||
| #define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory") | #define GET_IMAGE(res) __asm__ __volatile__("fmovd %%f2, %0" : "=f"(res) : : "memory") | ||||
| #else | #else | ||||
| @@ -70,19 +70,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| /* or implied, of The University of Texas at Austin. */ | /* or implied, of The University of Texas at Austin. */ | ||||
| /*********************************************************************/ | /*********************************************************************/ | ||||
| #define CPU_UNKNOWN 0 | |||||
| #define CPU_SICORTEX 1 | |||||
| #define CPU_LOONGSON3A 2 | |||||
| #define CPU_LOONGSON3B 3 | |||||
| #define CPU_I6400 4 | |||||
| #define CPU_P6600 5 | |||||
| #define CPU_I6500 6 | |||||
| #define CPU_UNKNOWN 0 | |||||
| #define CPU_SICORTEX 1 | |||||
| #define CPU_LOONGSON3R3 2 | |||||
| #define CPU_LOONGSON3R4 3 | |||||
| #define CPU_I6400 4 | |||||
| #define CPU_P6600 5 | |||||
| #define CPU_I6500 6 | |||||
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| "SICORTEX", | "SICORTEX", | ||||
| "LOONGSON3A", | |||||
| "LOONGSON3B", | |||||
| "LOONGSON3R3", | |||||
| "LOONGSON3R4", | |||||
| "I6400", | "I6400", | ||||
| "P6600", | "P6600", | ||||
| "I6500" | "I6500" | ||||
| @@ -90,48 +90,13 @@ static char *cpuname[] = { | |||||
| int detect(void){ | int detect(void){ | ||||
| #ifdef __linux | |||||
| #ifdef linux | |||||
| FILE *infile; | FILE *infile; | ||||
| char buffer[512], *p; | char buffer[512], *p; | ||||
| p = (char *)NULL; | p = (char *)NULL; | ||||
| infile = fopen("/proc/cpuinfo", "r"); | |||||
| while (fgets(buffer, sizeof(buffer), infile)){ | |||||
| if (!strncmp("cpu", buffer, 3)){ | |||||
| p = strchr(buffer, ':') + 2; | |||||
| #if 0 | |||||
| fprintf(stderr, "%s\n", p); | |||||
| #endif | |||||
| break; | |||||
| } | |||||
| } | |||||
| fclose(infile); | |||||
| if(p != NULL){ | |||||
| if (strstr(p, "Loongson-3A")){ | |||||
| return CPU_LOONGSON3A; | |||||
| }else if(strstr(p, "Loongson-3B")){ | |||||
| return CPU_LOONGSON3B; | |||||
| }else if (strstr(p, "Loongson-3")){ | |||||
| infile = fopen("/proc/cpuinfo", "r"); | |||||
| p = (char *)NULL; | |||||
| while (fgets(buffer, sizeof(buffer), infile)){ | |||||
| if (!strncmp("system type", buffer, 11)){ | |||||
| p = strchr(buffer, ':') + 2; | |||||
| break; | |||||
| } | |||||
| } | |||||
| fclose(infile); | |||||
| if (strstr(p, "loongson3a")) | |||||
| return CPU_LOONGSON3A; | |||||
| }else{ | |||||
| return CPU_SICORTEX; | |||||
| } | |||||
| } | |||||
| //Check model name for Loongson3 | //Check model name for Loongson3 | ||||
| infile = fopen("/proc/cpuinfo", "r"); | infile = fopen("/proc/cpuinfo", "r"); | ||||
| p = (char *)NULL; | |||||
| while (fgets(buffer, sizeof(buffer), infile)){ | while (fgets(buffer, sizeof(buffer), infile)){ | ||||
| if (!strncmp("model name", buffer, 10)){ | if (!strncmp("model name", buffer, 10)){ | ||||
| p = strchr(buffer, ':') + 2; | p = strchr(buffer, ':') + 2; | ||||
| @@ -140,14 +105,16 @@ int detect(void){ | |||||
| } | } | ||||
| fclose(infile); | fclose(infile); | ||||
| if(p != NULL){ | if(p != NULL){ | ||||
| if (strstr(p, "Loongson-3A")){ | |||||
| return CPU_LOONGSON3A; | |||||
| }else if(strstr(p, "Loongson-3B")){ | |||||
| return CPU_LOONGSON3B; | |||||
| } | |||||
| if (strstr(p, "Loongson-3A3000") || strstr(p, "Loongson-3B3000")){ | |||||
| return CPU_LOONGSON3R3; | |||||
| }else if(strstr(p, "Loongson-3A4000") || strstr(p, "Loongson-3B4000")){ | |||||
| return CPU_LOONGSON3R4; | |||||
| } else{ | |||||
| return CPU_SICORTEX; | |||||
| } | } | ||||
| #endif | #endif | ||||
| return CPU_UNKNOWN; | return CPU_UNKNOWN; | ||||
| } | |||||
| } | } | ||||
| char *get_corename(void){ | char *get_corename(void){ | ||||
| @@ -159,10 +126,10 @@ void get_architecture(void){ | |||||
| } | } | ||||
| void get_subarchitecture(void){ | void get_subarchitecture(void){ | ||||
| if(detect()==CPU_LOONGSON3A) { | |||||
| printf("LOONGSON3A"); | |||||
| }else if(detect()==CPU_LOONGSON3B){ | |||||
| printf("LOONGSON3B"); | |||||
| if(detect()==CPU_LOONGSON3R3) { | |||||
| printf("LOONGSON3R3"); | |||||
| }else if(detect()==CPU_LOONGSON3R4){ | |||||
| printf("LOONGSON3R4"); | |||||
| }else if(detect()==CPU_I6400){ | }else if(detect()==CPU_I6400){ | ||||
| printf("I6400"); | printf("I6400"); | ||||
| }else if(detect()==CPU_P6600){ | }else if(detect()==CPU_P6600){ | ||||
| @@ -179,8 +146,8 @@ void get_subdirname(void){ | |||||
| } | } | ||||
| void get_cpuconfig(void){ | void get_cpuconfig(void){ | ||||
| if(detect()==CPU_LOONGSON3A) { | |||||
| printf("#define LOONGSON3A\n"); | |||||
| if(detect()==CPU_LOONGSON3R3) { | |||||
| printf("#define LOONGSON3R3\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | printf("#define L1_DATA_SIZE 65536\n"); | ||||
| printf("#define L1_DATA_LINESIZE 32\n"); | printf("#define L1_DATA_LINESIZE 32\n"); | ||||
| printf("#define L2_SIZE 512488\n"); | printf("#define L2_SIZE 512488\n"); | ||||
| @@ -188,8 +155,8 @@ void get_cpuconfig(void){ | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | printf("#define DTB_DEFAULT_ENTRIES 64\n"); | ||||
| printf("#define DTB_SIZE 4096\n"); | printf("#define DTB_SIZE 4096\n"); | ||||
| printf("#define L2_ASSOCIATIVE 4\n"); | printf("#define L2_ASSOCIATIVE 4\n"); | ||||
| }else if(detect()==CPU_LOONGSON3B){ | |||||
| printf("#define LOONGSON3B\n"); | |||||
| }else if(detect()==CPU_LOONGSON3R4){ | |||||
| printf("#define LOONGSON3R4\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | printf("#define L1_DATA_SIZE 65536\n"); | ||||
| printf("#define L1_DATA_LINESIZE 32\n"); | printf("#define L1_DATA_LINESIZE 32\n"); | ||||
| printf("#define L2_SIZE 512488\n"); | printf("#define L2_SIZE 512488\n"); | ||||
| @@ -237,10 +204,10 @@ void get_cpuconfig(void){ | |||||
| } | } | ||||
| void get_libname(void){ | void get_libname(void){ | ||||
| if(detect()==CPU_LOONGSON3A) { | |||||
| printf("loongson3a\n"); | |||||
| }else if(detect()==CPU_LOONGSON3B) { | |||||
| printf("loongson3b\n"); | |||||
| if(detect()==CPU_LOONGSON3R3) { | |||||
| printf("loongson3r3\n"); | |||||
| }else if(detect()==CPU_LOONGSON3R4) { | |||||
| printf("loongson3r4\n"); | |||||
| }else if(detect()==CPU_I6400) { | }else if(detect()==CPU_I6400) { | ||||
| printf("i6400\n"); | printf("i6400\n"); | ||||
| }else if(detect()==CPU_P6600) { | }else if(detect()==CPU_P6600) { | ||||
| @@ -0,0 +1,113 @@ | |||||
| /***************************************************************************** | |||||
| Copyright (c) 2011-2014, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written | |||||
| permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| **********************************************************************************/ | |||||
| /*********************************************************************/ | |||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
| /* All rights reserved. */ | |||||
| /* */ | |||||
| /* Redistribution and use in source and binary forms, with or */ | |||||
| /* without modification, are permitted provided that the following */ | |||||
| /* conditions are met: */ | |||||
| /* */ | |||||
| /* 1. Redistributions of source code must retain the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer. */ | |||||
| /* */ | |||||
| /* 2. Redistributions in binary form must reproduce the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer in the documentation and/or other materials */ | |||||
| /* provided with the distribution. */ | |||||
| /* */ | |||||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||||
| /* */ | |||||
| /* The views and conclusions contained in the software and */ | |||||
| /* documentation are those of the authors and should not be */ | |||||
| /* interpreted as representing official policies, either expressed */ | |||||
| /* or implied, of The University of Texas at Austin. */ | |||||
| /*********************************************************************/ | |||||
/* Core identifiers; each value doubles as an index into cpuname[]. */
#define CPU_UNKNOWN 0
#define CPU_C910V   1

/*
 * Printable core names, indexed by the CPU_* identifiers above.
 * The entry for CPU_UNKNOWN is spelled "UNKNOWN" so that it agrees with the
 * identifier and with the "#define UNKNOWN" line written by get_cpuconfig().
 */
static char *cpuname[] = {
  "UNKNOWN",
  "C910V"
};
| int detect(void){ | |||||
| return CPU_UNKNOWN; | |||||
| } | |||||
| char *get_corename(void){ | |||||
| return cpuname[detect()]; | |||||
| } | |||||
/* Write the architecture name to stdout (no trailing newline). */
void get_architecture(void){
  fputs("RISCV64", stdout);
}
/* Intentionally prints nothing: no sub-architecture name is emitted. */
void get_subarchitecture(void){
  return;
}
/* Write the architecture sub-directory name to stdout (no trailing newline). */
void get_subdirname(void){
  fputs("riscv64", stdout);
}
/*
 * Write the fixed set of configuration "#define" lines to stdout, one per
 * line, in the order listed below.
 */
void get_cpuconfig(void){
  static const char *const config_lines[] = {
    "#define UNKNOWN\n",
    "#define L1_DATA_SIZE 65536\n",
    "#define L1_DATA_LINESIZE 32\n",
    "#define L2_SIZE 512488\n",
    "#define L2_LINESIZE 32\n",
    "#define DTB_DEFAULT_ENTRIES 64\n",
    "#define DTB_SIZE 4096\n",
    "#define L2_ASSOCIATIVE 4\n"
  };
  const size_t line_count = sizeof(config_lines) / sizeof(config_lines[0]);
  size_t i;

  for (i = 0; i < line_count; i++) {
    fputs(config_lines[i], stdout);
  }
}
/* Write the library name suffix, followed by a newline, to stdout. */
void get_libname(void){
  fputs("riscv64\n", stdout);
}
| @@ -202,7 +202,7 @@ int support_avx(){ | |||||
| if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){ | if ((ecx & (1 << 28)) != 0 && (ecx & (1 << 27)) != 0 && (ecx & (1 << 26)) != 0){ | ||||
| xgetbv(0, &eax, &edx); | xgetbv(0, &eax, &edx); | ||||
| if((eax & 6) == 6){ | if((eax & 6) == 6){ | ||||
| ret=1; //OS support AVX | |||||
| ret=1; //OS supports saving xmm and ymm registers (6 = (1<<1) | (1<<2)) | |||||
| } | } | ||||
| } | } | ||||
| return ret; | return ret; | ||||
| @@ -219,8 +219,8 @@ int support_avx2(){ | |||||
| if (!support_avx()) | if (!support_avx()) | ||||
| return 0; | return 0; | ||||
| cpuid(7, &eax, &ebx, &ecx, &edx); | cpuid(7, &eax, &ebx, &ecx, &edx); | ||||
| if((ebx & (1<<7)) != 0) | |||||
| ret=1; //OS supports AVX2 | |||||
| if((ebx & (1<<5)) != 0) | |||||
| ret=1; //CPU supports AVX2 | |||||
| return ret; | return ret; | ||||
| #else | #else | ||||
| return 0; | return 0; | ||||
| @@ -235,14 +235,14 @@ int support_avx512(){ | |||||
| if (!support_avx()) | if (!support_avx()) | ||||
| return 0; | return 0; | ||||
| cpuid(7, &eax, &ebx, &ecx, &edx); | cpuid(7, &eax, &ebx, &ecx, &edx); | ||||
| if((ebx & 32) != 32){ | |||||
| ret=0; //OS does not even support AVX2 | |||||
| if((ebx & (1<<5)) == 0){ | |||||
| ret=0; //cpu does not have avx2 flag | |||||
| } | } | ||||
| if((ebx & (1<<31)) != 0){ | |||||
| if((ebx & (1<<31)) != 0){ //AVX512VL flag | |||||
| xgetbv(0, &eax, &edx); | xgetbv(0, &eax, &edx); | ||||
| if((eax & 0xe0) == 0xe0) | if((eax & 0xe0) == 0xe0) | ||||
| ret=1; //OS supports AVX512VL | |||||
| } | |||||
| ret=1; //OS supports saving zmm registers | |||||
| } | |||||
| return ret; | return ret; | ||||
| #else | #else | ||||
| return 0; | return 0; | ||||
| @@ -153,6 +153,11 @@ ARCH_ARM | |||||
| ARCH_ARM64 | ARCH_ARM64 | ||||
| #endif | #endif | ||||
| #if defined(__riscv) | |||||
| ARCH_RISCV64 | |||||
| #endif | |||||
| #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) | #if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) | ||||
| HAVE_C11 | HAVE_C11 | ||||
| #endif | #endif | ||||
| @@ -61,7 +61,7 @@ endif | |||||
| all1: $(all1targets) | all1: $(all1targets) | ||||
| ifndef CROSS | |||||
| ifneq ($(CROSS), 1) | |||||
| ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
| ifeq ($(BUILD_SINGLE),1) | ifeq ($(BUILD_SINGLE),1) | ||||
| OMP_NUM_THREADS=2 ./xscblat1 | OMP_NUM_THREADS=2 ./xscblat1 | ||||
| @@ -106,7 +106,7 @@ endif | |||||
| all2: $(all2targets) | all2: $(all2targets) | ||||
| ifndef CROSS | |||||
| ifneq ($(CROSS), 1) | |||||
| ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
| ifeq ($(BUILD_SINGLE),1) | ifeq ($(BUILD_SINGLE),1) | ||||
| OMP_NUM_THREADS=2 ./xscblat2 < sin2 | OMP_NUM_THREADS=2 ./xscblat2 < sin2 | ||||
| @@ -152,7 +152,7 @@ endif | |||||
| all3: $(all3targets) | all3: $(all3targets) | ||||
| ifndef CROSS | |||||
| ifneq ($(CROSS), 1) | |||||
| ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
| ifeq ($(BUILD_SINGLE),1) | ifeq ($(BUILD_SINGLE),1) | ||||
| OMP_NUM_THREADS=2 ./xscblat3 < sin3 | OMP_NUM_THREADS=2 ./xscblat3 < sin3 | ||||
| @@ -413,7 +413,13 @@ XBLASOBJS += \ | |||||
| xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUN.$(SUFFIX) \ | xtbmv_thread_RUU.$(SUFFIX) xtbmv_thread_RUN.$(SUFFIX) \ | ||||
| xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLN.$(SUFFIX) \ | xtbmv_thread_RLU.$(SUFFIX) xtbmv_thread_RLN.$(SUFFIX) \ | ||||
| xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \ | xtbmv_thread_CUU.$(SUFFIX) xtbmv_thread_CUN.$(SUFFIX) \ | ||||
| xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX) \ | |||||
| xtbmv_thread_CLU.$(SUFFIX) xtbmv_thread_CLN.$(SUFFIX) | |||||
| ifeq ($(BUILD_BFLOAT16),1) | |||||
| SBBLASOBJS += \ | |||||
| sbgemv_thread_n$(TSUFFIX).$(SUFFIX) \ | |||||
| sbgemv_thread_t$(TSUFFIX).$(SUFFIX) | |||||
| endif | |||||
| endif | endif | ||||
| @@ -3693,4 +3699,12 @@ xtrsv_CUU.$(SUFFIX) xtrsv_CUU.$(PSUFFIX) : ztrsv_L.c ../../param.h | |||||
| xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h | xtrsv_CUN.$(SUFFIX) xtrsv_CUN.$(PSUFFIX) : ztrsv_L.c ../../param.h | ||||
| $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F) | $(CC) -c $(CFLAGS) -DXDOUBLE -DCOMPLEX -DTRANSA=4 -UUNIT $< -o $(@F) | ||||
| ifeq ($(BUILD_BFLOAT16),1) | |||||
| sbgemv_thread_n.$(SUFFIX) sbgemv_thread_n.$(PSUFFIX) : sbgemv_thread.c ../../common.h | |||||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -UTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||||
| sbgemv_thread_t.$(SUFFIX) sbgemv_thread_t.$(PSUFFIX) : sbgemv_thread.c ../../common.h | |||||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DTRANSA -UCONJ -UXCONJ $< -o $(@F) | |||||
| endif | |||||
| include ../../Makefile.tail | include ../../Makefile.tail | ||||
| @@ -0,0 +1,149 @@ | |||||
| /*********************************************************************/ | |||||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||||
| /* All rights reserved. */ | |||||
| /* */ | |||||
| /* Redistribution and use in source and binary forms, with or */ | |||||
| /* without modification, are permitted provided that the following */ | |||||
| /* conditions are met: */ | |||||
| /* */ | |||||
| /* 1. Redistributions of source code must retain the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer. */ | |||||
| /* */ | |||||
| /* 2. Redistributions in binary form must reproduce the above */ | |||||
| /* copyright notice, this list of conditions and the following */ | |||||
| /* disclaimer in the documentation and/or other materials */ | |||||
| /* provided with the distribution. */ | |||||
| /* */ | |||||
| /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||||
| /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||||
| /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||||
| /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||||
| /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||||
| /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||||
| /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||||
| /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||||
| /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||||
| /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||||
| /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||||
| /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||||
| /* POSSIBILITY OF SUCH DAMAGE. */ | |||||
| /* */ | |||||
| /* The views and conclusions contained in the software and */ | |||||
| /* documentation are those of the authors and should not be */ | |||||
| /* interpreted as representing official policies, either expressed */ | |||||
| /* or implied, of The University of Texas at Austin. */ | |||||
| /*********************************************************************/ | |||||
| #include <stdio.h> | |||||
| #include <stdlib.h> | |||||
| #include "common.h" | |||||
| #ifndef TRANSA | |||||
| #define SBGEMV SBGEMV_N | |||||
| #else | |||||
| #define SBGEMV SBGEMV_T | |||||
| #endif | |||||
| static int sbgemv_kernel(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *dummy1, FLOAT *dummy2, BLASLONG dummy3){ | |||||
| bfloat16 *a, *x; | |||||
| float *y; | |||||
| BLASLONG lda, incx, incy; | |||||
| BLASLONG m_from, m_to, n_from, n_to; | |||||
| a = (bfloat16 *)args->a; | |||||
| x = (bfloat16 *)args->b; | |||||
| y = (float *)args->c; | |||||
| lda = args->lda; | |||||
| incx = args->ldb; | |||||
| incy = args->ldc; | |||||
| #ifndef TRANSA // N | |||||
| m_from = *(range_m + 0); | |||||
| m_to = *(range_m + 1); | |||||
| n_from = 0; | |||||
| n_to = args -> n; | |||||
| a += m_from; | |||||
| y += m_from * incy; | |||||
| #else // T | |||||
| m_from = 0; | |||||
| m_to = args->m; | |||||
| n_from = *(range_n + 0); | |||||
| n_to = *(range_n + 1); | |||||
| a += n_from * lda; | |||||
| y += n_from * incy; | |||||
| #endif | |||||
| SBGEMV(m_to - m_from, n_to - n_from, *((FLOAT *)(args->alpha)), a, lda, x, incx, *((FLOAT *)(args->beta)), y, incy); | |||||
| return 0; | |||||
| } | |||||
| int CNAME(BLASLONG m, BLASLONG n, float alpha, bfloat16 *a, BLASLONG lda, bfloat16 *x, BLASLONG incx, float beta, float *y, BLASLONG incy, int threads) | |||||
| { | |||||
| blas_arg_t args; | |||||
| blas_queue_t queue[MAX_CPU_NUMBER]; | |||||
| BLASLONG range[MAX_CPU_NUMBER + 1]; | |||||
| #ifndef TRANSA | |||||
| BLASLONG width_for_split = m; | |||||
| #else | |||||
| BLASLONG width_for_split = n; | |||||
| #endif | |||||
| BLASLONG BLOCK_WIDTH = width_for_split/threads; | |||||
| int mode = BLAS_BFLOAT16 | BLAS_REAL; | |||||
| args.m = m; | |||||
| args.n = n; | |||||
| args.a = (void *)a; | |||||
| args.b = (void *)x; | |||||
| args.c = (void *)y; | |||||
| args.lda = lda; | |||||
| args.ldb = incx; | |||||
| args.ldc = incy; | |||||
| args.alpha = (void *)α | |||||
| args.beta = (void *)β | |||||
| range[0] = 0; | |||||
| int thread_idx; | |||||
| for (thread_idx=0; thread_idx<threads; thread_idx++) { | |||||
| if (thread_idx != threads-1) { | |||||
| range[thread_idx + 1] = range[thread_idx] + BLOCK_WIDTH; | |||||
| } else { | |||||
| range[thread_idx + 1] = range[thread_idx] + width_for_split; | |||||
| } | |||||
| queue[thread_idx].mode = mode; | |||||
| queue[thread_idx].routine = sbgemv_kernel; | |||||
| queue[thread_idx].args = &args; | |||||
| #ifndef TRANSA | |||||
| queue[thread_idx].range_m = &range[thread_idx]; | |||||
| queue[thread_idx].range_n = NULL; | |||||
| #else | |||||
| queue[thread_idx].range_m = NULL; | |||||
| queue[thread_idx].range_n = &range[thread_idx]; | |||||
| #endif | |||||
| queue[thread_idx].sa = NULL; | |||||
| queue[thread_idx].sb = NULL; | |||||
| queue[thread_idx].next = &queue[thread_idx + 1]; | |||||
| width_for_split -= BLOCK_WIDTH; | |||||
| } | |||||
| if (thread_idx) { | |||||
| queue[0].sa = NULL; | |||||
| queue[0].sb = NULL; | |||||
| queue[thread_idx - 1].next = NULL; | |||||
| exec_blas(thread_idx, queue); | |||||
| } | |||||
| return 0; | |||||
| } | |||||
| @@ -206,7 +206,7 @@ ifdef SMP | |||||
| COMMONOBJS += gemm_thread_m.$(SUFFIX) gemm_thread_n.$(SUFFIX) gemm_thread_mn.$(SUFFIX) gemm_thread_variable.$(SUFFIX) | COMMONOBJS += gemm_thread_m.$(SUFFIX) gemm_thread_n.$(SUFFIX) gemm_thread_mn.$(SUFFIX) gemm_thread_variable.$(SUFFIX) | ||||
| COMMONOBJS += syrk_thread.$(SUFFIX) | COMMONOBJS += syrk_thread.$(SUFFIX) | ||||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||||
| ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1) | |||||
| ifeq ($(BUILD_BFLOAT16),1) | ifeq ($(BUILD_BFLOAT16),1) | ||||
| SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) | SBBLASOBJS += sbgemm_thread_nn.$(SUFFIX) sbgemm_thread_nt.$(SUFFIX) sbgemm_thread_tn.$(SUFFIX) sbgemm_thread_tt.$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -282,7 +282,7 @@ HPLOBJS = \ | |||||
| dtrsm_RNUU.$(SUFFIX) dtrsm_RNUN.$(SUFFIX) dtrsm_RNLU.$(SUFFIX) dtrsm_RNLN.$(SUFFIX) \ | dtrsm_RNUU.$(SUFFIX) dtrsm_RNUN.$(SUFFIX) dtrsm_RNLU.$(SUFFIX) dtrsm_RNLN.$(SUFFIX) \ | ||||
| dtrsm_RTUU.$(SUFFIX) dtrsm_RTUN.$(SUFFIX) dtrsm_RTLU.$(SUFFIX) dtrsm_RTLN.$(SUFFIX) | dtrsm_RTUU.$(SUFFIX) dtrsm_RTUN.$(SUFFIX) dtrsm_RTLU.$(SUFFIX) dtrsm_RTLN.$(SUFFIX) | ||||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||||
| ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1) | |||||
| HPLOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) \ | HPLOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) \ | ||||
| dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -297,13 +297,13 @@ ifeq ($(BUILD_DOUBLE),1) | |||||
| strsm_RTUU.$(SUFFIX) strsm_RTUN.$(SUFFIX) strsm_RTLU.$(SUFFIX) strsm_RTLN.$(SUFFIX) \ | strsm_RTUU.$(SUFFIX) strsm_RTUN.$(SUFFIX) strsm_RTLU.$(SUFFIX) strsm_RTLN.$(SUFFIX) \ | ||||
| ssyrk_UN.$(SUFFIX) ssyrk_UT.$(SUFFIX) ssyrk_LN.$(SUFFIX) ssyrk_LT.$(SUFFIX) \ | ssyrk_UN.$(SUFFIX) ssyrk_UT.$(SUFFIX) ssyrk_LN.$(SUFFIX) ssyrk_LT.$(SUFFIX) \ | ||||
| ssyrk_kernel_U.$(SUFFIX) ssyrk_kernel_L.$(SUFFIX) | ssyrk_kernel_U.$(SUFFIX) ssyrk_kernel_L.$(SUFFIX) | ||||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||||
| ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1) | |||||
| SBLASOBJS += ssyrk_thread_UN.$(SUFFIX) ssyrk_thread_UT.$(SUFFIX) ssyrk_thread_LN.$(SUFFIX) ssyrk_thread_LT.$(SUFFIX) | SBLASOBJS += ssyrk_thread_UN.$(SUFFIX) ssyrk_thread_UT.$(SUFFIX) ssyrk_thread_LN.$(SUFFIX) ssyrk_thread_LT.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | endif | ||||
| ifeq ($(BUILD_COMPLEX),1) | ifeq ($(BUILD_COMPLEX),1) | ||||
| SBLASOBJS = sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) | SBLASOBJS = sgemm_nn.$(SUFFIX) sgemm_nt.$(SUFFIX) sgemm_tn.$(SUFFIX) sgemm_tt.$(SUFFIX) | ||||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||||
| ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1) | |||||
| SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) | SBLASOBJS += sgemm_thread_nn.$(SUFFIX) sgemm_thread_nt.$(SUFFIX) sgemm_thread_tn.$(SUFFIX) sgemm_thread_tt.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -312,7 +312,7 @@ ifneq ($(BUILD_DOUBLE),1) | |||||
| DBLASOBJS= | DBLASOBJS= | ||||
| ifeq ($(BUILD_COMPLEX16),1) | ifeq ($(BUILD_COMPLEX16),1) | ||||
| DBLASOBJS = dgemm_nn.$(SUFFIX) dgemm_nt.$(SUFFIX) dgemm_tn.$(SUFFIX) dgemm_tt.$(SUFFIX) | DBLASOBJS = dgemm_nn.$(SUFFIX) dgemm_nt.$(SUFFIX) dgemm_tn.$(SUFFIX) dgemm_tt.$(SUFFIX) | ||||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||||
| ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1) | |||||
| DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | DBLASOBJS += dgemm_thread_nn.$(SUFFIX) dgemm_thread_nt.$(SUFFIX) dgemm_thread_tn.$(SUFFIX) dgemm_thread_tt.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -332,7 +332,7 @@ ifeq ($(BUILD_COMPLEX16),1) | |||||
| ctrsm_RTUU.$(SUFFIX) ctrsm_RTUN.$(SUFFIX) ctrsm_RTLU.$(SUFFIX) ctrsm_RTLN.$(SUFFIX) \ | ctrsm_RTUU.$(SUFFIX) ctrsm_RTUN.$(SUFFIX) ctrsm_RTLU.$(SUFFIX) ctrsm_RTLN.$(SUFFIX) \ | ||||
| ctrsm_RRUU.$(SUFFIX) ctrsm_RRUN.$(SUFFIX) ctrsm_RRLU.$(SUFFIX) ctrsm_RRLN.$(SUFFIX) \ | ctrsm_RRUU.$(SUFFIX) ctrsm_RRUN.$(SUFFIX) ctrsm_RRLU.$(SUFFIX) ctrsm_RRLN.$(SUFFIX) \ | ||||
| ctrsm_RCUU.$(SUFFIX) ctrsm_RCUN.$(SUFFIX) ctrsm_RCLU.$(SUFFIX) ctrsm_RCLN.$(SUFFIX) | ctrsm_RCUU.$(SUFFIX) ctrsm_RCUN.$(SUFFIX) ctrsm_RCLU.$(SUFFIX) ctrsm_RCLN.$(SUFFIX) | ||||
| ifndef USE_SIMPLE_THREADED_LEVEL3 | |||||
| ifneq ($(USE_SIMPLE_THREADED_LEVEL3), 1) | |||||
| CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread_LN.$(SUFFIX) cherk_thread_LC.$(SUFFIX) | CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread_LN.$(SUFFIX) cherk_thread_LC.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -339,8 +339,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| #else | #else | ||||
| if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | ||||
| else | else | ||||
| if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; | |||||
| /* | |||||
| if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; | |||||
| else | else | ||||
| */ | |||||
| if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | ||||
| #endif | #endif | ||||
| @@ -373,8 +373,10 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, | |||||
| #else | #else | ||||
| if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | if (min_jj >= 3*GEMM_UNROLL_N) min_jj = 3*GEMM_UNROLL_N; | ||||
| else | else | ||||
| /* | |||||
| if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; | if (min_jj >= 2*GEMM_UNROLL_N) min_jj = 2*GEMM_UNROLL_N; | ||||
| else | else | ||||
| */ | |||||
| if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | if (min_jj > GEMM_UNROLL_N) min_jj = GEMM_UNROLL_N; | ||||
| #endif | #endif | ||||
| /* Copy part of local region of B into workspace */ | /* Copy part of local region of B into workspace */ | ||||
| @@ -7,7 +7,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) | |||||
| ifdef SMP | ifdef SMP | ||||
| COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) | COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) | ||||
| ifndef NO_AFFINITY | |||||
| ifneq ($(NO_AFFINITY), 1) | |||||
| COMMONOBJS += init.$(SUFFIX) | COMMONOBJS += init.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | endif | ||||
| @@ -24,19 +24,23 @@ else | |||||
| ifeq ($(ARCH),zarch) | ifeq ($(ARCH),zarch) | ||||
| COMMONOBJS += dynamic_zarch.$(SUFFIX) | COMMONOBJS += dynamic_zarch.$(SUFFIX) | ||||
| else | else | ||||
| ifeq ($(ARCH),mips64) | |||||
| COMMONOBJS += dynamic_mips64.$(SUFFIX) | |||||
| else | |||||
| COMMONOBJS += dynamic.$(SUFFIX) | COMMONOBJS += dynamic.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| else | else | ||||
| COMMONOBJS += parameter.$(SUFFIX) | COMMONOBJS += parameter.$(SUFFIX) | ||||
| endif | endif | ||||
| ifdef EXPRECISION | |||||
| ifeq ($(EXPRECISION), 1) | |||||
| COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX) | COMMONOBJS += x_abs.$(SUFFIX) qlamch.$(SUFFIX) qlamc3.$(SUFFIX) | ||||
| endif | endif | ||||
| ifdef QUAD_PRECISION | |||||
| ifeq ($(QUAD_PRECISION), 1) | |||||
| COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX) | COMMONOBJS += addx.$(SUFFIX) mulx.$(SUFFIX) | ||||
| endif | endif | ||||
| @@ -46,11 +50,9 @@ ifeq ($(C_COMPILER), PGI) | |||||
| endif | endif | ||||
| endif | endif | ||||
| ifdef USE_CUDA | |||||
| ifeq ($(USE_CUDA), 1) | ifeq ($(USE_CUDA), 1) | ||||
| COMMONOBJS += cuda_init.$(SUFFIX) | COMMONOBJS += cuda_init.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | |||||
| ifdef FUNCTION_PROFILE | ifdef FUNCTION_PROFILE | ||||
| COMMONOBJS += profile.$(SUFFIX) | COMMONOBJS += profile.$(SUFFIX) | ||||
| @@ -94,10 +96,14 @@ else | |||||
| ifeq ($(ARCH),zarch) | ifeq ($(ARCH),zarch) | ||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_zarch.$(SUFFIX) | ||||
| else | else | ||||
| ifeq ($(ARCH),mips64) | |||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic_mips64.$(SUFFIX) | |||||
| else | |||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) dynamic.$(SUFFIX) | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | endif | ||||
| endif | |||||
| else | else | ||||
| HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | HPLOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) parameter.$(SUFFIX) | ||||
| endif | endif | ||||