Browse Source

Merge pull request #108 from xianyi/develop

rebase
tags/v0.3.12
Martin Kroeker GitHub 5 years ago
parent
commit
680f744abf
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 14 additions and 9 deletions
  1. +1
    -1
      Makefile.x86_64
  2. +2
    -2
      common_power.h
  3. +9
    -4
      kernel/Makefile
  4. +1
    -1
      kernel/power/zgemv_t_4.c
  5. +1
    -1
      lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c

+ 1
- 1
Makefile.x86_64 View File

@@ -90,7 +90,7 @@ ifeq ($(F_COMPILER), GFORTRAN)
GCCVERSIONGTEQ4 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGTEQ5 := $(shell expr `$(FC) -dumpversion | cut -f1 -d.` \>= 5)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(FC) -dumpversion | cut -f2 -d.` \>= 7)
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCVERSIONMINORGTEQ7)
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
FCOMMON_OPT += -mavx2
endif


+ 2
- 2
common_power.h View File

@@ -844,8 +844,8 @@ Lmcount$lazy_ptr:
#define BUFFER_SIZE ( 2 << 20)
#elif defined(PPC440FP2)
#define BUFFER_SIZE ( 16 << 20)
#elif defined(POWER8) || defined(POWER9) || defined(POWER10)
#define BUFFER_SIZE ( 64 << 20)
#elif defined(POWER6) || defined(POWER8) || defined(POWER9) || defined(POWER10)
#define BUFFER_SIZE ( 64 << 22)
#else
#define BUFFER_SIZE ( 16 << 20)
#endif


+ 9
- 4
kernel/Makefile View File

@@ -22,20 +22,25 @@ ifeq ($(C_COMPILER), CLANG)
override CFLAGS += -fno-integrated-as
endif
endif

AVX2OPT =
ifeq ($(C_COMPILER), GCC)
# AVX2 support was added in 4.7.0
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7), 11)
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5)
GCCMINORVERSIONGTEQ7 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 7)
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
AVX2OPT = -mavx2
endif
endif
ifeq ($(C_COMPILER), CLANG)
# Any clang posing as gcc 4.2 should be new enough (3.4 or later)
GCCVERSIONGTEQ4 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 4)
GCCVERSIONGTEQ5 := $(shell expr `$(CC) -dumpversion | cut -f1 -d.` \>= 5)
GCCMINORVERSIONGTEQ2 := $(shell expr `$(CC) -dumpversion | cut -f2 -d.` \>= 2)
ifeq ($(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ2), 11)
GCCVERSIONCHECK := $(GCCVERSIONGTEQ5)$(GCCVERSIONGTEQ4)$(GCCMINORVERSIONGTEQ7)
ifeq ($(GCCVERSIONCHECK), $(filter $(GCCVERSIONCHECK), 011 110 111))
AVX2OPT = -mavx2
endif
endif


+ 1
- 1
kernel/power/zgemv_t_4.c View File

@@ -513,7 +513,7 @@ static void zgemv_kernel_4x1(BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT al

#endif

static __attribute__((always_inline)) void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) {
static __attribute__((always_inline)) inline void copy_x(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_src) {
BLASLONG i;
for (i = 0; i < n; i++) {
*dest = *src;


+ 1
- 1
lapack-netlib/LAPACKE/src/lapacke_zgesvdq.c View File

@@ -71,7 +71,7 @@ lapack_int LAPACKE_zgesvdq( int matrix_layout, char joba, char jobp,
goto exit_level_0;
}
liwork = iwork_query;
lcwork = LAPACK_C2INT(cwork_query);
lcwork = LAPACK_Z2INT(cwork_query);
lrwork = (lapack_int)rwork_query;
/* Allocate memory for work arrays */
iwork = (lapack_int*)LAPACKE_malloc( sizeof(lapack_int) * liwork );


Loading…
Cancel
Save