Browse Source

enabled optimized dgemm kernel for NEHALEM

tags/v0.2.10.rc1^2
wernsaar 11 years ago
parent
commit
aa2709c4e0
3 changed files with 14 additions and 15 deletions
  1. +11
    -12
      kernel/x86_64/KERNEL.NEHALEM
  2. +1
    -1
      lapack-netlib/TESTING/nep.in
  3. +2
    -2
      param.h

+ 11
- 12
kernel/x86_64/KERNEL.NEHALEM View File

@@ -13,13 +13,13 @@ SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX)
SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX)




DGEMMKERNEL = gemm_kernel_4x4_core2.S
DGEMMINCOPY =
DGEMMITCOPY =
DGEMMONCOPY = gemm_ncopy_4.S
DGEMMOTCOPY = gemm_tcopy_4.S
DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ =
DGEMMKERNEL = gemm_kernel_2x8_nehalem.S
DGEMMINCOPY = ../generic/gemm_ncopy_2.c
DGEMMITCOPY = ../generic/gemm_tcopy_2.c
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c
DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)


@@ -48,11 +48,10 @@ STRSMKERNEL_LT = trsm_kernel_LT_4x8_nehalem.S
STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S STRSMKERNEL_RN = trsm_kernel_LT_4x8_nehalem.S
STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S STRSMKERNEL_RT = trsm_kernel_RT_4x8_nehalem.S


DTRSMKERNEL_LN = trsm_kernel_LN_4x4_core2.S
DTRSMKERNEL_LT = trsm_kernel_LT_4x4_core2.S
DTRSMKERNEL_RN = trsm_kernel_LT_4x4_core2.S
DTRSMKERNEL_RT = trsm_kernel_RT_4x4_core2.S

DTRSMKERNEL_LN = trsm_kernel_LN_2x8_nehalem.S
DTRSMKERNEL_LT = trsm_kernel_LT_2x8_nehalem.S
DTRSMKERNEL_RN = trsm_kernel_LT_2x8_nehalem.S
DTRSMKERNEL_RT = trsm_kernel_RT_2x8_nehalem.S


CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S CTRSMKERNEL_LN = ztrsm_kernel_LN_2x4_nehalem.S
CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S CTRSMKERNEL_LT = ztrsm_kernel_LT_2x4_nehalem.S


+ 1
- 1
lapack-netlib/TESTING/nep.in View File

@@ -10,7 +10,7 @@ NEP: Data file for testing Nonsymmetric Eigenvalue Problem routines
0 5 7 3 200 Values of INIBL (nibble crossover point) 0 5 7 3 200 Values of INIBL (nibble crossover point)
1 2 4 2 1 Values of ISHFTS (number of simultaneous shifts) 1 2 4 2 1 Values of ISHFTS (number of simultaneous shifts)
0 1 2 0 1 Values of IACC22 (select structured matrix multiply: 0, 1 or 2) 0 1 2 0 1 Values of IACC22 (select structured matrix multiply: 0, 1 or 2)
20.0 Threshold value
70.0 Threshold value
T Put T to test the error exits T Put T to test the error exits
1 Code to interpret the seed 1 Code to interpret the seed
NEP 21 NEP 21

+ 2
- 2
param.h View File

@@ -1032,14 +1032,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define XGEMM_DEFAULT_UNROLL_N 1 #define XGEMM_DEFAULT_UNROLL_N 1
#else #else
#define SGEMM_DEFAULT_UNROLL_M 4 #define SGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 4
#define DGEMM_DEFAULT_UNROLL_M 2
#define QGEMM_DEFAULT_UNROLL_M 2 #define QGEMM_DEFAULT_UNROLL_M 2
#define CGEMM_DEFAULT_UNROLL_M 2 #define CGEMM_DEFAULT_UNROLL_M 2
#define ZGEMM_DEFAULT_UNROLL_M 1 #define ZGEMM_DEFAULT_UNROLL_M 1
#define XGEMM_DEFAULT_UNROLL_M 1 #define XGEMM_DEFAULT_UNROLL_M 1


#define SGEMM_DEFAULT_UNROLL_N 8 #define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_N 4
#define DGEMM_DEFAULT_UNROLL_N 8
#define QGEMM_DEFAULT_UNROLL_N 2 #define QGEMM_DEFAULT_UNROLL_N 2
#define CGEMM_DEFAULT_UNROLL_N 4 #define CGEMM_DEFAULT_UNROLL_N 4
#define ZGEMM_DEFAULT_UNROLL_N 4 #define ZGEMM_DEFAULT_UNROLL_N 4


Loading…
Cancel
Save