Browse Source

Merge branch 'develop' of https://github.com/wernsaar/OpenBLAS into wernsaar-develop

tags/v0.2.9.rc1
Zhang Xianyi 12 years ago
parent
commit
c0159d44a3
3 changed files with 1085 additions and 20 deletions
  1. +2
    -20
      kernel/x86_64/KERNEL.BULLDOZER
  2. +8
    -0
      kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S
  3. +1075
    -0
      kernel/x86_64/dtrsm_kernel_RN_8x2_bulldozer.S

+ 2
- 20
kernel/x86_64/KERNEL.BULLDOZER View File

@@ -54,9 +54,8 @@ STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
#DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
DTRSMKERNEL_LT = dtrsm_kernel_LT_8x2_bulldozer.S
DTRSMKERNEL_RN = dtrsm_kernel_RN_8x2_bulldozer.S
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c
@@ -69,21 +68,4 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c

#STRMMKERNEL = ../generic/trmmkernel_16x2.c
STRMMKERNEL = sgemm_kernel_16x2_bulldozer.S
#STRMMKERNEL_RT = ../generic/trmmkernel_16x2.c
#STRMMKERNEL_RN = ../generic/trmmkernel_16x2.c

DTRMMKERNEL = dgemm_kernel_8x2_bulldozer.S
#DTRMMKERNEL_RT = ../generic/trmmkernel_8x2.c
#DTRMMKERNEL_RN = ../generic/trmmkernel_8x2.c

CTRMMKERNEL = cgemm_kernel_4x2_bulldozer.S

ZTRMMKERNEL = zgemm_kernel_2x2_bulldozer.S
#ZTRMMKERNEL = ../generic/ztrmmkernel_4x2.c
#ZTRMMKERNEL_RR = ../generic/ztrmmkernel_2x2.c
#ZTRMMKERNEL_RC = ../generic/ztrmmkernel_2x2.c




+ 8
- 0
kernel/x86_64/dtrsm_kernel_LT_8x2_bulldozer.S View File

@@ -84,6 +84,9 @@

#endif

/* Displacements used by the prefetcht0 operands in the .L52 loop:
 * A_PR1 is added to the AO-indexed address, B_PR1 to the BO-indexed one.
 * NOTE(review): how these distances were chosen is not shown here.
 */
#define A_PR1 384
#define B_PR1 192


.macro KERNEL8x2_SUB
vmovddup -16*SIZE(BO,%rax,2), %xmm1
@@ -708,9 +711,14 @@
ALIGN_4

/* Inner loop: four KERNEL8x2_SUB expansions per pass.
 * Each expansion is preceded by a prefetcht0 addressed off AO; a single
 * prefetcht0 addressed off BO is issued at the top of the pass.
 * The AO prefetch operand is written identically all four times; whether
 * the effective address differs between them depends on whether
 * KERNEL8x2_SUB advances %rax (macro body not fully visible in this
 * excerpt -- TODO confirm).
 */
.L52:
/* Prefetch at A_PR1 + AO + %rax*8. */
prefetcht0 A_PR1(AO,%rax,8)
/* Prefetch at B_PR1 + BO + %rax*2; only once per pass. */
prefetcht0 B_PR1(BO,%rax,2)
KERNEL8x2_SUB
prefetcht0 A_PR1(AO,%rax,8)
KERNEL8x2_SUB
prefetcht0 A_PR1(AO,%rax,8)
KERNEL8x2_SUB
prefetcht0 A_PR1(AO,%rax,8)
KERNEL8x2_SUB

/* Repeat while the signed "less" condition holds. The instruction that
 * sets the flags is not in this excerpt; presumably it is the last
 * flag-writing instruction inside KERNEL8x2_SUB -- NOTE(review): confirm.
 */
jl .L52


+ 1075
- 0
kernel/x86_64/dtrsm_kernel_RN_8x2_bulldozer.S
File diff suppressed because it is too large
View File


Loading…
Cancel
Save