Browse Source

Updated the cgemm and ctrmm kernels for POWER8

tags/v0.2.18^2
Werner Saar 9 years ago
parent
commit
d4c0330967
5 changed files with 2697 additions and 2581 deletions
  1. +33
    -13
      kernel/power/cgemm_kernel_8x4_power8.S
  2. +280
    -278
      kernel/power/cgemm_logic_8x4_power8.S
  3. +2078
    -1997
      kernel/power/cgemm_macros_8x4_power8.S
  4. +29
    -15
      kernel/power/ctrmm_kernel_8x4_power8.S
  5. +277
    -278
      kernel/power/ctrmm_logic_8x4_power8.S

+ 33
- 13
kernel/power/cgemm_kernel_8x4_power8.S View File

@@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

/**************************************************************************************
* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
* 2016/04/03 Werner Saar (wernsaar@googlemail.com)
* BLASTEST : OK
* CTEST : OK
* TEST : OK
* LAPACK-TEST : OK
* LAPACK-TEST : OK
**************************************************************************************/

/*********************************************************************/
@@ -130,10 +130,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif

#define o0 0
#define alpha_r vs30
#define alpha_i vs31

#define TBUFFER r14
#define alpha_dr vs28
#define alpha_di vs29
#define alpha_sr vs30
#define alpha_si vs31


#define NOTUSED r14
#define L r15
#define o12 r16
#define o4 r17
@@ -271,21 +275,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cgemm_macros_8x4_power8.S"

cmpwi cr0, M, 0
ble .L999_H1
ble L999_H1
cmpwi cr0, N, 0
ble .L999_H1
ble L999_H1
cmpwi cr0, K, 0
ble .L999_H1
ble L999_H1

slwi LDC, LDC, ZBASE_SHIFT
li PRE, 256
li PRE, 384
li o4 , 4
li o8 , 8
li o12 , 12
li o16 , 16
li o32 , 32
li o48 , 48
addi TBUFFER, SP, 360

#ifdef __64BIT__
@@ -294,14 +297,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi T1 , SP, 224
#endif

lxsspx alpha_r, 0, T1
lxsspx alpha_i, o8, T1
stxsspx vs1, 0, T1
lxsspx alpha_dr, 0, T1
stxsspx vs2, o8 , T1
lxsspx alpha_di, o8, T1
addi T1, SP, 360
li T2, 0

stw T2, 0(T1)
stw T2, 4(T1)
stw T2, 8(T1)
stxsspx alpha_dr, o12, T1
lxvw4x alpha_sr, o0 , T1
addi T1, T1, 16

stw T2, 0(T1)
stw T2, 4(T1)
stw T2, 8(T1)
stxsspx alpha_di, o12, T1
lxvw4x alpha_si, o0 , T1

.align 5

#include "cgemm_logic_8x4_power8.S"

.L999:
L999:
addi r3, 0, 0

lfd f14, 0(SP)


+ 280
- 278
kernel/power/cgemm_logic_8x4_power8.S
File diff suppressed because it is too large
View File


+ 2078
- 1997
kernel/power/cgemm_macros_8x4_power8.S
File diff suppressed because it is too large
View File


+ 29
- 15
kernel/power/ctrmm_kernel_8x4_power8.S View File

@@ -26,11 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/

/**************************************************************************************
* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
* 2016/04/03 Werner Saar (wernsaar@googlemail.com)
* BLASTEST : OK
* CTEST : OK
* TEST : OK
* LAPACK-TEST : OK
* LAPACK-TEST : OK
**************************************************************************************/

/*********************************************************************/
@@ -129,18 +129,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endif

#define o0 0
#define alpha_r vs30
#define alpha_i vs31
#define alpha_vr vs28
#define alpha_vi vs29

#define alpha_dr vs28
#define alpha_di vs29
#define alpha_sr vs30
#define alpha_si vs31

#define o12 r12
#define KKK r13
#define K1 r14
#define L r15
#define o16 r16
#define TBUFFER r17
#define NOTUSED r17
#define T2 r19
#define KK r20
#define o8 r21
@@ -278,21 +278,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "cgemm_macros_8x4_power8.S"

cmpwi cr0, M, 0
ble .L999_H1
ble L999_H1
cmpwi cr0, N, 0
ble .L999_H1
ble L999_H1
cmpwi cr0, K, 0
ble .L999_H1
ble L999_H1

slwi LDC, LDC, ZBASE_SHIFT
li PRE, 256
li PRE, 384
li o4 , 4
li o8 , 8
li o12 , 12
li o16 , 16
li o32 , 32
li o48 , 48
addi TBUFFER, SP, 360


#ifdef __64BIT__
@@ -301,14 +300,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi T1, SP, 224
#endif

lxsspx alpha_r, 0, T1
lxsspx alpha_i, o8, T1
lxsspx alpha_dr, 0, T1
lxsspx alpha_di, o8, T1
addi T1, SP, 360
li T2, 0

stw T2, 0(T1)
stw T2, 4(T1)
stw T2, 8(T1)
stxsspx alpha_dr, o12, T1
lxvw4x alpha_sr, o0 , T1
addi T1, T1, 16

stw T2, 0(T1)
stw T2, 4(T1)
stw T2, 8(T1)
stxsspx alpha_di, o12, T1
lxvw4x alpha_si, o0 , T1

.align 5

#include "ctrmm_logic_8x4_power8.S"

.L999:
L999:
addi r3, 0, 0

lfd f14, 0(SP)


+ 277
- 278
kernel/power/ctrmm_logic_8x4_power8.S
File diff suppressed because it is too large
View File


Loading…
Cancel
Save