Browse Source

fixed sgemm- and strmm-kernel

tags/v0.2.18^2
Werner Saar 9 years ago
parent
commit
e1df5a6e23
6 changed files with 2597 additions and 436 deletions
  1. +11
    -11
      kernel/power/sgemm_kernel_16x8_power8.S
  2. +2
    -2
      kernel/power/sgemm_logic_16x8_power8.S
  3. +2569
    -409
      kernel/power/sgemm_macros_16x8_power8.S
  4. +11
    -10
      kernel/power/strmm_kernel_16x8_power8.S
  5. +2
    -2
      kernel/power/strmm_logic_16x8_power8.S
  6. +2
    -2
      param.h

+ 11
- 11
kernel/power/sgemm_kernel_16x8_power8.S View File

@@ -26,10 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

 /**************************************************************************************
-* 2016/03/14 Werner Saar (wernsaar@googlemail.com)
+* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
 * BLASTEST : OK
 * CTEST : OK
 * TEST : OK
+* LAPACK-TEST : OK
 **************************************************************************************/

 /*********************************************************************/
@@ -81,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 #ifdef __64BIT__
-#define STACKSIZE 320
+#define STACKSIZE 340
 #define ALPHA_SP 296(SP)
 #define FZERO 304(SP)
 #else
@@ -127,10 +128,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 #define alpha_r vs30
-#define alpha_vr vs31

 #define o0 0

+#define TBUFFER r14
 #define o4 r15
 #define o12 r16
 #define o8 r17
@@ -202,6 +203,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 std r17, 256(SP)
 std r16, 264(SP)
 std r15, 272(SP)
+std r14, 280(SP)
 #else
 stw r31, 144(SP)
 stw r30, 148(SP)
@@ -220,6 +222,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 stw r17, 200(SP)
 stw r16, 204(SP)
 stw r15, 208(SP)
+stw r14, 212(SP)
 #endif

 // stfd f1, ALPHA_SP
@@ -259,24 +262,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 cmpwi cr0, K, 0
 ble .L999_H1

-li PRE, 384
+li PRE, 256
 li o4 , 4
 li o8 , 8
 li o12, 12
 li o16, 16
 li o32, 32
 li o48, 48
+addi TBUFFER, SP, 320

 addi T1, SP, 300
 stfs f1, 0(T1)
-stfs f1, 4(T1)
-stfs f1, 8(T1)
-stfs f1,12(T1)

-lxsspx vs28, 0, T1
-
-xxspltw alpha_r, vs28 , 0
-lxvw4x alpha_vr, 0, T1
+lxsspx alpha_r, 0, T1






@@ -326,6 +324,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ld r17, 256(SP)
 ld r16, 264(SP)
 ld r15, 272(SP)
+ld r14, 280(SP)
 #else
 lwz r31, 144(SP)
 lwz r30, 148(SP)
@@ -344,6 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 lwz r17, 200(SP)
 lwz r16, 204(SP)
 lwz r15, 208(SP)
+lwz r14, 212(SP)
 #endif

 addi SP, SP, STACKSIZE


+ 2
- 2
kernel/power/sgemm_logic_16x8_power8.S View File

@@ -26,13 +26,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

 /**************************************************************************************
-* 2016/03/14 Werner Saar (wernsaar@googlemail.com)
+* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
 * BLASTEST : OK
 * CTEST : OK
 * TEST : OK
+* LAPACK-TEST : OK
 **************************************************************************************/

-
 srawi. J, N, 3
 ble .LSGEMM_L8_END




+ 2569
- 409
kernel/power/sgemm_macros_16x8_power8.S
File diff suppressed because it is too large
View File


+ 11
- 10
kernel/power/strmm_kernel_16x8_power8.S View File

@@ -26,10 +26,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

 /**************************************************************************************
-* 2016/03/14 Werner Saar (wernsaar@googlemail.com)
+* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
 * BLASTEST : OK
 * CTEST : OK
 * TEST : OK
+* LAPACK-TEST : OK
 **************************************************************************************/

 /*********************************************************************/
@@ -81,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 #ifdef __64BIT__
-#define STACKSIZE 320
+#define STACKSIZE 340
 #define ALPHA_SP 296(SP)
 #define FZERO 304(SP)
 #else
@@ -127,10 +128,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endif

 #define alpha_r vs30
-#define alpha_vr vs31

 #define o0 0

+#define TBUFFER r13
 #define o12 r14
 #define o4 r15
 #define K1 r16
@@ -138,7 +139,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define L r18
 #define T1 r19
 #define KK r20
-#define KKK 21
+#define KKK r21
 #define I r22
 #define J r23
 #define AO r24
@@ -204,6 +205,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 std r16, 264(SP)
 std r15, 272(SP)
 std r14, 280(SP)
+std r13, 288(SP)
 #else
 stw r31, 144(SP)
 stw r30, 148(SP)
@@ -223,6 +225,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 stw r16, 204(SP)
 stw r15, 208(SP)
 stw r14, 212(SP)
+stw r13, 216(SP)
 #endif

 // stfd f1, ALPHA_SP
@@ -274,17 +277,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 li o16, 16
 li o32, 32
 li o48, 48
+addi TBUFFER, SP, 320

 addi T1, SP, 300
 stfs f1, 0(T1)
-stfs f1, 4(T1)
-stfs f1, 8(T1)
-stfs f1,12(T1)

-lxsspx vs28, 0, T1
+lxsspx alpha_r, 0, T1

-xxspltw alpha_r, vs28 , 0
-lxvw4x alpha_vr, 0, T1






@@ -335,6 +334,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ld r16, 264(SP)
 ld r15, 272(SP)
 ld r14, 280(SP)
+ld r13, 288(SP)
 #else
 lwz r31, 144(SP)
 lwz r30, 148(SP)
@@ -354,6 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 lwz r16, 204(SP)
 lwz r15, 208(SP)
 lwz r14, 212(SP)
+lwz r13, 216(SP)
 #endif

 addi SP, SP, STACKSIZE


+ 2
- 2
kernel/power/strmm_logic_16x8_power8.S View File

@@ -26,14 +26,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *****************************************************************************/

 /**************************************************************************************
-* 2016/03/14 Werner Saar (wernsaar@googlemail.com)
+* 2016/03/18 Werner Saar (wernsaar@googlemail.com)
 * BLASTEST : OK
 * CTEST : OK
 * TEST : OK
+* LAPACK-TEST : OK
 **************************************************************************************/


-
 srawi. J, N, 3
 ble .LSTRMM_L8_END




+ 2
- 2
param.h View File

@@ -1977,12 +1977,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define ZGEMM_DEFAULT_UNROLL_M 8
 #define ZGEMM_DEFAULT_UNROLL_N 2

-#define SGEMM_DEFAULT_P 960
+#define SGEMM_DEFAULT_P 480
 #define DGEMM_DEFAULT_P 480
 #define CGEMM_DEFAULT_P 480
 #define ZGEMM_DEFAULT_P 240

-#define SGEMM_DEFAULT_Q 720
+#define SGEMM_DEFAULT_Q 1440
 #define DGEMM_DEFAULT_Q 720
 #define CGEMM_DEFAULT_Q 720
 #define ZGEMM_DEFAULT_Q 360


Loading…
Cancel
Save