Browse Source

Changed the prologue/epilogue to use the conventional register save area (general registers saved starting at offset 48 of the stack frame)

tags/v0.2.20^2
Abdurrauf 8 years ago
parent
commit
e831d6924e
3 changed files with 29 additions and 37 deletions
  1. +4
    -8
      kernel/zarch/gemm8x4V.S
  2. +10
    -13
      kernel/zarch/trmm8x4V.S
  3. +15
    -16
      kernel/zarch/ztrmm4x4V.S

+ 4
- 8
kernel/zarch/gemm8x4V.S View File

@@ -73,16 +73,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ASSEMBLER
#include "common.h"

/************** Notes ON IBM abi and IBM assembly**********************************************
* General registers r0 and r1 should be used internally whenever possible
* General registers r2 to r5 should be second choice
* General registers r12 to r15 should only be used for their standard function.
* r0 should not be used as address disp register
/*
#BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc
##bm=r2,bn=r3, bk=r4, alpha=f0,ba=r5,bb=r6,C=stack[160] ,ldc=stack[168]
**********************************************************************************************/
/* Note: r0 cannot be used as a base or index register in an address operand (register number 0 there means "no register"). */

#define BM %r2
#define BM_CUR %r0
@@ -109,7 +105,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

PROLOGUE
stmg %r6,%r12,40(%r15)
stmg %r6,%r12,48(%r15)
lg CIJ, 160(%r15)
lg LOCAL_VAR1, 168(%r15)
srlg BN_CUR,BN,2
@@ -606,7 +602,7 @@ la B,0(B,LOCAL_VAR2) /*refresh B=B+Bk*1*sizeof(double) */
ALIGN_2
.L_FUNC_END:
/*end*/
lmg %r6,%r12,40(%r15)
lmg %r6,%r12,48(%r15)
br %r14
.end



+ 10
- 13
kernel/zarch/trmm8x4V.S View File

@@ -73,17 +73,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ASSEMBLER
#include "common.h"

/************** Notes ON IBM abi and IBM assembly**********************************************
* General registers r0 and r1 should be used internally whenever possible
* General registers r2 to r5 should be second choice
* General registers r12 to r15 should only be used for their standard function.
* r0 should not be used as address disp register

/*
#BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FLOAT* C,BLASLONG ldc
##bm=r2,bn=r3, bk=r4, alpha=f0,ba=r5,bb=r6,C=stack[160] ,ldc=stack[168]
offset=stack[176]
**********************************************************************************************/
/* Note: r0 cannot be used as a base or index register in an address operand (register number 0 there means "no register"). */

#define BM %r2
#define BM_CUR %r0
@@ -131,16 +128,16 @@ offset=stack[176]
/***********************************DGEMM***********************************************************/

PROLOGUE
#if defined(TRMMKERNEL)
stmg %r6,%r13,40(%r15)
#if defined(TRMMKERNEL)
std OFFSET,40(%r15)
stmg %r6,%r13,48(%r15)
#else
stmg %r6,%r12,40(%r15)
stmg %r6,%r12,48(%r15)
#endif
lg CIJ, 160(%r15)
lg LOCAL_VAR1, 168(%r15)
#if defined(TRMMKERNEL)
lg OFF,176(%r15)
std OFFSET,32(%r15)
ldgr OFFSET ,OFF
#endif
srlg BN_CUR,BN,2
@@ -861,10 +858,10 @@ ALIGN_2
.L_FUNC_END:
/*end*/
#if defined(TRMMKERNEL)
ld %f8,32(%r15)
lmg %r6,%r13,40(%r15)
ld OFFSET,40(%r15)
lmg %r6,%r13,48(%r15)
#else
lmg %r6,%r12,40(%r15)
lmg %r6,%r12,48(%r15)
#endif
br %r14
.end


+ 15
- 16
kernel/zarch/ztrmm4x4V.S View File

@@ -73,11 +73,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ASSEMBLER
#include "common.h"

/************** Notes ON IBM abi and IBM assembly**********************************************
* General registers r0 and r1 should be used internally whenever possible
* General registers r2 to r5 should be second choice
* General registers r12 to r15 should only be used for their standard function.
* r0 should not be used as address disp register
/*

BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alphar,FLOAT alphai,FLOAT* ba,FLOAT* bb,
FLOAT* C,BLASLONG ldc, BLASLONG offset)
@@ -85,7 +82,7 @@ BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alphar,FLOAT alphai,FLOAT* ba,FLOAT* b
offset=stack[176]

**********************************************************************************************/
/* Note: r0 cannot be used as a base or index register in an address operand (register number 0 there means "no register"). */

#define BM %r2
#define BM_CUR %r0
@@ -118,21 +115,21 @@ offset=stack[176]
/***********************************ZGEMM**4x4*******************************************************/

PROLOGUE
#if defined(TRMMKERNEL)
#if defined(TRMMKERNEL)
std OFFSET ,40(%r15)
stmg %r6,%r13,48(%r15)
#else
stmg %r6,%r12,48(%r15)
#endif
std %f11,8(%r15)
std %f10,16(%r15)
std %f9,24(%r15)
std %f12,32(%r15)
std %f9, 128(%r15)
std %f10,136(%r15)
std %f11,144(%r15)
std %f12,152(%r15)

lg CIJ, 160(%r15)
lg LOCAL_VAR1, 168(%r15)
#if defined(TRMMKERNEL)
lg OFF,176(%r15)
std OFFSET,40(%r15)
ldgr OFFSET ,OFF
#endif
srlg BN_CUR,BN,2
@@ -709,16 +706,18 @@ la B,0(B,LOCAL_VAR2) /*refresh B=B+Bk*1*sizeof(complex) */
ALIGN_2
.L_FUNC_END:
/*end*/
ld %f11,8(%r15)
ld %f10,16(%r15)
ld %f9,24(%r15)
ld %f12,32(%r15)


#if defined(TRMMKERNEL)
ld OFFSET,40(%r15)
lmg %r6,%r13,48(%r15)
#else
lmg %r6,%r12,48(%r15)
#endif
ld %f9, 128(%r15)
ld %f10,136(%r15)
ld %f11,144(%r15)
ld %f12,152(%r15)
br %r14
.end



Loading…
Cancel
Save