Save and restore VSX registers (tags/v0.3.0)
| @@ -82,15 +82,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 32000 | |||||
| #define ALPHA_R_SP 296(SP) | |||||
| #define ALPHA_I_SP 304(SP) | |||||
| #define FZERO 312(SP) | |||||
| #define STACKSIZE 32196 | |||||
| #define ALPHA_R_SP 296+196(SP) | |||||
| #define ALPHA_I_SP 304+196(SP) | |||||
| #define FZERO 312+196(SP) | |||||
| #else | #else | ||||
| #define STACKSIZE 256 | |||||
| #define ALPHA_R_SP 224(SP) | |||||
| #define ALPHA_I_SP 232(SP) | |||||
| #define FZERO 240(SP) | |||||
| #define STACKSIZE 456 | |||||
| #define ALPHA_R_SP 224+200(SP) | |||||
| #define ALPHA_I_SP 232+200(SP) | |||||
| #define FZERO 240+200(SP) | |||||
| #endif | #endif | ||||
| #define M r3 | #define M r3 | ||||
| @@ -138,6 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define FRAMEPOINTER r12 | #define FRAMEPOINTER r12 | ||||
| #define VECSAVE r11 | |||||
| #define BBUFFER r14 | #define BBUFFER r14 | ||||
| #define L r15 | #define L r15 | ||||
| #define o12 r16 | #define o12 r16 | ||||
| @@ -167,6 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| addi SP, SP, -STACKSIZE | addi SP, SP, -STACKSIZE | ||||
| addi SP, SP, -STACKSIZE | addi SP, SP, -STACKSIZE | ||||
| addi SP, SP, -STACKSIZE | addi SP, SP, -STACKSIZE | ||||
| li r0, 0 | li r0, 0 | ||||
| stfd f14, 0(SP) | stfd f14, 0(SP) | ||||
| @@ -211,6 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -230,7 +234,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r16, 204(SP) | stw r16, 204(SP) | ||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| stw r14, 212(SP) | stw r14, 212(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11, 0 | |||||
| stfs f1, ALPHA_R_SP | stfs f1, ALPHA_R_SP | ||||
| stfs f2, ALPHA_I_SP | stfs f2, ALPHA_I_SP | ||||
| @@ -301,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| addi T1 , SP, 296 | |||||
| addi T1 , SP, 296+196 | |||||
| #else | #else | ||||
| addi T1 , SP, 224 | |||||
| addi T1 , SP, 224+200 | |||||
| #endif | #endif | ||||
| stxsspx vs1, 0, T1 | stxsspx vs1, 0, T1 | ||||
| @@ -375,6 +404,7 @@ L999: | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -394,7 +424,32 @@ L999: | |||||
| lwz r16, 204(SP) | lwz r16, 204(SP) | ||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| lwz r14, 212(SP) | lwz r14, 212(SP) | ||||
| addi r11, 224 | |||||
| #endif | #endif | ||||
| lvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r0 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| @@ -404,4 +459,4 @@ L999: | |||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| #endif | |||||
| #endif^ | |||||
| @@ -88,6 +88,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define J r12 | #define J r12 | ||||
| #define PREA r14 | #define PREA r14 | ||||
| #define PREB r15 | #define PREB r15 | ||||
| #define BO r16 | #define BO r16 | ||||
| @@ -109,7 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "cgemm_tcopy_macros_8_power8.S" | #include "cgemm_tcopy_macros_8_power8.S" | ||||
| #define STACKSIZE 384 | |||||
| #define STACKSIZE 576 | |||||
| PROLOGUE | PROLOGUE | ||||
| @@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11, 0 | |||||
| cmpwi cr0, M, 0 | cmpwi cr0, M, 0 | ||||
| ble- L999 | ble- L999 | ||||
| @@ -197,9 +223,33 @@ L999: | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| lvx v20, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -83,13 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 400 | #define STACKSIZE 400 | ||||
| #define ALPHA_R_SP 304(SP) | |||||
| #define ALPHA_I_SP 312(SP) | |||||
| #define STACKSIZE 592 | |||||
| #define ALPHA_R_SP 304+192(SP) | |||||
| #define ALPHA_I_SP 312+192(SP) | |||||
| #else | #else | ||||
| #define STACKSIZE 256 | #define STACKSIZE 256 | ||||
| #define ALPHA_R_SP 224(SP) | |||||
| #define ALPHA_I_SP 232(SP) | |||||
| #define FZERO 240(SP) | |||||
| #define STACKSIZE 452 | |||||
| #define ALPHA_R_SP 224+196(SP) | |||||
| #define ALPHA_I_SP 232+196(SP) | |||||
| #define FZERO 240+196(SP) | |||||
| #endif | #endif | ||||
| #define M r3 | #define M r3 | ||||
| @@ -135,6 +137,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define alpha_sr vs30 | #define alpha_sr vs30 | ||||
| #define alpha_si vs31 | #define alpha_si vs31 | ||||
| #define VECSAVE r11 | |||||
| #define o12 r12 | #define o12 r12 | ||||
| #define KKK r13 | #define KKK r13 | ||||
| #define K1 r14 | #define K1 r14 | ||||
| @@ -208,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| std r13, 288(SP) | std r13, 288(SP) | ||||
| std r12, 296(SP) | std r12, 296(SP) | ||||
| addi r11, SP, 304 | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -228,7 +233,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| stw r14, 212(SP) | stw r14, 212(SP) | ||||
| stw r13, 216(SP) | stw r13, 216(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11, 0 | |||||
| stfs f1, ALPHA_R_SP | stfs f1, ALPHA_R_SP | ||||
| stfs f2, ALPHA_I_SP | stfs f2, ALPHA_I_SP | ||||
| @@ -295,9 +325,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| addi T1, SP, 304 | |||||
| addi T1, SP, 304+192 | |||||
| #else | #else | ||||
| addi T1, SP, 224 | |||||
| addi T1, SP, 224+196 | |||||
| #endif | #endif | ||||
| lxsspx alpha_dr, 0, T1 | lxsspx alpha_dr, 0, T1 | ||||
| @@ -369,6 +399,7 @@ L999: | |||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| ld r13, 288(SP) | ld r13, 288(SP) | ||||
| ld r12, 296(SP) | ld r12, 296(SP) | ||||
| addi r11, SP, 304 | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -389,10 +420,34 @@ L999: | |||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| lwz r14, 212(SP) | lwz r14, 212(SP) | ||||
| lwz r13, 216(SP) | lwz r13, 216(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| lvx v20, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -83,12 +83,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 320 | #define STACKSIZE 320 | ||||
| #define ALPHA_SP 296(SP) | |||||
| #define FZERO 304(SP) | |||||
| #define STACKSIZE 512 | |||||
| #define ALPHA_SP 296+192(SP) | |||||
| #define FZERO 304+192(SP) | |||||
| #else | #else | ||||
| #define STACKSIZE 240 | #define STACKSIZE 240 | ||||
| #define ALPHA_SP 224(SP) | |||||
| #define FZERO 232(SP) | |||||
| #define STACKSIZE 440 | |||||
| #define ALPHA_SP 224+200(SP) | |||||
| #define FZERO 232+200(SP) | |||||
| #endif | #endif | ||||
| #define M r3 | #define M r3 | ||||
| @@ -210,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11,SP,288 | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -229,7 +232,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r16, 204(SP) | stw r16, 204(SP) | ||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| stw r14, 212(SP) | stw r14, 212(SP) | ||||
| addi r11,SP,224 | |||||
| #endif | #endif | ||||
| stvx v20, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v21, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v22, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v23, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v24, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v25, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v26, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v27, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v28, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v29, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v30, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v31, r11,r0 | |||||
| li r11,0 | |||||
| stfd f1, ALPHA_SP | stfd f1, ALPHA_SP | ||||
| stw r0, FZERO | stw r0, FZERO | ||||
| @@ -269,12 +297,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ble .L999_H1 | ble .L999_H1 | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| addi T1, SP, 296 | |||||
| addi T1, SP, 296+192 | |||||
| #else | #else | ||||
| addi T1, SP, 224 | |||||
| addi T1, SP, 224+200 | |||||
| #endif | #endif | ||||
| li PRE, 384 | |||||
| li PRE, 384 | |||||
| li o8 , 8 | li o8 , 8 | ||||
| li o16, 16 | li o16, 16 | ||||
| li o24, 24 | li o24, 24 | ||||
| @@ -334,6 +362,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11,SP,288 | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -353,10 +382,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| lwz r16, 204(SP) | lwz r16, 204(SP) | ||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| lwz r14, 212(SP) | lwz r14, 212(SP) | ||||
| addi r11,SP,224 | |||||
| #endif | #endif | ||||
| lvx v20, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v21, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v22, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v23, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v24, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v25, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v26, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v27, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v28, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v29, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v30, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v31, r11,r3 | |||||
| li r11,0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -110,12 +110,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "dgemm_ncopy_macros_4_power8.S" | #include "dgemm_ncopy_macros_4_power8.S" | ||||
| #define STACKSIZE 384 | #define STACKSIZE 384 | ||||
| #define STACKSIZE 576 | |||||
| PROLOGUE | PROLOGUE | ||||
| PROFCODE | PROFCODE | ||||
| addi SP, SP, -STACKSIZE | addi SP, SP, -STACKSIZE | ||||
| //addi SP, SP, -208 | |||||
| li r0, 0 | li r0, 0 | ||||
| stfd f14, 0(SP) | stfd f14, 0(SP) | ||||
| @@ -157,6 +158,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11,SP,288 | |||||
| stvx v20, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v21, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v22, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v23, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v24, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v25, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v26, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v27, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v28, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v29, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v30, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v31, r11,r0 | |||||
| li r11,0 | |||||
| cmpwi cr0, M, 0 | cmpwi cr0, M, 0 | ||||
| ble- L999 | ble- L999 | ||||
| cmpwi cr0, N, 0 | cmpwi cr0, N, 0 | ||||
| @@ -164,8 +191,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| slwi LDA, LDA, BASE_SHIFT | slwi LDA, LDA, BASE_SHIFT | ||||
| li PREA, 384 | |||||
| li PREB, 384 | |||||
| //li PREA, 384 | |||||
| //li PREB, 384 | |||||
| li PREA, 576 | |||||
| li PREB, 576 | |||||
| li o8, 8 | li o8, 8 | ||||
| li o16, 16 | li o16, 16 | ||||
| @@ -219,9 +249,34 @@ L999: | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11,SP,288 | |||||
| lvx v20, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v21, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v22, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v23, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v24, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v25, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v26, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v27, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v28, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v29, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v30, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v31, r11,r3 | |||||
| li r11,0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| //addi SP, SP, 208 | |||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -110,12 +110,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "dgemm_tcopy_macros_16_power8.S" | #include "dgemm_tcopy_macros_16_power8.S" | ||||
| #define STACKSIZE 384 | #define STACKSIZE 384 | ||||
| #define STACKSIZE 576 | |||||
| PROLOGUE | PROLOGUE | ||||
| PROFCODE | PROFCODE | ||||
| addi SP, SP, -STACKSIZE | addi SP, SP, -STACKSIZE | ||||
| //addi SP, SP, -208 | |||||
| li r0, 0 | li r0, 0 | ||||
| std r31, 144(SP) | std r31, 144(SP) | ||||
| @@ -136,6 +139,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11,SP,288 | |||||
| stvx v20, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v21, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v22, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v23, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v24, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v25, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v26, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v27, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v28, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v29, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v30, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v31, r11,r0 | |||||
| li r11,0 | |||||
| cmpwi cr0, M, 0 | cmpwi cr0, M, 0 | ||||
| ble- L999 | ble- L999 | ||||
| @@ -170,7 +198,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| add B2, B2, B | add B2, B2, B | ||||
| add B1, B1, B | add B1, B1, B | ||||
| li PREA, 384 | |||||
| //li PREA, 384 | |||||
| li PREA, 576 | |||||
| addi PREB, M16, 128 | addi PREB, M16, 128 | ||||
| li o8, 8 | li o8, 8 | ||||
| @@ -202,9 +231,34 @@ L999: | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11,SP,288 | |||||
| lvx v20, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v21, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v22, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v23, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v24, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v25, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v26, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v27, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v28, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v29, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v30, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v31, r11,r3 | |||||
| li r11,0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| //addi SP, SP, 208 | |||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -83,12 +83,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 320 | #define STACKSIZE 320 | ||||
| #define ALPHA_SP 296(SP) | |||||
| #define FZERO 304(SP) | |||||
| #define STACKSIZE 520 | |||||
| #define ALPHA_SP 296+200(SP) | |||||
| #define FZERO 304+200(SP) | |||||
| #else | #else | ||||
| #define STACKSIZE 240 | |||||
| #define ALPHA_SP 224(SP) | |||||
| #define FZERO 232(SP) | |||||
| #define STACKSIZE 436 | |||||
| #define ALPHA_SP 224+196(SP) | |||||
| #define FZERO 232+196(SP) | |||||
| #endif | #endif | ||||
| #define M r3 | #define M r3 | ||||
| @@ -152,6 +153,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define PRE r30 | #define PRE r30 | ||||
| #define T2 r31 | #define T2 r31 | ||||
| #define VECSAVE r11 | |||||
| #include "dtrmm_macros_16x4_power8.S" | #include "dtrmm_macros_16x4_power8.S" | ||||
| @@ -206,6 +209,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| std r13, 288(SP) | std r13, 288(SP) | ||||
| addi r11, SP, 304 | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -226,7 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| stw r14, 212(SP) | stw r14, 212(SP) | ||||
| stw r13, 216(SP) | stw r13, 216(SP) | ||||
| addi r11, r0, 224 | |||||
| #endif | #endif | ||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11 ,r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11,0 | |||||
| stw r31, 144(SP) | |||||
| stfd f1, ALPHA_SP | stfd f1, ALPHA_SP | ||||
| stw r0, FZERO | stw r0, FZERO | ||||
| @@ -270,9 +301,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ble .L999_H1 | ble .L999_H1 | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| addi ALPHA, SP, 296 | |||||
| addi ALPHA, SP, 296+200 | |||||
| #else | #else | ||||
| addi ALPHA, SP, 224 | |||||
| addi ALPHA, SP, 224+196 | |||||
| #endif | #endif | ||||
| li PRE, 256 | li PRE, 256 | ||||
| @@ -332,6 +363,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| ld r13, 288(SP) | ld r13, 288(SP) | ||||
| addi r11, SP, 304 | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -352,10 +384,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| lwz r14, 212(SP) | lwz r14, 212(SP) | ||||
| lwz r13, 216(SP) | lwz r13, 216(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| lvx v20, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -48,8 +48,9 @@ | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 320 | #define STACKSIZE 320 | ||||
| #define ALPHA 296(SP) | |||||
| #define FZERO 304(SP) | |||||
| #define STACKSIZE 520 | |||||
| #define ALPHA 296+200(SP) | |||||
| #define FZERO 304+200(SP) | |||||
| #else | #else | ||||
| #define STACKSIZE 240 | #define STACKSIZE 240 | ||||
| #define ALPHA 224(SP) | #define ALPHA 224(SP) | ||||
| @@ -112,6 +113,8 @@ | |||||
| #define o48 r30 | #define o48 r30 | ||||
| #define T1 r31 | #define T1 r31 | ||||
| #define VECSAVE r11 | |||||
| #include "dtrsm_macros_LT_16x4_power8.S" | #include "dtrsm_macros_LT_16x4_power8.S" | ||||
| #ifndef NEEDPARAM | #ifndef NEEDPARAM | ||||
| @@ -163,6 +166,7 @@ | |||||
| std r17, 256(SP) | std r17, 256(SP) | ||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| addi r11,SP,288 | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -178,7 +182,32 @@ | |||||
| stw r20, 188(SP) | stw r20, 188(SP) | ||||
| stw r19, 192(SP) | stw r19, 192(SP) | ||||
| stw r18, 196(SP) | stw r18, 196(SP) | ||||
| addi r11,SP,208 | |||||
| #endif | #endif | ||||
| stvx v20, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v21, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v22, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v23, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v24, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v25, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v26, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v27, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v28, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v29, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v30, r11,r0 | |||||
| addi r11,r11,16 | |||||
| stvx v31, r11,r0 | |||||
| li r11,0 | |||||
| #if defined(_AIX) || defined(__APPLE__) | #if defined(_AIX) || defined(__APPLE__) | ||||
| @@ -269,6 +298,7 @@ L999: | |||||
| ld r17, 256(SP) | ld r17, 256(SP) | ||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| addi r11,SP,288 | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -284,10 +314,35 @@ L999: | |||||
| lwz r20, 188(SP) | lwz r20, 188(SP) | ||||
| lwz r19, 192(SP) | lwz r19, 192(SP) | ||||
| lwz r18, 196(SP) | lwz r18, 196(SP) | ||||
| addi r11,SP,208 | |||||
| #endif | #endif | ||||
| lvx v20, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v21, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v22, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v23, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v24, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v25, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v26, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v27, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v28, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v29, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v30, r11,r3 | |||||
| addi r11,r11,16 | |||||
| lvx v31, r11,r3 | |||||
| li r11,0 | |||||
| addi SP, SP, STACKSIZE | |||||
| addi SP, SP, STACKSIZE | |||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -83,12 +83,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 32752 | #define STACKSIZE 32752 | ||||
| #define ALPHA_SP 296(SP) | |||||
| #define FZERO 304(SP) | |||||
| #define ALPHA_SP 296+192(SP) | |||||
| #define FZERO 304+192(SP) | |||||
| #else | #else | ||||
| #define STACKSIZE 240 | |||||
| #define ALPHA_SP 224(SP) | |||||
| #define FZERO 232(SP) | |||||
| #define STACKSIZE 440 | |||||
| #define ALPHA_SP 224+200(SP) | |||||
| #define FZERO 232+200(SP) | |||||
| #endif | #endif | ||||
| #define M r3 | #define M r3 | ||||
| @@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define o0 0 | #define o0 0 | ||||
| #define VECSAVE r11 | |||||
| #define FRAMEPOINTER r12 | #define FRAMEPOINTER r12 | ||||
| #define BBUFFER r14 | #define BBUFFER r14 | ||||
| @@ -211,6 +213,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -230,7 +233,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r16, 204(SP) | stw r16, 204(SP) | ||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| stw r14, 212(SP) | stw r14, 212(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11,0 | |||||
| // stfd f1, ALPHA_SP | // stfd f1, ALPHA_SP | ||||
| // stw r0, FZERO | // stw r0, FZERO | ||||
| @@ -281,7 +310,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| li T1, -4096 | li T1, -4096 | ||||
| and BBUFFER, BBUFFER, T1 | and BBUFFER, BBUFFER, T1 | ||||
| addi T1, SP, 300 | |||||
| addi T1, SP, 300+192 | |||||
| stxsspx f1, o0 , T1 | stxsspx f1, o0 , T1 | ||||
| stxsspx f1, o4 , T1 | stxsspx f1, o4 , T1 | ||||
| stxsspx f1, o8 , T1 | stxsspx f1, o8 , T1 | ||||
| @@ -339,6 +368,7 @@ L999: | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -358,13 +388,38 @@ L999: | |||||
| lwz r16, 204(SP) | lwz r16, 204(SP) | ||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| lwz r14, 212(SP) | lwz r14, 212(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| lvx v20, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "sgemm_tcopy_macros_16_power8.S" | #include "sgemm_tcopy_macros_16_power8.S" | ||||
| #define STACKSIZE 384 | |||||
| #define STACKSIZE 576 | |||||
| PROLOGUE | PROLOGUE | ||||
| PROFCODE | PROFCODE | ||||
| @@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11 ,SP, 288 | |||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11, 0 | |||||
| cmpwi cr0, M, 0 | cmpwi cr0, M, 0 | ||||
| ble- L999 | ble- L999 | ||||
| @@ -203,9 +227,33 @@ L999: | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| lvx v20, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "sgemm_tcopy_macros_8_power8.S" | #include "sgemm_tcopy_macros_8_power8.S" | ||||
| #define STACKSIZE 384 | |||||
| #define STACKSIZE 576 | |||||
| PROLOGUE | PROLOGUE | ||||
| PROFCODE | PROFCODE | ||||
| @@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11, 0 | |||||
| cmpwi cr0, M, 0 | cmpwi cr0, M, 0 | ||||
| ble- L999 | ble- L999 | ||||
| @@ -198,9 +222,33 @@ L999: | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11,SP,288 | |||||
| lvx v20, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -83,8 +83,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 340 | #define STACKSIZE 340 | ||||
| #define ALPHA_SP 296(SP) | |||||
| #define FZERO 304(SP) | |||||
| #define STACKSIZE 540 | |||||
| #define ALPHA_SP 296+200(SP) | |||||
| #define FZERO 304+200(SP) | |||||
| #else | #else | ||||
| #define STACKSIZE 240 | #define STACKSIZE 240 | ||||
| #define ALPHA_SP 224(SP) | #define ALPHA_SP 224(SP) | ||||
| @@ -132,6 +133,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define o0 0 | #define o0 0 | ||||
| #define VECSAVE r11 | |||||
| #define TBUFFER r13 | #define TBUFFER r13 | ||||
| #define o12 r14 | #define o12 r14 | ||||
| #define o4 r15 | #define o4 r15 | ||||
| @@ -207,6 +210,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| std r13, 288(SP) | std r13, 288(SP) | ||||
| addi r11, SP, 304 | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -226,8 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r16, 204(SP) | stw r16, 204(SP) | ||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| stw r14, 212(SP) | stw r14, 212(SP) | ||||
| stw r13, 216(SP) | |||||
| stw r13, 216(SP) | |||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11, 0 | |||||
| // stfd f1, ALPHA_SP | // stfd f1, ALPHA_SP | ||||
| // stw r0, FZERO | // stw r0, FZERO | ||||
| @@ -271,16 +301,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| cmpwi cr0, K, 0 | cmpwi cr0, K, 0 | ||||
| ble L999_H1 | ble L999_H1 | ||||
| li PRE, 256 | |||||
| li PRE, 256 | |||||
| li o4 , 4 | li o4 , 4 | ||||
| li o8 , 8 | li o8 , 8 | ||||
| li o12, 12 | li o12, 12 | ||||
| li o16, 16 | li o16, 16 | ||||
| li o32, 32 | li o32, 32 | ||||
| li o48, 48 | li o48, 48 | ||||
| addi TBUFFER, SP, 320 | |||||
| addi TBUFFER, SP, 320+200 | |||||
| addi T1, SP, 300 | |||||
| addi T1, SP, 300+200 | |||||
| stxsspx f1, o0 , T1 | stxsspx f1, o0 , T1 | ||||
| stxsspx f1, o4 , T1 | stxsspx f1, o4 , T1 | ||||
| stxsspx f1, o8 , T1 | stxsspx f1, o8 , T1 | ||||
| @@ -339,6 +369,7 @@ L999: | |||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| ld r13, 288(SP) | ld r13, 288(SP) | ||||
| addi r11, SP, 304 | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -359,10 +390,34 @@ L999: | |||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| lwz r14, 212(SP) | lwz r14, 212(SP) | ||||
| lwz r13, 216(SP) | lwz r13, 216(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| lvx v20, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -117,15 +117,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 32000 | |||||
| #define ALPHA_R_SP 296(SP) | |||||
| #define ALPHA_I_SP 304(SP) | |||||
| #define FZERO 312(SP) | |||||
| #define STACKSIZE 32192 | |||||
| #define ALPHA_R_SP 296+192(SP) | |||||
| #define ALPHA_I_SP 304+192(SP) | |||||
| #define FZERO 312+192(SP) | |||||
| #else | #else | ||||
| #define STACKSIZE 256 | |||||
| #define ALPHA_R_SP 224(SP) | |||||
| #define ALPHA_I_SP 232(SP) | |||||
| #define FZERO 240(SP) | |||||
| #define STACKSIZE 460 | |||||
| #define ALPHA_R_SP 224+204(SP) | |||||
| #define ALPHA_I_SP 232+204(SP) | |||||
| #define FZERO 240+204(SP) | |||||
| #endif | #endif | ||||
| #define M r3 | #define M r3 | ||||
| @@ -168,6 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define alpha_r vs30 | #define alpha_r vs30 | ||||
| #define alpha_i vs31 | #define alpha_i vs31 | ||||
| #define VECSAVE r11 | |||||
| #define FRAMEPOINTER r12 | #define FRAMEPOINTER r12 | ||||
| @@ -245,6 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -263,7 +265,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r17, 200(SP) | stw r17, 200(SP) | ||||
| stw r16, 204(SP) | stw r16, 204(SP) | ||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11,0 | |||||
| stfd f1, ALPHA_R_SP | stfd f1, ALPHA_R_SP | ||||
| stfd f2, ALPHA_I_SP | stfd f2, ALPHA_I_SP | ||||
| @@ -332,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| and BBUFFER, BBUFFER, T1 | and BBUFFER, BBUFFER, T1 | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| addi ALPHA, SP, 296 | |||||
| addi ALPHA, SP, 296+192 | |||||
| #else | #else | ||||
| addi ALPHA, SP, 224 | |||||
| addi ALPHA, SP, 224+192+12 | |||||
| #endif | #endif | ||||
| lxsdx alpha_r, 0, ALPHA | lxsdx alpha_r, 0, ALPHA | ||||
| @@ -389,6 +416,7 @@ L999: | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -407,13 +435,37 @@ L999: | |||||
| lwz r17, 200(SP) | lwz r17, 200(SP) | ||||
| lwz r16, 204(SP) | lwz r16, 204(SP) | ||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| lvx v20, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -110,6 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #include "zgemm_tcopy_macros_8_power8.S" | #include "zgemm_tcopy_macros_8_power8.S" | ||||
| #define STACKSIZE 384 | #define STACKSIZE 384 | ||||
| #define STACKSIZE 576 | |||||
| PROLOGUE | PROLOGUE | ||||
| @@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r16, 264(SP) | std r16, 264(SP) | ||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| addi r11, SP ,288 | |||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11 ,16 | |||||
| stvx v31, r11, r0 | |||||
| li r11,0 | |||||
| cmpwi cr0, M, 0 | cmpwi cr0, M, 0 | ||||
| ble- L999 | ble- L999 | ||||
| @@ -196,9 +222,33 @@ L999: | |||||
| ld r16, 264(SP) | ld r16, 264(SP) | ||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| addi r11, SP, 288 | |||||
| lvx v20, r11,r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11,0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||
| @@ -1,3 +1,4 @@ | |||||
| /*************************************************************************** | /*************************************************************************** | ||||
| Copyright (c) 2013-2016, The OpenBLAS Project | Copyright (c) 2013-2016, The OpenBLAS Project | ||||
| All rights reserved. | All rights reserved. | ||||
| @@ -82,15 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #endif | #endif | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define STACKSIZE 320 | |||||
| #define ALPHA_R_SP 296(SP) | |||||
| #define ALPHA_I_SP 304(SP) | |||||
| #define FZERO 312(SP) | |||||
| #define STACKSIZE 520 | |||||
| #define ALPHA_R_SP 296+200(SP) | |||||
| #define ALPHA_I_SP 304+200(SP) | |||||
| #define FZERO 312+200(SP) | |||||
| #else | #else | ||||
| #define STACKSIZE 256 | |||||
| #define ALPHA_R_SP 224(SP) | |||||
| #define ALPHA_I_SP 232(SP) | |||||
| #define FZERO 240(SP) | |||||
| #define STACKSIZE 452 | |||||
| #define ALPHA_R_SP 224+196(SP) | |||||
| #define ALPHA_I_SP 232+196(SP) | |||||
| #define FZERO 240+196(SP) | |||||
| #endif | #endif | ||||
| #define M r3 | #define M r3 | ||||
| @@ -133,6 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define alpha_r vs30 | #define alpha_r vs30 | ||||
| #define alpha_i vs31 | #define alpha_i vs31 | ||||
| #define VECSAVE r11 | |||||
| #define KKK r13 | #define KKK r13 | ||||
| #define K1 r14 | #define K1 r14 | ||||
| #define L r15 | #define L r15 | ||||
| @@ -204,6 +207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| std r15, 272(SP) | std r15, 272(SP) | ||||
| std r14, 280(SP) | std r14, 280(SP) | ||||
| std r13, 288(SP) | std r13, 288(SP) | ||||
| addi r11, SP, 304 | |||||
| #else | #else | ||||
| stw r31, 144(SP) | stw r31, 144(SP) | ||||
| stw r30, 148(SP) | stw r30, 148(SP) | ||||
| @@ -224,7 +228,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| stw r15, 208(SP) | stw r15, 208(SP) | ||||
| stw r14, 212(SP) | stw r14, 212(SP) | ||||
| stw r13, 216(SP) | stw r13, 216(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| stvx v20, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v21, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v22, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v23, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v24, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v25, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v26, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v27, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v28, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v29, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v30, r11, r0 | |||||
| addi r11, r11, 16 | |||||
| stvx v31, r11, r0 | |||||
| li r11, 0 | |||||
| stfd f1, ALPHA_R_SP | stfd f1, ALPHA_R_SP | ||||
| stfd f2, ALPHA_I_SP | stfd f2, ALPHA_I_SP | ||||
| @@ -289,9 +318,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| li o48 , 48 | li o48 , 48 | ||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| addi ALPHA, SP, 296 | |||||
| addi ALPHA, SP, 296+200 | |||||
| #else | #else | ||||
| addi ALPHA, SP, 224 | |||||
| addi ALPHA, SP, 224+196 | |||||
| #endif | #endif | ||||
| lxsdx alpha_r, 0, ALPHA | lxsdx alpha_r, 0, ALPHA | ||||
| @@ -347,6 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| ld r15, 272(SP) | ld r15, 272(SP) | ||||
| ld r14, 280(SP) | ld r14, 280(SP) | ||||
| ld r13, 288(SP) | ld r13, 288(SP) | ||||
| addi r11, SP, 304 | |||||
| #else | #else | ||||
| lwz r31, 144(SP) | lwz r31, 144(SP) | ||||
| lwz r30, 148(SP) | lwz r30, 148(SP) | ||||
| @@ -367,10 +397,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| lwz r15, 208(SP) | lwz r15, 208(SP) | ||||
| lwz r14, 212(SP) | lwz r14, 212(SP) | ||||
| lwz r13, 216(SP) | lwz r13, 216(SP) | ||||
| addi r11, SP, 224 | |||||
| #endif | #endif | ||||
| lvx v20, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v21, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v22, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v23, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v24, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v25, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v26, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v27, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v28, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v29, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v30, r11, r3 | |||||
| addi r11, r11, 16 | |||||
| lvx v31, r11, r3 | |||||
| li r11, 0 | |||||
| addi SP, SP, STACKSIZE | addi SP, SP, STACKSIZE | ||||
| blr | blr | ||||
| EPILOGUE | EPILOGUE | ||||