@@ -82,15 +82,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
#ifdef __64BIT__ | |||
#define STACKSIZE 32000 | |||
#define ALPHA_R_SP 296(SP) | |||
#define ALPHA_I_SP 304(SP) | |||
#define FZERO 312(SP) | |||
#define STACKSIZE 32196 | |||
#define ALPHA_R_SP 296+196(SP) | |||
#define ALPHA_I_SP 304+196(SP) | |||
#define FZERO 312+196(SP) | |||
#else | |||
#define STACKSIZE 256 | |||
#define ALPHA_R_SP 224(SP) | |||
#define ALPHA_I_SP 232(SP) | |||
#define FZERO 240(SP) | |||
#define STACKSIZE 456 | |||
#define ALPHA_R_SP 224+200(SP) | |||
#define ALPHA_I_SP 232+200(SP) | |||
#define FZERO 240+200(SP) | |||
#endif | |||
#define M r3 | |||
@@ -138,6 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define FRAMEPOINTER r12 | |||
#define VECSAVE r11 | |||
#define BBUFFER r14 | |||
#define L r15 | |||
#define o12 r16 | |||
@@ -167,6 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
addi SP, SP, -STACKSIZE | |||
addi SP, SP, -STACKSIZE | |||
addi SP, SP, -STACKSIZE | |||
li r0, 0 | |||
stfd f14, 0(SP) | |||
@@ -211,6 +214,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11, SP, 288 | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
@@ -230,7 +234,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
stw r16, 204(SP) | |||
stw r15, 208(SP) | |||
stw r14, 212(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
li r11, 0 | |||
stfs f1, ALPHA_R_SP | |||
stfs f2, ALPHA_I_SP | |||
@@ -301,9 +330,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#ifdef __64BIT__ | |||
addi T1 , SP, 296 | |||
addi T1 , SP, 296+196 | |||
#else | |||
addi T1 , SP, 224 | |||
addi T1 , SP, 224+200 | |||
#endif | |||
stxsspx vs1, 0, T1 | |||
@@ -375,6 +404,7 @@ L999: | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11, SP, 288 | |||
#else | |||
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
@@ -394,7 +424,32 @@ L999: | |||
lwz r16, 204(SP) | |||
lwz r15, 208(SP) | |||
lwz r14, 212(SP) | |||
// 32-bit frame: point r11 at the first saved vector slot, SP+224.
// The base register operand was missing here (addi r11, 224); the matching
// save sequence in the prologue uses addi r11, SP, 224.
addi r11, SP, 224 | |||
#endif | |||
// Reload v20..v31 from twelve consecutive 16-byte slots; r11 walks the slots.
// NOTE(review): r0 is the lvx index register, so it must hold 0 here for the
// effective address to be r11 itself. The other restore sequences in this
// change use r3 as the index instead; confirm r0 is still 0 at this point.
lvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r0 | |||
// Clear the scratch pointer once the restore is done.
li r11, 0 | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
@@ -404,4 +459,4 @@ L999: | |||
blr | |||
EPILOGUE | |||
#endif | |||
#endif^ |
@@ -88,6 +88,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define J r12 | |||
#define PREA r14 | |||
#define PREB r15 | |||
#define BO r16 | |||
@@ -109,7 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "cgemm_tcopy_macros_8_power8.S" | |||
#define STACKSIZE 384 | |||
#define STACKSIZE 576 | |||
PROLOGUE | |||
@@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11, SP, 288 | |||
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
li r11, 0 | |||
cmpwi cr0, M, 0 | |||
ble- L999 | |||
@@ -197,9 +223,33 @@ L999: | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11, SP, 288 | |||
lvx v20, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
li r11, 0 | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -83,13 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
// Stack frame layout. The frame holds the saved FPRs/GPRs, a 192-byte area
// for v20..v31 (12 registers x 16 bytes), and the alpha/zero scratch slots
// placed above that area. Where a macro is defined twice below, the later
// definition is the one that carries the vector save area.
#ifdef __64BIT__ | |||
#define STACKSIZE 400 | |||
#define ALPHA_R_SP 304(SP) | |||
#define ALPHA_I_SP 312(SP) | |||
#define STACKSIZE 592 | |||
#define ALPHA_R_SP 304+192(SP) | |||
#define ALPHA_I_SP 312+192(SP) | |||
#else | |||
#define STACKSIZE 256 | |||
#define ALPHA_R_SP 224(SP) | |||
#define ALPHA_I_SP 232(SP) | |||
#define FZERO 240(SP) | |||
// STACKSIZE must be a multiple of 16: stvx/lvx ignore the low four bits of
// the effective address. With the previous value (452) SP ended up 12 bytes
// past a 16-byte boundary, so the vector store addressed at SP+224 really
// landed on SP+212 and overwrote the saved r14/r13 words at 212(SP)/216(SP).
// 464 is 452 rounded up; every offset below is SP-relative and unchanged.
#define STACKSIZE 464 | |||
#define ALPHA_R_SP 224+196(SP) | |||
#define ALPHA_I_SP 232+196(SP) | |||
#define FZERO 240+196(SP) | |||
#endif | |||
#define M r3 | |||
@@ -135,6 +137,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define alpha_sr vs30 | |||
#define alpha_si vs31 | |||
#define VECSAVE r11 | |||
#define o12 r12 | |||
#define KKK r13 | |||
#define K1 r14 | |||
@@ -208,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r14, 280(SP) | |||
std r13, 288(SP) | |||
std r12, 296(SP) | |||
addi r11, SP, 304 | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
@@ -228,7 +233,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
stw r15, 208(SP) | |||
stw r14, 212(SP) | |||
stw r13, 216(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
li r11, 0 | |||
stfs f1, ALPHA_R_SP | |||
stfs f2, ALPHA_I_SP | |||
@@ -295,9 +325,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#ifdef __64BIT__ | |||
addi T1, SP, 304 | |||
addi T1, SP, 304+192 | |||
#else | |||
addi T1, SP, 224 | |||
addi T1, SP, 224+196 | |||
#endif | |||
lxsspx alpha_dr, 0, T1 | |||
@@ -369,6 +399,7 @@ L999: | |||
ld r14, 280(SP) | |||
ld r13, 288(SP) | |||
ld r12, 296(SP) | |||
addi r11, SP, 304 | |||
#else | |||
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
@@ -389,10 +420,34 @@ L999: | |||
lwz r15, 208(SP) | |||
lwz r14, 212(SP) | |||
lwz r13, 216(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
lvx v20, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
li r11, 0 | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -83,12 +83,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#ifdef __64BIT__ | |||
#define STACKSIZE 320 | |||
#define ALPHA_SP 296(SP) | |||
#define FZERO 304(SP) | |||
#define STACKSIZE 512 | |||
#define ALPHA_SP 296+192(SP) | |||
#define FZERO 304+192(SP) | |||
#else | |||
#define STACKSIZE 240 | |||
#define ALPHA_SP 224(SP) | |||
#define FZERO 232(SP) | |||
#define STACKSIZE 440 | |||
#define ALPHA_SP 224+200(SP) | |||
#define FZERO 232+200(SP) | |||
#endif | |||
#define M r3 | |||
@@ -210,6 +212,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11,SP,288 | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
@@ -229,7 +232,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
stw r16, 204(SP) | |||
stw r15, 208(SP) | |||
stw r14, 212(SP) | |||
addi r11,SP,224 | |||
#endif | |||
stvx v20, r11,r0 | |||
addi r11,r11,16 | |||
stvx v21, r11,r0 | |||
addi r11,r11,16 | |||
stvx v22, r11,r0 | |||
addi r11,r11,16 | |||
stvx v23, r11,r0 | |||
addi r11,r11,16 | |||
stvx v24, r11,r0 | |||
addi r11,r11,16 | |||
stvx v25, r11,r0 | |||
addi r11,r11,16 | |||
stvx v26, r11,r0 | |||
addi r11,r11,16 | |||
stvx v27, r11,r0 | |||
addi r11,r11,16 | |||
stvx v28, r11,r0 | |||
addi r11,r11,16 | |||
stvx v29, r11,r0 | |||
addi r11,r11,16 | |||
stvx v30, r11,r0 | |||
addi r11,r11,16 | |||
stvx v31, r11,r0 | |||
li r11,0 | |||
stfd f1, ALPHA_SP | |||
stw r0, FZERO | |||
@@ -269,12 +297,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
ble .L999_H1 | |||
#ifdef __64BIT__ | |||
addi T1, SP, 296 | |||
addi T1, SP, 296+192 | |||
#else | |||
addi T1, SP, 224 | |||
addi T1, SP, 224+200 | |||
#endif | |||
li PRE, 384 | |||
li PRE, 384 | |||
li o8 , 8 | |||
li o16, 16 | |||
li o24, 24 | |||
@@ -334,6 +362,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11,SP,288 | |||
#else | |||
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
@@ -353,10 +382,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
lwz r16, 204(SP) | |||
lwz r15, 208(SP) | |||
lwz r14, 212(SP) | |||
addi r11,SP,224 | |||
#endif | |||
lvx v20, r11,r3 | |||
addi r11,r11,16 | |||
lvx v21, r11,r3 | |||
addi r11,r11,16 | |||
lvx v22, r11,r3 | |||
addi r11,r11,16 | |||
lvx v23, r11,r3 | |||
addi r11,r11,16 | |||
lvx v24, r11,r3 | |||
addi r11,r11,16 | |||
lvx v25, r11,r3 | |||
addi r11,r11,16 | |||
lvx v26, r11,r3 | |||
addi r11,r11,16 | |||
lvx v27, r11,r3 | |||
addi r11,r11,16 | |||
lvx v28, r11,r3 | |||
addi r11,r11,16 | |||
lvx v29, r11,r3 | |||
addi r11,r11,16 | |||
lvx v30, r11,r3 | |||
addi r11,r11,16 | |||
lvx v31, r11,r3 | |||
li r11,0 | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -110,12 +110,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "dgemm_ncopy_macros_4_power8.S" | |||
#define STACKSIZE 384 | |||
#define STACKSIZE 576 | |||
PROLOGUE | |||
PROFCODE | |||
addi SP, SP, -STACKSIZE | |||
//addi SP, SP, -208 | |||
li r0, 0 | |||
stfd f14, 0(SP) | |||
@@ -157,6 +158,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11,SP,288 | |||
stvx v20, r11,r0 | |||
addi r11,r11,16 | |||
stvx v21, r11,r0 | |||
addi r11,r11,16 | |||
stvx v22, r11,r0 | |||
addi r11,r11,16 | |||
stvx v23, r11,r0 | |||
addi r11,r11,16 | |||
stvx v24, r11,r0 | |||
addi r11,r11,16 | |||
stvx v25, r11,r0 | |||
addi r11,r11,16 | |||
stvx v26, r11,r0 | |||
addi r11,r11,16 | |||
stvx v27, r11,r0 | |||
addi r11,r11,16 | |||
stvx v28, r11,r0 | |||
addi r11,r11,16 | |||
stvx v29, r11,r0 | |||
addi r11,r11,16 | |||
stvx v30, r11,r0 | |||
addi r11,r11,16 | |||
stvx v31, r11,r0 | |||
li r11,0 | |||
cmpwi cr0, M, 0 | |||
ble- L999 | |||
cmpwi cr0, N, 0 | |||
@@ -164,8 +191,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
slwi LDA, LDA, BASE_SHIFT | |||
li PREA, 384 | |||
li PREB, 384 | |||
//li PREA, 384 | |||
//li PREB, 384 | |||
li PREA, 576 | |||
li PREB, 576 | |||
li o8, 8 | |||
li o16, 16 | |||
@@ -219,9 +249,34 @@ L999: | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11,SP,288 | |||
lvx v20, r11,r3 | |||
addi r11,r11,16 | |||
lvx v21, r11,r3 | |||
addi r11,r11,16 | |||
lvx v22, r11,r3 | |||
addi r11,r11,16 | |||
lvx v23, r11,r3 | |||
addi r11,r11,16 | |||
lvx v24, r11,r3 | |||
addi r11,r11,16 | |||
lvx v25, r11,r3 | |||
addi r11,r11,16 | |||
lvx v26, r11,r3 | |||
addi r11,r11,16 | |||
lvx v27, r11,r3 | |||
addi r11,r11,16 | |||
lvx v28, r11,r3 | |||
addi r11,r11,16 | |||
lvx v29, r11,r3 | |||
addi r11,r11,16 | |||
lvx v30, r11,r3 | |||
addi r11,r11,16 | |||
lvx v31, r11,r3 | |||
li r11,0 | |||
addi SP, SP, STACKSIZE | |||
//addi SP, SP, 208 | |||
blr | |||
EPILOGUE | |||
@@ -110,12 +110,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "dgemm_tcopy_macros_16_power8.S" | |||
#define STACKSIZE 384 | |||
#define STACKSIZE 576 | |||
PROLOGUE | |||
PROFCODE | |||
addi SP, SP, -STACKSIZE | |||
//addi SP, SP, -208 | |||
li r0, 0 | |||
std r31, 144(SP) | |||
@@ -136,6 +139,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11,SP,288 | |||
stvx v20, r11,r0 | |||
addi r11,r11,16 | |||
stvx v21, r11,r0 | |||
addi r11,r11,16 | |||
stvx v22, r11,r0 | |||
addi r11,r11,16 | |||
stvx v23, r11,r0 | |||
addi r11,r11,16 | |||
stvx v24, r11,r0 | |||
addi r11,r11,16 | |||
stvx v25, r11,r0 | |||
addi r11,r11,16 | |||
stvx v26, r11,r0 | |||
addi r11,r11,16 | |||
stvx v27, r11,r0 | |||
addi r11,r11,16 | |||
stvx v28, r11,r0 | |||
addi r11,r11,16 | |||
stvx v29, r11,r0 | |||
addi r11,r11,16 | |||
stvx v30, r11,r0 | |||
addi r11,r11,16 | |||
stvx v31, r11,r0 | |||
li r11,0 | |||
cmpwi cr0, M, 0 | |||
ble- L999 | |||
@@ -170,7 +198,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
add B2, B2, B | |||
add B1, B1, B | |||
li PREA, 384 | |||
//li PREA, 384 | |||
li PREA, 576 | |||
addi PREB, M16, 128 | |||
li o8, 8 | |||
@@ -202,9 +231,34 @@ L999: | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11,SP,288 | |||
lvx v20, r11,r3 | |||
addi r11,r11,16 | |||
lvx v21, r11,r3 | |||
addi r11,r11,16 | |||
lvx v22, r11,r3 | |||
addi r11,r11,16 | |||
lvx v23, r11,r3 | |||
addi r11,r11,16 | |||
lvx v24, r11,r3 | |||
addi r11,r11,16 | |||
lvx v25, r11,r3 | |||
addi r11,r11,16 | |||
lvx v26, r11,r3 | |||
addi r11,r11,16 | |||
lvx v27, r11,r3 | |||
addi r11,r11,16 | |||
lvx v28, r11,r3 | |||
addi r11,r11,16 | |||
lvx v29, r11,r3 | |||
addi r11,r11,16 | |||
lvx v30, r11,r3 | |||
addi r11,r11,16 | |||
lvx v31, r11,r3 | |||
li r11,0 | |||
addi SP, SP, STACKSIZE | |||
//addi SP, SP, 208 | |||
blr | |||
EPILOGUE | |||
@@ -83,12 +83,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
// Stack frame layout. The frame holds the saved FPRs/GPRs, a vector save
// area for v20..v31 (12 registers x 16 bytes = 192 bytes), and the
// ALPHA_SP/FZERO scratch slots placed above that area. Where a macro is
// defined twice below, the later definition carries the vector save area.
#ifdef __64BIT__ | |||
#define STACKSIZE 320 | |||
#define ALPHA_SP 296(SP) | |||
#define FZERO 304(SP) | |||
// NOTE(review): 520 is not a multiple of 16 either, so SP is not quadword
// aligned after the prologue and stvx/lvx addresses are rounded down by 8.
// No overlap with the visible GPR save slots results - confirm whether this
// should be rounded up as well.
#define STACKSIZE 520 | |||
#define ALPHA_SP 296+200(SP) | |||
#define FZERO 304+200(SP) | |||
#else | |||
#define STACKSIZE 240 | |||
#define ALPHA_SP 224(SP) | |||
#define FZERO 232(SP) | |||
// STACKSIZE must be a multiple of 16: stvx/lvx ignore the low four bits of
// the effective address. With the previous value (436) SP ended up 12 bytes
// past a 16-byte boundary, so the vector store addressed at SP+224 really
// landed on SP+212 and overwrote the saved r14/r13 words at 212(SP)/216(SP).
// 448 is 436 rounded up; every offset below is SP-relative and unchanged.
#define STACKSIZE 448 | |||
#define ALPHA_SP 224+196(SP) | |||
#define FZERO 232+196(SP) | |||
#endif | |||
#define M r3 | |||
@@ -152,6 +153,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define PRE r30 | |||
#define T2 r31 | |||
#define VECSAVE r11 | |||
#include "dtrmm_macros_16x4_power8.S" | |||
@@ -206,6 +209,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
std r13, 288(SP) | |||
addi r11, SP, 304 | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
@@ -226,7 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
stw r15, 208(SP) | |||
stw r14, 212(SP) | |||
stw r13, 216(SP) | |||
// 32-bit frame: point r11 at the first vector save slot, SP+224.
// This used r0 as the base register (addi r11, r0, 224). In the RA slot of
// addi, r0 is read as the literal value 0, which made r11 the absolute
// address 224 instead of SP+224. The matching restore uses SP.
addi r11, SP, 224 | |||
#endif | |||
// Save v20..v31 into twelve consecutive 16-byte slots; r11 walks the slots
// and r0 is the index register of every stvx.
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
// Clear the scratch pointer once the save is done.
li r11,0 | |||
// NOTE(review): this word store sits after the #endif, so it is assembled
// for both the 32-bit and the 64-bit build. The 32-bit branch already
// stores r31 at 144(SP); this looks like a leftover - confirm and remove.
stw r31, 144(SP) | |||
// Spill alpha (f1) and a word taken from r0 to their frame slots.
stfd f1, ALPHA_SP | |||
stw r0, FZERO | |||
@@ -270,9 +301,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
ble .L999_H1 | |||
#ifdef __64BIT__ | |||
addi ALPHA, SP, 296 | |||
addi ALPHA, SP, 296+200 | |||
#else | |||
addi ALPHA, SP, 224 | |||
addi ALPHA, SP, 224+196 | |||
#endif | |||
li PRE, 256 | |||
@@ -332,6 +363,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
ld r13, 288(SP) | |||
addi r11, SP, 304 | |||
#else | |||
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
@@ -352,10 +384,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
lwz r15, 208(SP) | |||
lwz r14, 212(SP) | |||
lwz r13, 216(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
lvx v20, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
li r11, 0 | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -48,8 +48,9 @@ | |||
#ifdef __64BIT__ | |||
#define STACKSIZE 320 | |||
#define ALPHA 296(SP) | |||
#define FZERO 304(SP) | |||
#define STACKSIZE 520 | |||
#define ALPHA 296+200(SP) | |||
#define FZERO 304+200(SP) | |||
#else | |||
#define STACKSIZE 240 | |||
#define ALPHA 224(SP) | |||
@@ -112,6 +113,8 @@ | |||
#define o48 r30 | |||
#define T1 r31 | |||
#define VECSAVE r11 | |||
#include "dtrsm_macros_LT_16x4_power8.S" | |||
#ifndef NEEDPARAM | |||
@@ -163,6 +166,7 @@ | |||
std r17, 256(SP) | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
addi r11,SP,288 | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
@@ -178,7 +182,32 @@ | |||
stw r20, 188(SP) | |||
stw r19, 192(SP) | |||
stw r18, 196(SP) | |||
addi r11,SP,208 | |||
#endif | |||
stvx v20, r11,r0 | |||
addi r11,r11,16 | |||
stvx v21, r11,r0 | |||
addi r11,r11,16 | |||
stvx v22, r11,r0 | |||
addi r11,r11,16 | |||
stvx v23, r11,r0 | |||
addi r11,r11,16 | |||
stvx v24, r11,r0 | |||
addi r11,r11,16 | |||
stvx v25, r11,r0 | |||
addi r11,r11,16 | |||
stvx v26, r11,r0 | |||
addi r11,r11,16 | |||
stvx v27, r11,r0 | |||
addi r11,r11,16 | |||
stvx v28, r11,r0 | |||
addi r11,r11,16 | |||
stvx v29, r11,r0 | |||
addi r11,r11,16 | |||
stvx v30, r11,r0 | |||
addi r11,r11,16 | |||
stvx v31, r11,r0 | |||
li r11,0 | |||
#if defined(_AIX) || defined(__APPLE__) | |||
@@ -269,6 +298,7 @@ L999: | |||
ld r17, 256(SP) | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
addi r11,SP,288 | |||
#else | |||
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
@@ -284,10 +314,35 @@ L999: | |||
lwz r20, 188(SP) | |||
lwz r19, 192(SP) | |||
lwz r18, 196(SP) | |||
addi r11,SP,208 | |||
#endif | |||
lvx v20, r11,r3 | |||
addi r11,r11,16 | |||
lvx v21, r11,r3 | |||
addi r11,r11,16 | |||
lvx v22, r11,r3 | |||
addi r11,r11,16 | |||
lvx v23, r11,r3 | |||
addi r11,r11,16 | |||
lvx v24, r11,r3 | |||
addi r11,r11,16 | |||
lvx v25, r11,r3 | |||
addi r11,r11,16 | |||
lvx v26, r11,r3 | |||
addi r11,r11,16 | |||
lvx v27, r11,r3 | |||
addi r11,r11,16 | |||
lvx v28, r11,r3 | |||
addi r11,r11,16 | |||
lvx v29, r11,r3 | |||
addi r11,r11,16 | |||
lvx v30, r11,r3 | |||
addi r11,r11,16 | |||
lvx v31, r11,r3 | |||
li r11,0 | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -83,12 +83,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#ifdef __64BIT__ | |||
#define STACKSIZE 32752 | |||
#define ALPHA_SP 296(SP) | |||
#define FZERO 304(SP) | |||
#define ALPHA_SP 296+192(SP) | |||
#define FZERO 304+192(SP) | |||
#else | |||
#define STACKSIZE 240 | |||
#define ALPHA_SP 224(SP) | |||
#define FZERO 232(SP) | |||
#define STACKSIZE 440 | |||
#define ALPHA_SP 224+200(SP) | |||
#define FZERO 232+200(SP) | |||
#endif | |||
#define M r3 | |||
@@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define o0 0 | |||
#define VECSAVE r11 | |||
#define FRAMEPOINTER r12 | |||
#define BBUFFER r14 | |||
@@ -211,6 +213,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11, SP, 288 | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
@@ -230,7 +233,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
stw r16, 204(SP) | |||
stw r15, 208(SP) | |||
stw r14, 212(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
li r11,0 | |||
// stfd f1, ALPHA_SP | |||
// stw r0, FZERO | |||
@@ -281,7 +310,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
li T1, -4096 | |||
and BBUFFER, BBUFFER, T1 | |||
addi T1, SP, 300 | |||
addi T1, SP, 300+192 | |||
stxsspx f1, o0 , T1 | |||
stxsspx f1, o4 , T1 | |||
stxsspx f1, o8 , T1 | |||
@@ -339,6 +368,7 @@ L999: | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11, SP, 288 | |||
#else | |||
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
@@ -358,13 +388,38 @@ L999: | |||
lwz r16, 204(SP) | |||
lwz r15, 208(SP) | |||
lwz r14, 212(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
lvx v20, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
li r11, 0 | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "sgemm_tcopy_macros_16_power8.S" | |||
#define STACKSIZE 384 | |||
#define STACKSIZE 576 | |||
PROLOGUE | |||
PROFCODE | |||
@@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11 ,SP, 288 | |||
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
li r11, 0 | |||
cmpwi cr0, M, 0 | |||
ble- L999 | |||
@@ -203,9 +227,33 @@ L999: | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11, SP, 288 | |||
lvx v20, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
li r11, 0 | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -110,8 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "sgemm_tcopy_macros_8_power8.S" | |||
#define STACKSIZE 384 | |||
#define STACKSIZE 576 | |||
PROLOGUE | |||
PROFCODE | |||
@@ -137,6 +136,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11, SP, 288 | |||
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
li r11, 0 | |||
cmpwi cr0, M, 0 | |||
ble- L999 | |||
@@ -198,9 +222,33 @@ L999: | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11,SP,288 | |||
lvx v20, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
li r11, 0 | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -83,8 +83,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#ifdef __64BIT__ | |||
#define STACKSIZE 340 | |||
#define ALPHA_SP 296(SP) | |||
#define FZERO 304(SP) | |||
#define STACKSIZE 540 | |||
#define ALPHA_SP 296+200(SP) | |||
#define FZERO 304+200(SP) | |||
#else | |||
#define STACKSIZE 240 | |||
#define ALPHA_SP 224(SP) | |||
@@ -132,6 +133,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define o0 0 | |||
#define VECSAVE r11 | |||
#define TBUFFER r13 | |||
#define o12 r14 | |||
#define o4 r15 | |||
@@ -207,6 +210,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
std r13, 288(SP) | |||
addi r11, SP, 304 | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
@@ -226,8 +230,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
stw r16, 204(SP) | |||
stw r15, 208(SP) | |||
stw r14, 212(SP) | |||
stw r13, 216(SP) | |||
stw r13, 216(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
li r11, 0 | |||
// stfd f1, ALPHA_SP | |||
// stw r0, FZERO | |||
@@ -271,16 +301,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
cmpwi cr0, K, 0 | |||
ble L999_H1 | |||
li PRE, 256 | |||
li PRE, 256 | |||
li o4 , 4 | |||
li o8 , 8 | |||
li o12, 12 | |||
li o16, 16 | |||
li o32, 32 | |||
li o48, 48 | |||
addi TBUFFER, SP, 320 | |||
addi TBUFFER, SP, 320+200 | |||
addi T1, SP, 300 | |||
addi T1, SP, 300+200 | |||
stxsspx f1, o0 , T1 | |||
stxsspx f1, o4 , T1 | |||
stxsspx f1, o8 , T1 | |||
@@ -339,6 +369,7 @@ L999: | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
ld r13, 288(SP) | |||
addi r11, SP, 304 | |||
#else | |||
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
@@ -359,10 +390,34 @@ L999: | |||
lwz r15, 208(SP) | |||
lwz r14, 212(SP) | |||
lwz r13, 216(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
lvx v20, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
li r11, 0 | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -117,15 +117,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
#ifdef __64BIT__ | |||
#define STACKSIZE 32000 | |||
#define ALPHA_R_SP 296(SP) | |||
#define ALPHA_I_SP 304(SP) | |||
#define FZERO 312(SP) | |||
#define STACKSIZE 32192 | |||
#define ALPHA_R_SP 296+192(SP) | |||
#define ALPHA_I_SP 304+192(SP) | |||
#define FZERO 312+192(SP) | |||
#else | |||
#define STACKSIZE 256 | |||
#define ALPHA_R_SP 224(SP) | |||
#define ALPHA_I_SP 232(SP) | |||
#define FZERO 240(SP) | |||
#define STACKSIZE 460 | |||
#define ALPHA_R_SP 224+204(SP) | |||
#define ALPHA_I_SP 232+204(SP) | |||
#define FZERO 240+204(SP) | |||
#endif | |||
#define M r3 | |||
@@ -168,6 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define alpha_r vs30 | |||
#define alpha_i vs31 | |||
#define VECSAVE r11 | |||
#define FRAMEPOINTER r12 | |||
@@ -245,6 +246,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11, SP, 288 | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
@@ -263,7 +265,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/*
 * End of the #else register saves; this branch places the vector save
 * base at SP+224.
 */
stw r17, 200(SP) | |||
stw r16, 204(SP) | |||
stw r15, 208(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
/*
 * Store v20..v31 into twelve consecutive 16-byte slots starting at r11,
 * stepping r11 by 16 between stores. r0 is the index operand of every
 * stvx.
 * NOTE(review): presumably r0 was loaded with 0 earlier in the prologue
 * (not visible in this hunk) -- confirm.
 */
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
/* Clear the scratch pointer once the vector area has been written. */
li r11,0 | |||
/* Spill f1 and f2 to the ALPHA_R_SP / ALPHA_I_SP stack slots. */
stfd f1, ALPHA_R_SP | |||
stfd f2, ALPHA_I_SP | |||
@@ -332,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/* Mask BBUFFER with T1 (bitwise AND, result written back to BBUFFER). */
and BBUFFER, BBUFFER, T1 | |||
/*
 * Point ALPHA at a stack slot and load alpha_r from it. The second addi
 * in each branch matches the shifted ALPHA_R_SP offset: 296+192 for
 * __64BIT__, and 224+192+12 (= 224+204) for #else.
 * NOTE(review): each branch has two consecutive addi to ALPHA, so only
 * the second has any effect; this looks like the old and new sides of a
 * diff hunk shown together -- confirm.
 */
#ifdef __64BIT__ | |||
addi ALPHA, SP, 296 | |||
addi ALPHA, SP, 296+192 | |||
#else | |||
addi ALPHA, SP, 224 | |||
addi ALPHA, SP, 224+192+12 | |||
#endif | |||
lxsdx alpha_r, 0, ALPHA | |||
@@ -389,6 +416,7 @@ L999: | |||
/*
 * Epilogue fragment: reload r16..r14 from the doubleword slots used in
 * the prologue, then point r11 at SP+288, the first vector slot for this
 * branch.
 */
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11, SP, 288 | |||
#else | |||
/* #else branch: registers are reloaded as 32-bit words (lwz). */
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
@@ -407,13 +435,37 @@ L999: | |||
/*
 * End of the #else register reloads; this branch places the vector
 * restore base at SP+224.
 */
lwz r17, 200(SP) | |||
lwz r16, 204(SP) | |||
lwz r15, 208(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
/*
 * Reload v20..v31 from the twelve 16-byte slots starting at r11. r3 is
 * the index operand of every lvx.
 * NOTE(review): presumably r3 is zero at this point (set in code not
 * visible here) -- confirm.
 */
lvx v20, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
li r11, 0 | |||
/*
 * SP is advanced by STACKSIZE four times before returning.
 * NOTE(review): confirm this equals the number of -STACKSIZE adjustments
 * made in the prologue, which is not visible in this hunk.
 */
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -110,6 +110,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/* Pull in the copy macros used by this routine. */
#include "zgemm_tcopy_macros_8_power8.S" | |||
/*
 * Frame size. 576 is 384 plus 192, i.e. room for twelve additional
 * 16-byte slots, matching the v20..v31 stvx/lvx sequences below.
 * NOTE(review): STACKSIZE is defined twice back to back; this looks like
 * the old and new sides of a diff hunk -- confirm 576 is the live value.
 */
#define STACKSIZE 384 | |||
#define STACKSIZE 576 | |||
PROLOGUE | |||
@@ -136,6 +137,31 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/*
 * Prologue fragment: store r16..r14 into their stack slots, then store
 * v20..v31 into twelve consecutive 16-byte slots starting at SP+288.
 */
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
addi r11, SP ,288 | |||
/*
 * r11 is the moving base and r0 the index operand of every stvx.
 * NOTE(review): presumably r0 holds 0 here (set in code not visible in
 * this hunk) -- confirm.
 */
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11 ,16 | |||
stvx v31, r11, r0 | |||
li r11,0 | |||
/* Early exit: if M <= 0 (signed word compare) branch to L999. */
cmpwi cr0, M, 0 | |||
ble- L999 | |||
@@ -196,9 +222,33 @@ L999: | |||
/*
 * Epilogue fragment: reload r16..r14, then reload v20..v31 from the
 * twelve 16-byte slots starting at SP+288 (the same base the prologue
 * used for its stvx sequence).
 */
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
addi r11, SP, 288 | |||
/*
 * r3 is the index operand of every lvx.
 * NOTE(review): presumably r3 is zero at this point (set in code not
 * visible here) -- confirm.
 */
lvx v20, r11,r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
/* Clear the scratch pointer, add STACKSIZE back to SP and return. */
li r11,0 | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
@@ -1,3 +1,4 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2013-2016, The OpenBLAS Project | |||
All rights reserved. | |||
@@ -82,15 +83,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/*
 * Stack-frame layout macros. ALPHA_R_SP, ALPHA_I_SP and FZERO are
 * SP-relative slots; STACKSIZE is the amount added to or subtracted
 * from SP.
 *
 * Each macro is defined twice in a row in each branch. The second set
 * moves every slot up by 200 bytes (__64BIT__) or 196 bytes (#else).
 * NOTE(review): the duplicated definitions look like the removed and
 * added sides of a diff hunk shown together -- confirm which set is
 * meant to be live.
 * NOTE(review): 520 and 452 are not multiples of 16, while lvx/stvx
 * ignore the low four bits of the effective address -- confirm the
 * resulting SP alignment and vector slot placement are intended.
 */
#endif | |||
#ifdef __64BIT__ | |||
#define STACKSIZE 320 | |||
#define ALPHA_R_SP 296(SP) | |||
#define ALPHA_I_SP 304(SP) | |||
#define FZERO 312(SP) | |||
#define STACKSIZE 520 | |||
#define ALPHA_R_SP 296+200(SP) | |||
#define ALPHA_I_SP 304+200(SP) | |||
#define FZERO 312+200(SP) | |||
#else | |||
#define STACKSIZE 256 | |||
#define ALPHA_R_SP 224(SP) | |||
#define ALPHA_I_SP 232(SP) | |||
#define FZERO 240(SP) | |||
#define STACKSIZE 452 | |||
#define ALPHA_R_SP 224+196(SP) | |||
#define ALPHA_I_SP 232+196(SP) | |||
#define FZERO 240+196(SP) | |||
#endif | |||
/* M is an alias for r3. */
#define M r3 | |||
@@ -133,6 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/*
 * Register aliases. alpha_r / alpha_i name vs30 / vs31. VECSAVE names
 * r11, the register the vector save/restore sequences use as their base
 * pointer (those sequences spell it r11 directly). KKK, K1 and L name
 * r13, r14 and r15.
 */
#define alpha_r vs30 | |||
#define alpha_i vs31 | |||
#define VECSAVE r11 | |||
#define KKK r13 | |||
#define K1 r14 | |||
#define L r15 | |||
@@ -204,6 +207,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/*
 * Prologue fragment: store r15..r13 as doublewords into their stack
 * slots, then point r11 at SP+304, the first vector save slot for this
 * branch. The r13 slot at 288(SP) ends at byte 295.
 */
std r15, 272(SP) | |||
std r14, 280(SP) | |||
std r13, 288(SP) | |||
addi r11, SP, 304 | |||
#else | |||
/* #else branch: the same registers are stored as 32-bit words (stw). */
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
@@ -224,7 +228,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/*
 * End of the #else register saves; this branch places the vector save
 * base at SP+224.
 */
stw r15, 208(SP) | |||
stw r14, 212(SP) | |||
stw r13, 216(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
/*
 * Store v20..v31 into twelve consecutive 16-byte slots starting at r11,
 * stepping r11 by 16 between stores. r0 is the index operand of every
 * stvx.
 * NOTE(review): presumably r0 was loaded with 0 earlier in the prologue
 * (not visible in this hunk) -- confirm.
 */
stvx v20, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v21, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v22, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v23, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v24, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v25, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v26, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v27, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v28, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v29, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v30, r11, r0 | |||
addi r11, r11, 16 | |||
stvx v31, r11, r0 | |||
/* Clear the scratch pointer once the vector area has been written. */
li r11, 0 | |||
/* Spill f1 and f2 to the ALPHA_R_SP / ALPHA_I_SP stack slots. */
stfd f1, ALPHA_R_SP | |||
stfd f2, ALPHA_I_SP | |||
@@ -289,9 +318,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/* o48 holds the constant 48. */
li o48 , 48 | |||
/*
 * Point ALPHA at a stack slot and load alpha_r from it. The second addi
 * in each branch matches the shifted ALPHA_R_SP offset: 296+200 for
 * __64BIT__ and 224+196 for #else.
 * NOTE(review): each branch has two consecutive addi to ALPHA, so only
 * the second has any effect; this looks like the old and new sides of a
 * diff hunk shown together -- confirm.
 */
#ifdef __64BIT__ | |||
addi ALPHA, SP, 296 | |||
addi ALPHA, SP, 296+200 | |||
#else | |||
addi ALPHA, SP, 224 | |||
addi ALPHA, SP, 224+196 | |||
#endif | |||
lxsdx alpha_r, 0, ALPHA | |||
@@ -347,6 +376,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/*
 * Epilogue fragment: reload r15..r13 from the doubleword slots used in
 * the prologue, then point r11 at SP+304, the first vector slot for this
 * branch.
 */
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
ld r13, 288(SP) | |||
addi r11, SP, 304 | |||
#else | |||
/* #else branch: registers are reloaded as 32-bit words (lwz). */
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
@@ -367,10 +397,34 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
/*
 * End of the #else register reloads; this branch places the vector
 * restore base at SP+224.
 */
lwz r15, 208(SP) | |||
lwz r14, 212(SP) | |||
lwz r13, 216(SP) | |||
addi r11, SP, 224 | |||
#endif | |||
/*
 * Reload v20..v31 from the twelve 16-byte slots starting at r11. r3 is
 * the index operand of every lvx.
 * NOTE(review): presumably r3 is zero at this point (set in code not
 * visible here) -- confirm.
 */
lvx v20, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v21, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v22, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v23, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v24, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v25, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v26, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v27, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v28, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v29, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v30, r11, r3 | |||
addi r11, r11, 16 | |||
lvx v31, r11, r3 | |||
/* Clear the scratch pointer, add STACKSIZE back to SP and return. */
li r11, 0 | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||