@@ -798,7 +798,7 @@ Lmcount$lazy_ptr: | |||
#elif defined(PPC440FP2) | |||
#define BUFFER_SIZE ( 16 << 20) | |||
#elif defined(POWER8) | |||
#define BUFFER_SIZE ( 64 << 20) | |||
#define BUFFER_SIZE ( 32 << 20) | |||
#else | |||
#define BUFFER_SIZE ( 16 << 20) | |||
#endif | |||
@@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
#ifdef __64BIT__ | |||
#define STACKSIZE 512 | |||
#define STACKSIZE 32000 | |||
#define ALPHA_R_SP 296(SP) | |||
#define ALPHA_I_SP 304(SP) | |||
#define FZERO 312(SP) | |||
@@ -136,6 +136,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define alpha_sr vs30 | |||
#define alpha_si vs31 | |||
#define FRAMEPOINTER r12 | |||
#define BBUFFER r14 | |||
#define L r15 | |||
@@ -161,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
PROLOGUE | |||
PROFCODE | |||
mr FRAMEPOINTER, SP | |||
addi SP, SP, -STACKSIZE | |||
addi SP, SP, -STACKSIZE | |||
addi SP, SP, -STACKSIZE | |||
addi SP, SP, -STACKSIZE | |||
li r0, 0 | |||
@@ -233,37 +238,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#ifdef linux | |||
#ifdef __64BIT__ | |||
ld LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
#endif | |||
#endif | |||
#if defined(_AIX) || defined(__APPLE__) | |||
#ifdef __64BIT__ | |||
ld LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
ld LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
#else | |||
#ifdef DOUBLE | |||
lwz B, FRAMESLOT(0) + STACKSIZE(SP) | |||
lwz C, FRAMESLOT(1) + STACKSIZE(SP) | |||
lwz LDC, FRAMESLOT(2) + STACKSIZE(SP) | |||
lwz B, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
lwz C, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
lwz LDC, FRAMESLOT(2) + 0(FRAMEPOINTER) | |||
#else | |||
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
#endif | |||
#endif | |||
#endif | |||
#ifdef TRMMKERNEL | |||
#if defined(linux) && defined(__64BIT__) | |||
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
#endif | |||
#if defined(_AIX) || defined(__APPLE__) | |||
#ifdef __64BIT__ | |||
ld OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
ld OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
#else | |||
#ifdef DOUBLE | |||
lwz OFFSET, FRAMESLOT(3) + STACKSIZE(SP) | |||
lwz OFFSET, FRAMESLOT(3) + 0(FRAMEPOINTER) | |||
#else | |||
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
#endif | |||
#endif | |||
#endif | |||
@@ -290,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
li o32 , 32 | |||
li o48 , 48 | |||
li T1, 512 | |||
slwi T1, T1, 16 | |||
add BBUFFER, A, T1 | |||
addi BBUFFER, SP, 512+4096 | |||
li T1, -4096 | |||
and BBUFFER, BBUFFER, T1 | |||
#ifdef __64BIT__ | |||
@@ -392,6 +397,9 @@ L999: | |||
#endif | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
blr | |||
@@ -82,7 +82,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
#ifdef __64BIT__ | |||
#define STACKSIZE 512 | |||
#define STACKSIZE 32752 | |||
#define ALPHA_SP 296(SP) | |||
#define FZERO 304(SP) | |||
#else | |||
@@ -132,6 +132,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define o0 0 | |||
#define FRAMEPOINTER r12 | |||
#define BBUFFER r14 | |||
#define o4 r15 | |||
#define o12 r16 | |||
@@ -160,6 +162,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
PROLOGUE | |||
PROFCODE | |||
mr FRAMEPOINTER, SP | |||
addi SP, SP, -STACKSIZE | |||
addi SP, SP, -STACKSIZE | |||
addi SP, SP, -STACKSIZE | |||
addi SP, SP, -STACKSIZE | |||
li r0, 0 | |||
@@ -231,7 +237,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#if defined(_AIX) || defined(__APPLE__) | |||
#if !defined(__64BIT__) && defined(DOUBLE) | |||
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
lwz LDC, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
#endif | |||
#endif | |||
@@ -239,17 +245,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#if defined(TRMMKERNEL) | |||
#if defined(linux) && defined(__64BIT__) | |||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
#endif | |||
#if defined(_AIX) || defined(__APPLE__) | |||
#ifdef __64BIT__ | |||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
ld OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
#else | |||
#ifdef DOUBLE | |||
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
lwz OFFSET, FRAMESLOT(1) + 0(FRAMEPOINTER) | |||
#else | |||
lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
lwz OFFSET, FRAMESLOT(0) + 0(FRAMEPOINTER) | |||
#endif | |||
#endif | |||
#endif | |||
@@ -271,9 +277,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
li o32, 32 | |||
li o48, 48 | |||
li T1, 512 | |||
slwi T1, T1, 16 | |||
add BBUFFER, A, T1 | |||
addi BBUFFER, SP, 512+4096 | |||
li T1, -4096 | |||
and BBUFFER, BBUFFER, T1 | |||
addi T1, SP, 300 | |||
stxsspx f1, o0 , T1 | |||
@@ -355,6 +361,9 @@ L999: | |||
#endif | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
addi SP, SP, STACKSIZE | |||
blr | |||
@@ -1964,8 +1964,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define SNUMOPT 16 | |||
#define DNUMOPT 8 | |||
#define GEMM_DEFAULT_OFFSET_A 131072 | |||
#define GEMM_DEFAULT_OFFSET_B 131072 | |||
#define GEMM_DEFAULT_OFFSET_A 4096 | |||
#define GEMM_DEFAULT_OFFSET_B 4096 | |||
#define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
#define SGEMM_DEFAULT_UNROLL_M 16 | |||
@@ -1987,9 +1987,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define CGEMM_DEFAULT_Q 720 | |||
#define ZGEMM_DEFAULT_Q 720 | |||
#define SGEMM_DEFAULT_R 14400 | |||
#define SGEMM_DEFAULT_R 21600 | |||
#define DGEMM_DEFAULT_R 14400 | |||
#define CGEMM_DEFAULT_R 14400 | |||
#define CGEMM_DEFAULT_R 16200 | |||
#define ZGEMM_DEFAULT_R 14400 | |||
#define SYMV_P 8 | |||