|
@@ -3,7 +3,7 @@ |
|
|
#include "common.h" |
|
|
#include "common.h" |
|
|
|
|
|
|
|
|
#define FETCH ld |
|
|
#define FETCH ld |
|
|
#define STACKSIZE 192 |
|
|
|
|
|
|
|
|
#define STACKSIZE 160 |
|
|
#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) |
|
|
#define gsLQC1(base,fq,ft,offset) .word(0x32<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) |
|
|
#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) |
|
|
#define gsSQC1(base,fq,ft,offset) .word(0x3A<<26|base<<21|ft<<16|0x1<<15|offset<<6|0x1<<5|fq) |
|
|
|
|
|
|
|
@@ -127,7 +127,7 @@ |
|
|
# .ent gemm |
|
|
# .ent gemm |
|
|
# .type gemm, @function |
|
|
# .type gemm, @function |
|
|
#gemm: |
|
|
#gemm: |
|
|
# .frame $fp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0 |
|
|
|
|
|
|
|
|
# .frame $sp,STACKSIZE,$31 # vars= 48, regs= 1/0, args= 0, gp= 0 |
|
|
# .mask 0x40000000,-8 |
|
|
# .mask 0x40000000,-8 |
|
|
# .fmask 0x00000000,0 |
|
|
# .fmask 0x00000000,0 |
|
|
# .set noreorder |
|
|
# .set noreorder |
|
@@ -137,34 +137,34 @@ |
|
|
PROLOGUE |
|
|
PROLOGUE |
|
|
|
|
|
|
|
|
daddiu $sp,$sp,-STACKSIZE |
|
|
daddiu $sp,$sp,-STACKSIZE |
|
|
sd $fp,184($sp) |
|
|
|
|
|
move $fp,$sp |
|
|
|
|
|
|
|
|
|
|
|
sd $16, 0($fp) |
|
|
|
|
|
sd $17, 8($fp) |
|
|
|
|
|
sd $18, 16($fp) |
|
|
|
|
|
sd $19, 24($fp) |
|
|
|
|
|
sd $20, 32($fp) |
|
|
|
|
|
sd $21, 40($fp) |
|
|
|
|
|
sd $22, 48($fp) |
|
|
|
|
|
|
|
|
|
|
|
ST $f24, 56($fp) |
|
|
|
|
|
ST $f25, 64($fp) |
|
|
|
|
|
ST $f26, 72($fp) |
|
|
|
|
|
ST $f27, 80($fp) |
|
|
|
|
|
ST $f28, 88($fp) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
sd $16, 0($sp) |
|
|
|
|
|
sd $17, 8($sp) |
|
|
|
|
|
sd $18, 16($sp) |
|
|
|
|
|
sd $19, 24($sp) |
|
|
|
|
|
sd $20, 32($sp) |
|
|
|
|
|
sd $21, 40($sp) |
|
|
|
|
|
sd $22, 48($sp) |
|
|
|
|
|
|
|
|
|
|
|
ST $f24, 56($sp) |
|
|
|
|
|
ST $f25, 64($sp) |
|
|
|
|
|
ST $f26, 72($sp) |
|
|
|
|
|
ST $f27, 80($sp) |
|
|
|
|
|
ST $f28, 88($sp) |
|
|
|
|
|
|
|
|
#if defined(TRMMKERNEL) |
|
|
#if defined(TRMMKERNEL) |
|
|
sd $23, 96($fp) |
|
|
|
|
|
sd $24, 104($fp) |
|
|
|
|
|
sd $25, 112($fp) |
|
|
|
|
|
|
|
|
sd $23, 96($sp) |
|
|
|
|
|
sd $24, 104($sp) |
|
|
|
|
|
sd $25, 112($sp) |
|
|
|
|
|
|
|
|
|
|
|
LDARG OFFSET, 160($sp) |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
#ifndef __64BIT__ |
|
|
#ifndef __64BIT__ |
|
|
ST $f20,120($fp) |
|
|
|
|
|
ST $f21,128($fp) |
|
|
|
|
|
ST $f22,136($fp) |
|
|
|
|
|
ST $f23,144($fp) |
|
|
|
|
|
|
|
|
ST $f20,120($sp) |
|
|
|
|
|
ST $f21,128($sp) |
|
|
|
|
|
ST $f22,136($sp) |
|
|
|
|
|
ST $f23,144($sp) |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
.align 4 |
|
|
.align 4 |
|
@@ -172,16 +172,12 @@ |
|
|
dsra J, N, 2 # NR=4 |
|
|
dsra J, N, 2 # NR=4 |
|
|
dsll LDC, LDC, BASE_SHIFT# LDC*SIZE |
|
|
dsll LDC, LDC, BASE_SHIFT# LDC*SIZE |
|
|
|
|
|
|
|
|
#if defined(TRMMKERNEL) |
|
|
|
|
|
LD OFFSET, 192($fp) |
|
|
|
|
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
#if defined(TRMMKERNEL) && !defined(LEFT) |
|
|
#if defined(TRMMKERNEL) && !defined(LEFT) |
|
|
neg KK, OFFSET |
|
|
neg KK, OFFSET |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
blez J, .L2 |
|
|
blez J, .L2 |
|
|
ST ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
ST ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
.L48: |
|
|
.L48: |
|
|
dsra I, M, 3 # MR=8 |
|
|
dsra I, M, 3 # MR=8 |
|
@@ -4670,7 +4666,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L480 |
|
|
blez L, .L480 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADPS C11, C11, A1, B1 |
|
|
MADPS C11, C11, A1, B1 |
|
|
MADPS C21, C21, A2, B1 |
|
|
MADPS C21, C21, A2, B1 |
|
@@ -5273,7 +5269,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L440 |
|
|
blez L, .L440 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADPS C11, C11, A1, B1 |
|
|
MADPS C11, C11, A1, B1 |
|
|
MADPS C21, C21, A2, B1 |
|
|
MADPS C21, C21, A2, B1 |
|
@@ -5653,7 +5649,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L420 |
|
|
blez L, .L420 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADPS C11, C11, A1, B1 |
|
|
MADPS C11, C11, A1, B1 |
|
|
MADPS C12, C12, A1, B2 |
|
|
MADPS C12, C12, A1, B2 |
|
@@ -5968,7 +5964,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L410 |
|
|
blez L, .L410 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C12, C12, A1, B2 |
|
|
MADD C12, C12, A1, B2 |
|
@@ -6258,7 +6254,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L280 |
|
|
blez L, .L280 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADD C13, C13, A5, B1 |
|
|
MADD C13, C13, A5, B1 |
|
|
MADD C23, C23, A6, B1 |
|
|
MADD C23, C23, A6, B1 |
|
@@ -6574,7 +6570,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L240 |
|
|
blez L, .L240 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C21, C21, A2, B1 |
|
|
MADD C21, C21, A2, B1 |
|
@@ -6784,7 +6780,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L220 |
|
|
blez L, .L220 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C21, C21, A2, B1 |
|
|
MADD C21, C21, A2, B1 |
|
@@ -6953,7 +6949,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L210 |
|
|
blez L, .L210 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C12, C12, A1, B2 |
|
|
MADD C12, C12, A1, B2 |
|
@@ -7204,7 +7200,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L180 |
|
|
blez L, .L180 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADD C13, C13, A5, B1 |
|
|
MADD C13, C13, A5, B1 |
|
|
MADD C23, C23, A6, B1 |
|
|
MADD C23, C23, A6, B1 |
|
@@ -7435,7 +7431,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L140 |
|
|
blez L, .L140 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C21, C21, A2, B1 |
|
|
MADD C21, C21, A2, B1 |
|
@@ -7597,7 +7593,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L120 |
|
|
blez L, .L120 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C21, C21, A2, B1 |
|
|
MADD C21, C21, A2, B1 |
|
@@ -7730,7 +7726,7 @@ |
|
|
andi L, TEMP, 1 |
|
|
andi L, TEMP, 1 |
|
|
#endif |
|
|
#endif |
|
|
blez L, .L110 |
|
|
blez L, .L110 |
|
|
LD ALPHA, 152($fp) |
|
|
|
|
|
|
|
|
LD ALPHA, 152($sp) |
|
|
|
|
|
|
|
|
MADD C11, C11, A1, B1 |
|
|
MADD C11, C11, A1, B1 |
|
|
daddiu AO, AO, 1 * SIZE |
|
|
daddiu AO, AO, 1 * SIZE |
|
@@ -7762,35 +7758,33 @@ |
|
|
NOP |
|
|
NOP |
|
|
|
|
|
|
|
|
.L999: |
|
|
.L999: |
|
|
ld $16, 0($fp) |
|
|
|
|
|
ld $17, 8($fp) |
|
|
|
|
|
ld $18, 16($fp) |
|
|
|
|
|
ld $19, 24($fp) |
|
|
|
|
|
ld $20, 32($fp) |
|
|
|
|
|
ld $21, 40($fp) |
|
|
|
|
|
ld $22, 48($fp) |
|
|
|
|
|
|
|
|
|
|
|
LD $f24, 56($fp) |
|
|
|
|
|
LD $f25, 64($fp) |
|
|
|
|
|
LD $f26, 72($fp) |
|
|
|
|
|
LD $f27, 80($fp) |
|
|
|
|
|
LD $f28, 88($fp) |
|
|
|
|
|
|
|
|
ld $16, 0($sp) |
|
|
|
|
|
ld $17, 8($sp) |
|
|
|
|
|
ld $18, 16($sp) |
|
|
|
|
|
ld $19, 24($sp) |
|
|
|
|
|
ld $20, 32($sp) |
|
|
|
|
|
ld $21, 40($sp) |
|
|
|
|
|
ld $22, 48($sp) |
|
|
|
|
|
|
|
|
|
|
|
LD $f24, 56($sp) |
|
|
|
|
|
LD $f25, 64($sp) |
|
|
|
|
|
LD $f26, 72($sp) |
|
|
|
|
|
LD $f27, 80($sp) |
|
|
|
|
|
LD $f28, 88($sp) |
|
|
|
|
|
|
|
|
#if defined(TRMMKERNEL) |
|
|
#if defined(TRMMKERNEL) |
|
|
ld $23, 96($fp) |
|
|
|
|
|
ld $24, 104($fp) |
|
|
|
|
|
ld $25, 112($fp) |
|
|
|
|
|
|
|
|
ld $23, 96($sp) |
|
|
|
|
|
ld $24, 104($sp) |
|
|
|
|
|
ld $25, 112($sp) |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
#ifndef __64BIT__ |
|
|
#ifndef __64BIT__ |
|
|
LD $f20,120($fp) |
|
|
|
|
|
LD $f21,128($fp) |
|
|
|
|
|
LD $f22,136($fp) |
|
|
|
|
|
LD $f23,144($fp) |
|
|
|
|
|
|
|
|
LD $f20,120($sp) |
|
|
|
|
|
LD $f21,128($sp) |
|
|
|
|
|
LD $f22,136($sp) |
|
|
|
|
|
LD $f23,144($sp) |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
move $sp,$fp |
|
|
|
|
|
ld $fp,184($sp) |
|
|
|
|
|
daddiu $sp,$sp,STACKSIZE |
|
|
daddiu $sp,$sp,STACKSIZE |
|
|
j $31 |
|
|
j $31 |
|
|
nop |
|
|
nop |
|
|