Browse Source

LoongArch64: Update ssymv LASX version

tags/v0.3.29
gxw 8 months ago
parent
commit
20a8e48f25
2 changed files with 206 additions and 188 deletions
  1. +110
    -100
      kernel/loongarch64/ssymv_L_lasx.S
  2. +96
    -88
      kernel/loongarch64/ssymv_U_lasx.S

+ 110
- 100
kernel/loongarch64/ssymv_L_lasx.S View File

@@ -28,6 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ASSEMBLER

#include "common.h"
#include "loongarch64_asm.S"

/* Param */
#define M $r4
@@ -57,6 +58,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define T2 $r28
#define T3 $r29
#define T4 $r30
#define T5 $r17
#define T6 $r16

/* LASX vectors */
#define U0 $xr31
@@ -87,75 +90,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define a8 $f8
#define a9 $f9


PROLOGUE

LDARG BUFFER, $sp, 0

addi.d $sp, $sp, -88

SDARG $r23, $sp, 0
SDARG $r24, $sp, 8
SDARG $r25, $sp, 16
SDARG $r26, $sp, 32
SDARG $r27, $sp, 40
SDARG $r28, $sp, 48
SDARG $r29, $sp, 56
SDARG $r30, $sp, 64
SDARG $r31, $sp, 72
ST ALPHA, $sp, 80

xvldrepl.w VALPHA, $sp, 80

slli.d LDA, LDA, BASE_SHIFT
slli.d INCX, INCX, BASE_SHIFT
slli.d INCY, INCY, BASE_SHIFT

bge $r0, M, .L999
bge $r0, N, .L999

move J, $r0
move JY, $r0
move JX, $r0
move AO1, A

beq J, N, .L999

.L01:
MTC a2, $r0 //temp2
fldx.s a6, X, JX
fmul.s a3, ALPHA, a6 //temp1
xvreplve0.w U3, U3
xvreplve0.w U2, U2

mul.w T0, J, LDA
slli.d T1, J, BASE_SHIFT
add.w T0, T0, T1
fldx.s a6, AO1, T0
fldx.s a4, Y, JY
fmadd.s a4, a3, a6, a4
fstx.s a4, Y, JY

move IY, JY
move IX, JX
addi.d II, J, 1
move I, II
slli.d II, II, BASE_SHIFT

sub.d T0, M, J
addi.d T0, T0, -1
srai.d T0, T0, 3
add.d T0, T0, J
addi.d T0, T0, 1
beq I, T0, .L03
bge I, T0, .L03

mul.w T1, J, LDA
add.d T1, T1, II

.L02: /* /8 */
xvldx U1, AO1, T1

.macro LOAD_Y_8
beqz T5, .L01_Y_0
add.d T2, IY, INCY
fldx.s $f4, Y, T2
add.d T2, T2, INCY
@@ -180,11 +116,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vextrins.w $vr8, $vr9, 0x10
vextrins.w $vr8, $vr10, 0x20
vextrins.w $vr8, $vr11, 0x30
xvpermi.q U4, U8, 0x02

xvfmadd.s U4, U3, U1, U4

xvpermi.d U8, U4, 0xee
xvpermi.q U4, U8, 0x02
b .L01_Y_1
.L01_Y_0:
add.d T3, IY, INCY
xvldx U4, Y, T3
.L01_Y_1:
.endm

.macro STORE_Y_8
beqz T5, .L01_Y_2
xvpermi.d U8, U4, 0xee
vextrins.w $vr5, $vr4, 0x01
vextrins.w $vr6, $vr4, 0x02
vextrins.w $vr7, $vr4, 0x03
@@ -209,10 +151,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fstx.s $f10, Y, T2
add.d T2, T2, INCY
fstx.s $f11, Y, T2

slli.d T2, INCY, 3
add.d IY, IY, T2

b .L01_Y_3
.L01_Y_2:
xvstx U4, Y, T3
.L01_Y_3:
.endm

.macro LOAD_X_8
beqz T6, .L01_X_0
add.d T2, IX, INCX
fldx.s $f4, X, T2
add.d T2, T2, INCX
@@ -238,39 +184,103 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vextrins.w $vr8, $vr10, 0x20
vextrins.w $vr8, $vr11, 0x30
xvpermi.q U4, U8, 0x02
b .L01_X_1
.L01_X_0:
add.d T3, IX, INCX
xvldx U4, X, T3
.L01_X_1:
.endm

PROLOGUE

xvand.v $xr12, $xr2, $xr2
addi.d $sp, $sp, -88

xvfmadd.s U2, U1, U4, U2
xvfsub.s U2, U2, $xr12
SDARG $r23, $sp, 0
SDARG $r24, $sp, 8
SDARG $r25, $sp, 16
SDARG $r26, $sp, 32
SDARG $r27, $sp, 40
SDARG $r28, $sp, 48
SDARG $r29, $sp, 56
SDARG $r30, $sp, 64
SDARG $r31, $sp, 72
ST ALPHA, $sp, 80

xvpickve.w U4, U2, 0x01
xvpickve.w U5, U2, 0x02
xvpickve.w U6, U2, 0x03
xvpickve.w U7, U2, 0x04
xvpickve.w U8, U2, 0x05
xvpickve.w U9, U2, 0x06
xvpickve.w U10, U2, 0x07
xvldrepl.w VALPHA, $sp, 80

fadd.s $f2, $f2, $f4
fadd.s $f2, $f2, $f5
fadd.s $f2, $f2, $f6
fadd.s $f2, $f2, $f7
fadd.s $f2, $f2, $f8
fadd.s $f2, $f2, $f9
fadd.s $f2, $f2, $f10
fadd.s $f2, $f2, $f12
addi.d T5, INCY, -1
addi.d T6, INCX, -1
slli.d LDA, LDA, BASE_SHIFT
slli.d INCX, INCX, BASE_SHIFT
slli.d INCY, INCY, BASE_SHIFT

xvreplve0.d U2, U2
bge $r0, M, .L999
bge $r0, N, .L999

move J, $r0
move JY, $r0
move JX, $r0
move AO1, A

slli.d T2, INCX, 3
add.d IX, IX, T2
beq J, N, .L999

.L01:
xvxor.v U2, U2, U2
fldx.s a6, X, JX
fmul.s a3, ALPHA, a6 //temp1
xvreplve0.w U3, U3

mul.w T0, J, LDA
slli.d T1, J, BASE_SHIFT
add.w T0, T0, T1
fldx.s a6, AO1, T0
fldx.s a4, Y, JY
fmadd.s a4, a3, a6, a4
fstx.s a4, Y, JY

move IY, JY
move IX, JX
addi.d II, J, 1
move I, II
slli.d II, II, BASE_SHIFT

sub.d T0, M, J
addi.d T0, T0, -1
srai.d T0, T0, 3
add.d T0, T0, J
addi.d T0, T0, 1
beq I, T0, .L03
bge I, T0, .L03

mul.w T1, J, LDA
add.d T1, T1, II

.L02: /* /8 */
// Main vector loop: each pass handles 32 bytes (eight single-precision
// elements) of A at AO1 + T1, updates eight Y elements and accumulates
// eight A*X products. Runs while I < T0 (signed compare, see blt below).
xvldx U1, AO1, T1

// Fill U4 with Y values. Per the LOAD_Y_8 macro defined earlier: a strided
// gather when T5 (INCY - 1) is non-zero, otherwise a single xvldx.
LOAD_Y_8

// U4 = U3 * U1 + U4.
// NOTE(review): U3 presumably holds temp1 (alpha * x[j]) replicated by the
// xvreplve0.w at .L01 - confirm the a3/U3 register aliasing.
xvfmadd.s U4, U3, U1, U4

// Write the updated U4 back to Y (scattered or contiguous, mirroring LOAD_Y_8).
STORE_Y_8

// IY = (INCY << 3) + IY. INCY was already scaled by BASE_SHIFT in the
// prologue, so this steps over eight Y strides.
alsl.d IY, INCY, IY, 3

// Fill U4 with X values (strided gather when T6 != 0, otherwise one xvldx).
LOAD_X_8

// U2 = U1 * U4 + U2: per-lane running sum of A*X; U2 was zeroed at .L01.
xvfmadd.s U2, U1, U4, U2

// IX = (INCX << 3) + IX, i.e. step over eight X strides.
alsl.d IX, INCX, IX, 3

// Advance both byte offsets by 32 and the iteration counter by one.
addi.d II, II, 32
addi.d T1, T1, 32
addi.d I, I, 1
blt I, T0, .L02

//Acc U2
// NOTE(review): GACC comes from loongarch64_asm.S (not visible here);
// presumably it reduces the lanes of U2 into the low element of U4 - confirm.
GACC xvf, s, U4, U2
// Copy $f4 into $f2.
// NOTE(review): assumes $f4/$f2 alias the low lanes of U4/U2 - confirm.
fmov.d $f2, $f4

.L03: /* &4 */
sub.d T0, M, J
addi.d T0, T0, -1
@@ -433,4 +443,4 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi.d $sp, $sp, 88
jirl $r0, $r1, 0x0

EPILOGUE
EPILOGUE

+ 96
- 88
kernel/loongarch64/ssymv_U_lasx.S View File

@@ -28,6 +28,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ASSEMBLER

#include "common.h"
#include "loongarch64_asm.S"

/* Param */
#define M $r4
@@ -57,6 +58,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define T2 $r28
#define T3 $r29
#define T4 $r30
#define T5 $r17
#define T6 $r16

/* LASX vectors */
#define U0 $xr31
@@ -87,64 +90,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define a8 $f8
#define a9 $f9


PROLOGUE

LDARG BUFFER, $sp, 0

addi.d $sp, $sp, -88

SDARG $r23, $sp, 0
SDARG $r24, $sp, 8
SDARG $r25, $sp, 16
SDARG $r26, $sp, 32
SDARG $r27, $sp, 40
SDARG $r28, $sp, 48
SDARG $r29, $sp, 56
SDARG $r30, $sp, 64
SDARG $r31, $sp, 72
ST ALPHA, $sp, 80

xvldrepl.w VALPHA, $sp, 80

slli.d LDA, LDA, BASE_SHIFT
slli.d INCX, INCX, BASE_SHIFT
slli.d INCY, INCY, BASE_SHIFT

bge $r0, M, .L999
bge $r0, N, .L999

sub.d M1, M, N

mul.d JY, M1, INCY
mul.d JX, M1, INCX

move J, M1
move AO1, A

beq J, M, .L999

.L01:
MTC $f2, $r0 //temp2
fldx.s $f6, X, JX
fmul.s $f3, ALPHA, $f6 //temp1
xvreplve0.w U3, U3
xvreplve0.w U2, U2

move IY, $r0
move IX, $r0
move II, $r0
move I, $r0

srai.d T0, J, 3
beq I, T0, .L03

mul.w T1, J, LDA
add.d T1, T1, II

.L02: /* /8 */
xvldx U1, AO1, T1

.macro LOAD_Y_8
beqz T5, .L01_Y_0
fldx.s $f4, Y, IY
add.d T2, IY, INCY
fldx.s $f5, Y, T2
@@ -168,10 +115,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vextrins.w $vr8, $vr9, 0x10
vextrins.w $vr8, $vr10, 0x20
vextrins.w $vr8, $vr11, 0x30
xvpermi.q U4, U8, 0x02

xvfmadd.s U4, U3, U1, U4

xvpermi.q U4, U8, 0x02
b .L01_Y_1
.L01_Y_0:
xvldx U4, Y, IY
.L01_Y_1:
.endm

.macro STORE_Y_8
beqz T5, .L01_Y_2
xvpermi.d U8, U4, 0xee
vextrins.w $vr5, $vr4, 0x01
vextrins.w $vr6, $vr4, 0x02
@@ -196,10 +148,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fstx.s $f10, Y, T2
add.d T2, T2, INCY
fstx.s $f11, Y, T2

slli.d T2, INCY, 3
add.d IY, IY, T2

b .L01_Y_3
.L01_Y_2:
xvstx U4, Y, IY
.L01_Y_3:
.endm

.macro LOAD_X_8
beqz T6, .L01_X_0
fldx.s $f4, X, IX
add.d T2, IX, INCX
fldx.s $f5, X, T2
@@ -224,39 +180,91 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vextrins.w $vr8, $vr10, 0x20
vextrins.w $vr8, $vr11, 0x30
xvpermi.q U4, U8, 0x02
b .L01_X_1
.L01_X_0:
xvldx U4, X, IX
.L01_X_1:
.endm

PROLOGUE

xvand.v $xr12, $xr2, $xr2
addi.d $sp, $sp, -88

xvfmadd.s U2, U1, U4, U2
xvfsub.s U2, U2, $xr12
SDARG $r23, $sp, 0
SDARG $r24, $sp, 8
SDARG $r25, $sp, 16
SDARG $r26, $sp, 32
SDARG $r27, $sp, 40
SDARG $r28, $sp, 48
SDARG $r29, $sp, 56
SDARG $r30, $sp, 64
SDARG $r31, $sp, 72
ST ALPHA, $sp, 80

xvpickve.w U4, U2, 0x01
xvpickve.w U5, U2, 0x02
xvpickve.w U6, U2, 0x03
xvpickve.w U7, U2, 0x04
xvpickve.w U8, U2, 0x05
xvpickve.w U9, U2, 0x06
xvpickve.w U10, U2, 0x07
xvldrepl.w VALPHA, $sp, 80

fadd.s $f2, $f2, $f4
fadd.s $f2, $f2, $f5
fadd.s $f2, $f2, $f6
fadd.s $f2, $f2, $f7
fadd.s $f2, $f2, $f8
fadd.s $f2, $f2, $f9
fadd.s $f2, $f2, $f10
fadd.s $f2, $f2, $f12
addi.d T5, INCY, -1
addi.d T6, INCX, -1
slli.d LDA, LDA, BASE_SHIFT
slli.d INCX, INCX, BASE_SHIFT
slli.d INCY, INCY, BASE_SHIFT

xvreplve0.d U2, U2
bge $r0, M, .L999
bge $r0, N, .L999

sub.d M1, M, N

mul.d JY, M1, INCY
mul.d JX, M1, INCX

move J, M1
move AO1, A

beq J, M, .L999

.L01:
xvxor.v U2, U2, U2
fldx.s $f6, X, JX
fmul.s $f3, ALPHA, $f6 //temp1
xvreplve0.w U3, U3

slli.d T2, INCX, 3
add.d IX, IX, T2
move IY, $r0
move IX, $r0
move II, $r0
move I, $r0

srai.d T0, J, 3
beq I, T0, .L03

mul.w T1, J, LDA
add.d T1, T1, II

.L02: /* /8 */
// Main vector loop: each pass handles 32 bytes (eight single-precision
// elements) of A at AO1 + T1, updates eight Y elements and accumulates
// eight A*X products. Runs while I < T0, where T0 = J >> 3.
xvldx U1, AO1, T1

// Fill U4 with Y values starting at offset IY. Per the LOAD_Y_8 macro defined
// earlier: a strided gather when T5 (INCY - 1) is non-zero, otherwise a
// single xvldx.
LOAD_Y_8

// U4 = U3 * U1 + U4.
// NOTE(review): U3 presumably holds temp1 (alpha * x[j]) replicated by the
// xvreplve0.w at .L01 - confirm the $f3/U3 register aliasing.
xvfmadd.s U4, U3, U1, U4

// Write the updated U4 back to Y (scattered or contiguous, mirroring LOAD_Y_8).
STORE_Y_8

// IY = (INCY << 3) + IY. INCY was already scaled by BASE_SHIFT in the
// prologue, so this steps over eight Y strides.
alsl.d IY, INCY, IY, 3

// Fill U4 with X values (strided gather when T6 != 0, otherwise one xvldx).
LOAD_X_8

// U2 = U1 * U4 + U2: per-lane running sum of A*X; U2 was zeroed at .L01.
xvfmadd.s U2, U1, U4, U2

// IX = (INCX << 3) + IX, i.e. step over eight X strides.
alsl.d IX, INCX, IX, 3

// Advance both byte offsets by 32 and the iteration counter by one.
addi.d II, II, 32
addi.d T1, T1, 32
addi.d I, I, 1
blt I, T0, .L02

//Acc U2
// NOTE(review): GACC comes from loongarch64_asm.S (not visible here);
// presumably it reduces the lanes of U2 into the low element of U4 - confirm.
GACC xvf, s, U4, U2
// Copy $f4 into $f2.
// NOTE(review): assumes $f4/$f2 alias the low lanes of U4/U2 - confirm.
fmov.d $f2, $f4

.L03: /* &4 */
andi T0, J, 4
beq $r0, T0, .L04
@@ -421,4 +429,4 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
addi.d $sp, $sp, 88
jirl $r0, $r1, 0x0

EPILOGUE
EPILOGUE

Loading…
Cancel
Save