Browse Source

Explicitly use fld.d/fst.d for floating-point registers instead of the LD/ST macros

tags/v0.3.29
Martin Kroeker GitHub 1 year ago
parent
commit
e05d98d00a
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
3 changed files with 61 additions and 61 deletions
  1. +21 / -21  kernel/loongarch64/cgemm_kernel_16x4_lasx.S
  2. +19 / -19  kernel/loongarch64/dgemm_kernel_16x4.S
  3. +21 / -21  kernel/loongarch64/zgemm_kernel_8x4_lasx.S

+ 21
- 21
kernel/loongarch64/cgemm_kernel_16x4_lasx.S View File

@@ -196,17 +196,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SDARG $r25, $sp, 16
SDARG $r26, $sp, 24
SDARG $r27, $sp, 32
-ST $f23, $sp, 40
-ST $f24, $sp, 48
-ST $f25, $sp, 56
-ST $f26, $sp, 64
-ST $f27, $sp, 72
-ST $f28, $sp, 80
-ST $f29, $sp, 88
-ST $f30, $sp, 96
-ST $f31, $sp, 104
-ST ALPHA_R,$sp, 112
-ST ALPHA_I,$sp, 120
+fst.d $f23, $sp, 40
+fst.d $f24, $sp, 48
+fst.d $f25, $sp, 56
+fst.d $f26, $sp, 64
+fst.d $f27, $sp, 72
+fst.d $f28, $sp, 80
+fst.d $f29, $sp, 88
+fst.d $f30, $sp, 96
+fst.d $f31, $sp, 104
+fst.d ALPHA_R,$sp, 112
+fst.d ALPHA_I,$sp, 120

xvldrepl.w VALPHAR, $sp, 112
xvldrepl.w VALPHAI, $sp, 120
@@ -3741,17 +3741,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDARG $r25, $sp, 16
LDARG $r26, $sp, 24
LDARG $r27, $sp, 32
-LD $f23, $sp, 40
-LD $f24, $sp, 48
-LD $f25, $sp, 56
-LD $f26, $sp, 64
-LD $f27, $sp, 72
-LD $f28, $sp, 80
-LD $f29, $sp, 88
-LD $f30, $sp, 96
-LD $f31, $sp, 104
+fld.d $f23, $sp, 40
+fld.d $f24, $sp, 48
+fld.d $f25, $sp, 56
+fld.d $f26, $sp, 64
+fld.d $f27, $sp, 72
+fld.d $f28, $sp, 80
+fld.d $f29, $sp, 88
+fld.d $f30, $sp, 96
+fld.d $f31, $sp, 104

addi.d $sp, $sp, 128
jirl $r0, $r1, 0x0

EPILOGUE
EPILOGUE

+ 19
- 19
kernel/loongarch64/dgemm_kernel_16x4.S View File

@@ -1098,16 +1098,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SDARG $r25, $sp, 16
SDARG $r26, $sp, 24
SDARG $r27, $sp, 32
-ST $f23, $sp, 40
-ST $f24, $sp, 48
-ST $f25, $sp, 56
-ST $f26, $sp, 64
-ST $f27, $sp, 72
-ST $f28, $sp, 80
-ST $f29, $sp, 88
-ST $f30, $sp, 96
-ST $f31, $sp, 104
-ST ALPHA, $sp, 112
+fst.d $f23, $sp, 40
+fst.d $f24, $sp, 48
+fst.d $f25, $sp, 56
+fst.d $f26, $sp, 64
+fst.d $f27, $sp, 72
+fst.d $f28, $sp, 80
+fst.d $f29, $sp, 88
+fst.d $f30, $sp, 96
+fst.d $f31, $sp, 104
+fst.d ALPHA, $sp, 112

#if defined (TRMMKERNEL) && !defined(LEFT)
sub.d OFF, ZERO, OFFSET
@@ -3504,15 +3504,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDARG $r25, $sp, 16
LDARG $r26, $sp, 24
LDARG $r27, $sp, 32
-LD $f23, $sp, 40
-LD $f24, $sp, 48
-LD $f25, $sp, 56
-LD $f26, $sp, 64
-LD $f27, $sp, 72
-LD $f28, $sp, 80
-LD $f29, $sp, 88
-LD $f30, $sp, 96
-LD $f31, $sp, 104
+fld.d $f23, $sp, 40
+fld.d $f24, $sp, 48
+fld.d $f25, $sp, 56
+fld.d $f26, $sp, 64
+fld.d $f27, $sp, 72
+fld.d $f28, $sp, 80
+fld.d $f29, $sp, 88
+fld.d $f30, $sp, 96
+fld.d $f31, $sp, 104
addi.d $sp, $sp, 120

jirl $r0, $r1, 0x0


+ 21
- 21
kernel/loongarch64/zgemm_kernel_8x4_lasx.S View File

@@ -196,17 +196,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SDARG $r25, $sp, 16
SDARG $r26, $sp, 24
SDARG $r27, $sp, 32
-ST $f23, $sp, 40
-ST $f24, $sp, 48
-ST $f25, $sp, 56
-ST $f26, $sp, 64
-ST $f27, $sp, 72
-ST $f28, $sp, 80
-ST $f29, $sp, 88
-ST $f30, $sp, 96
-ST $f31, $sp, 104
-ST ALPHA_R,$sp, 112
-ST ALPHA_I,$sp, 120
+fst.d $f23, $sp, 40
+fst.d $f24, $sp, 48
+fst.d $f25, $sp, 56
+fst.d $f26, $sp, 64
+fst.d $f27, $sp, 72
+fst.d $f28, $sp, 80
+fst.d $f29, $sp, 88
+fst.d $f30, $sp, 96
+fst.d $f31, $sp, 104
+fst.d ALPHA_R,$sp, 112
+fst.d ALPHA_I,$sp, 120

xvldrepl.d VALPHAR, $sp, 112
xvldrepl.d VALPHAI, $sp, 120
@@ -3529,17 +3529,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
LDARG $r25, $sp, 16
LDARG $r26, $sp, 24
LDARG $r27, $sp, 32
-LD $f23, $sp, 40
-LD $f24, $sp, 48
-LD $f25, $sp, 56
-LD $f26, $sp, 64
-LD $f27, $sp, 72
-LD $f28, $sp, 80
-LD $f29, $sp, 88
-LD $f30, $sp, 96
-LD $f31, $sp, 104
+fld.d $f23, $sp, 40
+fld.d $f24, $sp, 48
+fld.d $f25, $sp, 56
+fld.d $f26, $sp, 64
+fld.d $f27, $sp, 72
+fld.d $f28, $sp, 80
+fld.d $f29, $sp, 88
+fld.d $f30, $sp, 96
+fld.d $f31, $sp, 104

addi.d $sp, $sp, 128
jirl $r0, $r1, 0x0

EPILOGUE
EPILOGUE

Loading…
Cancel
Save