Browse Source

Merge pull request #6 from xianyi/develop

merge develop
tags/v0.3.4
Martin Kroeker GitHub 7 years ago
parent
commit
41951da6d4
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
63 changed files with 2845 additions and 1172 deletions
  1. +2
    -2
      .travis.yml
  2. +3
    -0
      Makefile.rule
  3. +1
    -1
      c_check
  4. +5
    -0
      cmake/fc.cmake
  5. +2
    -1
      cmake/openblas.pc.in
  6. +1
    -1
      cmake/system_check.cmake
  7. +2
    -2
      driver/others/memory.c
  8. +2
    -2
      exports/Makefile
  9. +1
    -1
      interface/lapack/laswp.c
  10. +1
    -1
      interface/lapack/zlaswp.c
  11. +38
    -38
      kernel/arm/asum_vfp.S
  12. +62
    -62
      kernel/arm/axpy_vfp.S
  13. +14
    -14
      kernel/arm/ccopy_vfp.S
  14. +20
    -20
      kernel/arm/cdot_vfp.S
  15. +22
    -22
      kernel/arm/cgemm_kernel_2x2_vfp.S
  16. +32
    -32
      kernel/arm/cgemm_kernel_2x2_vfpv3.S
  17. +4
    -4
      kernel/arm/cgemm_ncopy_2_vfp.S
  18. +10
    -10
      kernel/arm/cgemm_tcopy_2_vfp.S
  19. +16
    -16
      kernel/arm/cgemv_n_vfp.S
  20. +20
    -20
      kernel/arm/cgemv_t_vfp.S
  21. +16
    -16
      kernel/arm/ctrmm_kernel_2x2_vfp.S
  22. +26
    -26
      kernel/arm/ctrmm_kernel_2x2_vfpv3.S
  23. +14
    -14
      kernel/arm/dcopy_vfp.S
  24. +20
    -20
      kernel/arm/ddot_vfp.S
  25. +4
    -4
      kernel/arm/dgemm_kernel_4x4_vfpv3.S
  26. +4
    -4
      kernel/arm/dgemm_ncopy_2_vfp.S
  27. +8
    -8
      kernel/arm/dgemm_ncopy_4_vfp.S
  28. +30
    -30
      kernel/arm/dgemm_tcopy_4_vfp.S
  29. +13
    -13
      kernel/arm/dtrmm_kernel_4x4_vfpv3.S
  30. +50
    -50
      kernel/arm/gemv_n_vfp.S
  31. +60
    -60
      kernel/arm/gemv_n_vfpv3.S
  32. +84
    -84
      kernel/arm/gemv_t_vfp.S
  33. +84
    -84
      kernel/arm/gemv_t_vfpv3.S
  34. +16
    -16
      kernel/arm/iamax_vfp.S
  35. +8
    -8
      kernel/arm/nrm2_vfp.S
  36. +8
    -8
      kernel/arm/nrm2_vfpv3.S
  37. +112
    -112
      kernel/arm/rot_vfp.S
  38. +38
    -38
      kernel/arm/scal_vfp.S
  39. +16
    -16
      kernel/arm/scopy_vfp.S
  40. +36
    -36
      kernel/arm/sdot_vfp.S
  41. +2
    -2
      kernel/arm/sgemm_kernel_4x2_vfp.S
  42. +20
    -20
      kernel/arm/sgemm_kernel_4x4_vfpv3.S
  43. +4
    -4
      kernel/arm/sgemm_ncopy_2_vfp.S
  44. +8
    -8
      kernel/arm/sgemm_ncopy_4_vfp.S
  45. +35
    -35
      kernel/arm/sgemm_tcopy_4_vfp.S
  46. +2
    -2
      kernel/arm/strmm_kernel_4x2_vfp.S
  47. +17
    -17
      kernel/arm/strmm_kernel_4x4_vfpv3.S
  48. +56
    -56
      kernel/arm/swap_vfp.S
  49. +14
    -14
      kernel/arm/zcopy_vfp.S
  50. +20
    -20
      kernel/arm/zdot_vfp.S
  51. +12
    -12
      kernel/arm/zgemm_kernel_2x2_vfp.S
  52. +12
    -12
      kernel/arm/zgemm_kernel_2x2_vfpv3.S
  53. +4
    -4
      kernel/arm/zgemm_ncopy_2_vfp.S
  54. +10
    -10
      kernel/arm/zgemm_tcopy_2_vfp.S
  55. +16
    -16
      kernel/arm/zgemv_n_vfp.S
  56. +20
    -20
      kernel/arm/zgemv_t_vfp.S
  57. +6
    -6
      kernel/arm/ztrmm_kernel_2x2_vfp.S
  58. +6
    -6
      kernel/arm/ztrmm_kernel_2x2_vfpv3.S
  59. +14
    -0
      kernel/mips64/axpy_loongson3a.S
  60. +14
    -0
      kernel/mips64/daxpy_loongson3a_simd.S
  61. +5
    -11
      kernel/x86_64/KERNEL.SKYLAKEX
  62. +1642
    -0
      kernel/x86_64/dgemm_kernel_4x8_skylakex.c
  63. +1
    -1
      utest/test_fork.c

+ 2
- 2
.travis.yml View File

@@ -85,8 +85,8 @@ jobs:
sudo: true
language: minimal
before_install:
- "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.6.0/alpine-chroot-install' \
&& echo 'a827a4ba3d0817e7c88bae17fe34e50204983d1e alpine-chroot-install' | sha1sum -c || exit 1"
- "wget 'https://raw.githubusercontent.com/alpinelinux/alpine-chroot-install/v0.9.0/alpine-chroot-install' \
&& echo 'e5dfbbdc0c4b3363b99334510976c86bfa6cb251 alpine-chroot-install' | sha1sum -c || exit 1"
- alpine() { /alpine/enter-chroot -u "$USER" "$@"; }
install:
- sudo sh alpine-chroot-install -p 'build-base gfortran perl linux-headers'


+ 3
- 0
Makefile.rule View File

@@ -152,6 +152,9 @@ NO_AFFINITY = 1
# FUNCTION_PROFILE = 1

# Support for IEEE quad precision (it's *real* REAL*16) (under testing)
# This option should not be used - it is a holdover from unfinished code present
# in the original GotoBLAS2 library that may be usable as a starting point but
# is not even expected to compile in its present form.
# QUAD_PRECISION = 1

# Threads are still working for a while after finishing BLAS operation


+ 1
- 1
c_check View File

@@ -205,7 +205,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/);
$no_avx512= 0;
if (($architecture eq "x86") || ($architecture eq "x86_64")) {
$code = '"vbroadcastss -4 * 4(%rsi), %zmm2"';
print $tmpf "int main(void){ __asm__ volatile($code); }\n";
print $tmpf "#include <immintrin.h>\n\nint main(void){ __asm__ volatile($code); }\n";
$args = " -march=skylake-avx512 -o $tmpf.o -x c $tmpf";
my @cmd = ("$compiler_name $args >/dev/null 2>/dev/null");
system(@cmd) == 0;


+ 5
- 0
cmake/fc.cmake View File

@@ -3,6 +3,11 @@
## Description: Ported from portion of OpenBLAS/Makefile.system
## Sets Fortran related variables.

if (INTERFACE64)
set(SUFFIX64 64)
set(SUFFIX64_UNDERSCORE _64)
endif()

if (${F_COMPILER} STREQUAL "FLANG")
set(CCOMMON_OPT "${CCOMMON_OPT} -DF_INTERFACE_FLANG")
if (BINARY64 AND INTERFACE64)


+ 2
- 1
cmake/openblas.pc.in View File

@@ -1,4 +1,5 @@
libdir=@CMAKE_INSTALL_FULL_LIBDIR@
libsuffix=@SUFFIX64_UNDERSCORE@
includedir=@CMAKE_INSTALL_FULL_INCLUDEDIR@

openblas_config=USE_64BITINT=@USE_64BITINT@ NO_CBLAS=@NO_CBLAS@ NO_LAPACK=@NO_LAPACK@ NO_LAPACKE=@NO_LAPACKE@ DYNAMIC_ARCH=@DYNAMIC_ARCH@ DYNAMIC_OLDER=@DYNAMIC_OLDER@ NO_AFFINITY=@NO_AFFINITY@ USE_OPENMP=@USE_OPENMP@ @CORE@ MAX_THREADS=@NUM_THREADS@
@@ -6,5 +7,5 @@ Name: OpenBLAS
Description: OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version
Version: @OPENBLAS_VERSION@
URL: https://github.com/xianyi/OpenBLAS
Libs: -L${libdir} -lopenblas
Libs: -L${libdir} -lopenblas${libsuffix}
Cflags: -I${includedir}

+ 1
- 1
cmake/system_check.cmake View File

@@ -67,7 +67,7 @@ else()
endif()

if (X86_64 OR X86)
file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "int main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
file(WRITE ${PROJECT_BINARY_DIR}/avx512.tmp "#include <immintrin.h>\n\nint main(void){ __asm__ volatile(\"vbroadcastss -4 * 4(%rsi), %zmm2\"); }")
execute_process(COMMAND ${CMAKE_C_COMPILER} -march=skylake-avx512 -v -o ${PROJECT_BINARY_DIR}/avx512.o -x c ${PROJECT_BINARY_DIR}/avx512.tmp OUTPUT_QUIET ERROR_QUIET RESULT_VARIABLE NO_AVX512)
if (NO_AVX512 EQUAL 1)
set (CCOMMON_OPT "${CCOMMON_OPT} -DNO_AVX512")


+ 2
- 2
driver/others/memory.c View File

@@ -2587,20 +2587,20 @@ void *blas_memory_alloc(int procpos){

position = 0;

LOCK_COMMAND(&alloc_lock);
do {
/* if (!memory[position].used) { */
LOCK_COMMAND(&alloc_lock);
/* blas_lock(&memory[position].lock);*/

if (!memory[position].used) goto allocation;
UNLOCK_COMMAND(&alloc_lock);
/* blas_unlock(&memory[position].lock);*/
/* } */

position ++;

} while (position < NUM_BUFFERS);
UNLOCK_COMMAND(&alloc_lock);

goto error;



+ 2
- 2
exports/Makefile View File

@@ -114,9 +114,9 @@ $(LIBDYNNAME) : ../$(LIBNAME).osx.renamed osx.def
endif
ifneq (,$(filter 1 2,$(NOFORTRAN)))
#only build without Fortran
$(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
$(CC) $(CFLAGS) $(LDFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
else
$(FC) $(FFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
$(FC) $(FFLAGS) $(LDFLAGS) -all_load -headerpad_max_install_names -install_name "$(CURDIR)/../$(LIBDYNNAME)" -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB)
endif

dllinit.$(SUFFIX) : dllinit.c


+ 1
- 1
interface/lapack/laswp.c View File

@@ -97,7 +97,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *

blas_level1_thread(mode, n, k1, k2, dummyalpha,
a, lda, NULL, 0, ipiv, incx,
laswp[flag], nthreads);
(int(*)())laswp[flag], nthreads);
}
#endif



+ 1
- 1
interface/lapack/zlaswp.c View File

@@ -96,7 +96,7 @@ int NAME(blasint *N, FLOAT *a, blasint *LDA, blasint *K1, blasint *K2, blasint *
mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif

blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, laswp[flag], nthreads);
blas_level1_thread(mode, n, k1, k2, dummyalpha, a, lda, NULL, 0, ipiv, incx, (int(*)())laswp[flag], nthreads);
}
#endif



+ 38
- 38
kernel/arm/asum_vfp.S View File

@@ -58,11 +58,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4

pld [ X, #X_PRE ]
fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
vabs.f64 d5, d5
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
vabs.f64 d6, d6
vadd.f64 d1 , d1, d5
vabs.f64 d7, d7
@@ -73,7 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4

@@ -82,22 +82,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S4

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
add X, X, INC_X

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
add X, X, INC_X

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
add X, X, INC_X

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
add X, X, INC_X
@@ -107,7 +107,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
add X, X, INC_X
@@ -118,11 +118,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F4

fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
vabs.f32 s5, s5
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
vabs.f32 s6, s6
vadd.f32 s1 , s1, s5
vabs.f32 s7, s7
@@ -133,7 +133,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4

@@ -142,22 +142,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S4

fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
add X, X, INC_X

fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
add X, X, INC_X

fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
add X, X, INC_X

fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
add X, X, INC_X
@@ -167,7 +167,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmias X, { s4 }
vldmia.f32 X, { s4 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
add X, X, INC_X
@@ -184,11 +184,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4

pld [ X, #X_PRE ]
fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
vabs.f64 d5, d5
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
vabs.f64 d6, d6
vadd.f64 d1 , d1, d5
vabs.f64 d7, d7
@@ -196,11 +196,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vadd.f64 d1 , d1, d7

pld [ X, #X_PRE ]
fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
vabs.f64 d5, d5
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
vabs.f64 d6, d6
vadd.f64 d1 , d1, d5
vabs.f64 d7, d7
@@ -212,11 +212,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4

fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4

@@ -226,28 +226,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S4

fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
vabs.f64 d5, d5
vadd.f64 d0 , d0, d5
add X, X, INC_X

fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
vabs.f64 d5, d5
vadd.f64 d0 , d0, d5
add X, X, INC_X

fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
vabs.f64 d5, d5
vadd.f64 d0 , d0, d5
add X, X, INC_X

fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
vabs.f64 d5, d5
@@ -259,7 +259,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmiad X, { d4 -d5 }
vldmia.f64 X, { d4 -d5 }
vabs.f64 d4, d4
vadd.f64 d0 , d0, d4
vabs.f64 d5, d5
@@ -273,22 +273,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4

pld [ X, #X_PRE ]
fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
vabs.f32 s5, s5
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
vabs.f32 s6, s6
vadd.f32 s1 , s1, s5
vabs.f32 s7, s7
vadd.f32 s0 , s0, s6
vadd.f32 s1 , s1, s7

fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
vabs.f32 s5, s5
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
vabs.f32 s6, s6
vadd.f32 s1 , s1, s5
vabs.f32 s7, s7
@@ -300,11 +300,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4

fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4

@@ -313,28 +313,28 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S4

fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
vabs.f32 s5, s5
vadd.f32 s0 , s0, s5
add X, X, INC_X

fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
vabs.f32 s5, s5
vadd.f32 s0 , s0, s5
add X, X, INC_X

fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
vabs.f32 s5, s5
vadd.f32 s0 , s0, s5
add X, X, INC_X

fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
vabs.f32 s5, s5
@@ -346,7 +346,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmias X, { s4 -s5 }
vldmia.f32 X, { s4 -s5 }
vabs.f32 s4, s4
vadd.f32 s0 , s0, s4
vabs.f32 s5, s5


+ 62
- 62
kernel/arm/axpy_vfp.S View File

@@ -146,17 +146,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4

pld [ X, #X_PRE ]
fldmiad X!, { d4 - d7 }
vldmia.f64 X!, { d4 - d7 }
pld [ Y, #X_PRE ]
fldmiad Y , { d8 - d11 }
vldmia.f64 Y , { d8 - d11 }
fmacd d8 , d0, d4
fstmiad Y!, { d8 }
vstmia.f64 Y!, { d8 }
fmacd d9 , d0, d5
fstmiad Y!, { d9 }
vstmia.f64 Y!, { d9 }
fmacd d10, d0, d6
fstmiad Y!, { d10 }
vstmia.f64 Y!, { d10 }
fmacd d11, d0, d7
fstmiad Y!, { d11 }
vstmia.f64 Y!, { d11 }


.endm
@@ -164,19 +164,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmiad X!, { d4 }
fldmiad Y , { d8 }
vldmia.f64 X!, { d4 }
vldmia.f64 Y , { d8 }
fmacd d8 , d0, d4
fstmiad Y!, { d8 }
vstmia.f64 Y!, { d8 }

.endm

.macro KERNEL_S1

fldmiad X , { d4 }
fldmiad Y , { d8 }
vldmia.f64 X , { d4 }
vldmia.f64 Y , { d8 }
fmacd d8 , d0, d4
fstmiad Y , { d8 }
vstmia.f64 Y , { d8 }
add X, X, INC_X
add Y, Y, INC_Y

@@ -186,16 +186,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F4

fldmias X!, { s4 - s7 }
fldmias Y , { s8 - s11 }
vldmia.f32 X!, { s4 - s7 }
vldmia.f32 Y , { s8 - s11 }
fmacs s8 , s0, s4
fstmias Y!, { s8 }
vstmia.f32 Y!, { s8 }
fmacs s9 , s0, s5
fstmias Y!, { s9 }
vstmia.f32 Y!, { s9 }
fmacs s10, s0, s6
fstmias Y!, { s10 }
vstmia.f32 Y!, { s10 }
fmacs s11, s0, s7
fstmias Y!, { s11 }
vstmia.f32 Y!, { s11 }


.endm
@@ -203,19 +203,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmias X!, { s4 }
fldmias Y , { s8 }
vldmia.f32 X!, { s4 }
vldmia.f32 Y , { s8 }
fmacs s8 , s0, s4
fstmias Y!, { s8 }
vstmia.f32 Y!, { s8 }

.endm

.macro KERNEL_S1

fldmias X , { s4 }
fldmias Y , { s8 }
vldmia.f32 X , { s4 }
vldmia.f32 Y , { s8 }
fmacs s8 , s0, s4
fstmias Y , { s8 }
vstmia.f32 Y , { s8 }
add X, X, INC_X
add Y, Y, INC_Y

@@ -231,42 +231,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4

pld [ X, #X_PRE ]
fldmiad X!, { d4 - d7 }
vldmia.f64 X!, { d4 - d7 }
pld [ Y, #X_PRE ]
fldmiad Y , { d8 - d11 }
vldmia.f64 Y , { d8 - d11 }

FMAC_R1 d8 , d0, d4
FMAC_R2 d8 , d1, d5
FMAC_I1 d9 , d0, d5
FMAC_I2 d9 , d1, d4
fstmiad Y!, { d8 }
fstmiad Y!, { d9 }
vstmia.f64 Y!, { d8 }
vstmia.f64 Y!, { d9 }

FMAC_R1 d10, d0, d6
FMAC_R2 d10, d1, d7
FMAC_I1 d11, d0, d7
FMAC_I2 d11, d1, d6
fstmiad Y!, { d10 }
fstmiad Y!, { d11 }
vstmia.f64 Y!, { d10 }
vstmia.f64 Y!, { d11 }

pld [ X, #X_PRE ]
fldmiad X!, { d4 - d7 }
vldmia.f64 X!, { d4 - d7 }
pld [ Y, #X_PRE ]
fldmiad Y , { d8 - d11 }
vldmia.f64 Y , { d8 - d11 }

FMAC_R1 d8 , d0, d4
FMAC_R2 d8 , d1, d5
FMAC_I1 d9 , d0, d5
FMAC_I2 d9 , d1, d4
fstmiad Y!, { d8 }
fstmiad Y!, { d9 }
vstmia.f64 Y!, { d8 }
vstmia.f64 Y!, { d9 }

FMAC_R1 d10, d0, d6
FMAC_R2 d10, d1, d7
FMAC_I1 d11, d0, d7
FMAC_I2 d11, d1, d6
fstmiad Y!, { d10 }
fstmiad Y!, { d11 }
vstmia.f64 Y!, { d10 }
vstmia.f64 Y!, { d11 }



@@ -277,15 +277,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmiad X!, { d4 - d5 }
fldmiad Y , { d8 - d9 }
vldmia.f64 X!, { d4 - d5 }
vldmia.f64 Y , { d8 - d9 }

FMAC_R1 d8 , d0, d4
FMAC_R2 d8 , d1, d5
FMAC_I1 d9 , d0, d5
FMAC_I2 d9 , d1, d4
fstmiad Y!, { d8 }
fstmiad Y!, { d9 }
vstmia.f64 Y!, { d8 }
vstmia.f64 Y!, { d9 }



@@ -293,14 +293,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmiad X , { d4 - d5 }
fldmiad Y , { d8 - d9 }
vldmia.f64 X , { d4 - d5 }
vldmia.f64 Y , { d8 - d9 }

FMAC_R1 d8 , d0, d4
FMAC_R2 d8 , d1, d5
FMAC_I1 d9 , d0, d5
FMAC_I2 d9 , d1, d4
fstmiad Y , { d8 - d9 }
vstmia.f64 Y , { d8 - d9 }

add X, X, INC_X
add Y, Y, INC_Y
@@ -314,40 +314,40 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4

pld [ X, #X_PRE ]
fldmias X!, { s4 - s7 }
vldmia.f32 X!, { s4 - s7 }
pld [ Y, #X_PRE ]
fldmias Y , { s8 - s11 }
vldmia.f32 Y , { s8 - s11 }

FMAC_R1 s8 , s0, s4
FMAC_R2 s8 , s1, s5
FMAC_I1 s9 , s0, s5
FMAC_I2 s9 , s1, s4
fstmias Y!, { s8 }
fstmias Y!, { s9 }
vstmia.f32 Y!, { s8 }
vstmia.f32 Y!, { s9 }

FMAC_R1 s10, s0, s6
FMAC_R2 s10, s1, s7
FMAC_I1 s11, s0, s7
FMAC_I2 s11, s1, s6
fstmias Y!, { s10 }
fstmias Y!, { s11 }
vstmia.f32 Y!, { s10 }
vstmia.f32 Y!, { s11 }

fldmias X!, { s4 - s7 }
fldmias Y , { s8 - s11 }
vldmia.f32 X!, { s4 - s7 }
vldmia.f32 Y , { s8 - s11 }

FMAC_R1 s8 , s0, s4
FMAC_R2 s8 , s1, s5
FMAC_I1 s9 , s0, s5
FMAC_I2 s9 , s1, s4
fstmias Y!, { s8 }
fstmias Y!, { s9 }
vstmia.f32 Y!, { s8 }
vstmia.f32 Y!, { s9 }

FMAC_R1 s10, s0, s6
FMAC_R2 s10, s1, s7
FMAC_I1 s11, s0, s7
FMAC_I2 s11, s1, s6
fstmias Y!, { s10 }
fstmias Y!, { s11 }
vstmia.f32 Y!, { s10 }
vstmia.f32 Y!, { s11 }



@@ -358,15 +358,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmias X!, { s4 - s5 }
fldmias Y , { s8 - s9 }
vldmia.f32 X!, { s4 - s5 }
vldmia.f32 Y , { s8 - s9 }

FMAC_R1 s8 , s0, s4
FMAC_R2 s8 , s1, s5
FMAC_I1 s9 , s0, s5
FMAC_I2 s9 , s1, s4
fstmias Y!, { s8 }
fstmias Y!, { s9 }
vstmia.f32 Y!, { s8 }
vstmia.f32 Y!, { s9 }



@@ -374,14 +374,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmias X , { s4 - s5 }
fldmias Y , { s8 - s9 }
vldmia.f32 X , { s4 - s5 }
vldmia.f32 Y , { s8 - s9 }

FMAC_R1 s8 , s0, s4
FMAC_R2 s8 , s1, s5
FMAC_I1 s9 , s0, s5
FMAC_I2 s9 , s1, s4
fstmias Y , { s8 - s9 }
vstmia.f32 Y , { s8 - s9 }

add X, X, INC_X
add Y, Y, INC_Y


+ 14
- 14
kernel/arm/ccopy_vfp.S View File

@@ -65,15 +65,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_F4

pld [ X, #X_PRE ]
fldmias X!, { s0 - s7 }
fstmias Y!, { s0 - s7 }
vldmia.f32 X!, { s0 - s7 }
vstmia.f32 Y!, { s0 - s7 }

.endm

.macro COPY_F1

fldmias X!, { s0 - s1 }
fstmias Y!, { s0 - s1 }
vldmia.f32 X!, { s0 - s1 }
vstmia.f32 Y!, { s0 - s1 }

.endm

@@ -83,23 +83,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_S4

nop
fldmias X, { s0 - s1 }
fstmias Y, { s0 - s1 }
vldmia.f32 X, { s0 - s1 }
vstmia.f32 Y, { s0 - s1 }
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s2 - s3 }
fstmias Y, { s2 - s3 }
vldmia.f32 X, { s2 - s3 }
vstmia.f32 Y, { s2 - s3 }
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s0 - s1 }
fstmias Y, { s0 - s1 }
vldmia.f32 X, { s0 - s1 }
vstmia.f32 Y, { s0 - s1 }
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s2 - s3 }
fstmias Y, { s2 - s3 }
vldmia.f32 X, { s2 - s3 }
vstmia.f32 Y, { s2 - s3 }
add X, X, INC_X
add Y, Y, INC_Y

@@ -108,8 +108,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY_S1

fldmias X, { s0 - s1 }
fstmias Y, { s0 - s1 }
vldmia.f32 X, { s0 - s1 }
vstmia.f32 Y, { s0 - s1 }
add X, X, INC_X
add Y, Y, INC_Y



+ 20
- 20
kernel/arm/cdot_vfp.S View File

@@ -76,30 +76,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]

fldmias X!, { s4 - s5 }
fldmias Y!, { s8 - s9 }
vldmia.f32 X!, { s4 - s5 }
vldmia.f32 Y!, { s8 - s9 }
fmacs s0 , s4, s8
fmacs s1 , s4, s9
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
fmacs s2 , s5, s9
fmacs s3 , s5, s8

fldmias Y!, { s10 - s11 }
vldmia.f32 Y!, { s10 - s11 }
fmacs s0 , s6, s10
fmacs s1 , s6, s11
fmacs s2 , s7, s11
fmacs s3 , s7, s10


fldmias X!, { s4 - s5 }
fldmias Y!, { s8 - s9 }
vldmia.f32 X!, { s4 - s5 }
vldmia.f32 Y!, { s8 - s9 }
fmacs s0 , s4, s8
fmacs s1 , s4, s9
fldmias X!, { s6 - s7 }
vldmia.f32 X!, { s6 - s7 }
fmacs s2 , s5, s9
fmacs s3 , s5, s8

fldmias Y!, { s10 - s11 }
vldmia.f32 Y!, { s10 - s11 }
fmacs s0 , s6, s10
fmacs s1 , s6, s11
fmacs s2 , s7, s11
@@ -109,8 +109,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmias X!, { s4 - s5 }
fldmias Y!, { s8 - s9 }
vldmia.f32 X!, { s4 - s5 }
vldmia.f32 Y!, { s8 - s9 }
fmacs s0 , s4, s8
fmacs s1 , s4, s9
fmacs s2 , s5, s9
@@ -125,8 +125,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

nop

fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8
fmacs s1 , s4, s9
fmacs s2 , s5, s9
@@ -134,8 +134,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8
fmacs s1 , s4, s9
fmacs s2 , s5, s9
@@ -143,8 +143,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8
fmacs s1 , s4, s9
fmacs s2 , s5, s9
@@ -152,8 +152,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8
fmacs s1 , s4, s9
fmacs s2 , s5, s9
@@ -166,8 +166,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmias X, { s4 - s5 }
fldmias Y, { s8 - s9 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s8 - s9 }
fmacs s0 , s4, s8
fmacs s1 , s4, s9
fmacs s2 , s5, s9


+ 22
- 22
kernel/arm/cgemm_kernel_2x2_vfp.S View File

@@ -165,9 +165,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_I

pld [ AO, #A_PRE ]
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
pld [ BO, #B_PRE ]
fldmias BO!, { s4 - s7 }
vldmia.f32 BO!, { s4 - s7 }


fmuls s8 , s0, s4
@@ -197,9 +197,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_M1

pld [ AO, #A_PRE ]
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
pld [ BO, #B_PRE ]
fldmias BO!, { s4 - s7 }
vldmia.f32 BO!, { s4 - s7 }

fmacs s8 , s0, s4
fmacs s9 , s0, s5
@@ -225,8 +225,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL2x2_M2

fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }

fmacs s8 , s0, s4
fmacs s9 , s0, s5
@@ -254,8 +254,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL2x2_E

fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }

fmacs s8 , s0, s4
fmacs s9 , s0, s5
@@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL2x2_SUB

fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }

fmacs s8 , s0, s4
fmacs s9 , s0, s5
@@ -317,7 +317,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias CO1, { s4 - s7 }
vldmia.f32 CO1, { s4 - s7 }

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -329,9 +329,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10

fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }

fldmias CO2, { s4 - s7 }
vldmia.f32 CO2, { s4 - s7 }

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
@@ -343,7 +343,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14

fstmias CO2, { s4 - s7 }
vstmia.f32 CO2, { s4 - s7 }

add CO1, CO1, #16

@@ -500,23 +500,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias CO1, { s4 - s5 }
vldmia.f32 CO1, { s4 - s5 }

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8

fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }

fldmias CO2, { s4 - s5 }
vldmia.f32 CO2, { s4 - s5 }

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12

fstmias CO2, { s4 - s5 }
vstmia.f32 CO2, { s4 - s5 }

add CO1, CO1, #8

@@ -671,7 +671,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias CO1, { s4 - s7 }
vldmia.f32 CO1, { s4 - s7 }

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -683,7 +683,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10

fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }

add CO1, CO1, #16

@@ -800,14 +800,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias CO1, { s4 - s5 }
vldmia.f32 CO1, { s4 - s5 }

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8

fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }

add CO1, CO1, #8



+ 32
- 32
kernel/arm/cgemm_kernel_2x2_vfpv3.S View File

@@ -182,30 +182,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_I
pld [ AO , #A_PRE ]
pld [ BO , #B_PRE ]
fldmias AO!, { s0 - s1 }
fldmias BO!, { s8 - s9 }
vldmia.f32 AO!, { s0 - s1 }
vldmia.f32 BO!, { s8 - s9 }

fmuls s16 , s0, s8
fmuls s24 , s1, s9
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmuls s17 , s0, s9
fmuls s25 , s1, s8

fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmuls s18 , s2, s8
fmuls s26 , s3, s9
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmuls s19 , s2, s9
fmuls s27 , s3, s8

fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmuls s20 , s0, s10
fmuls s28 , s1, s11
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmuls s21 , s0, s11
fmuls s29 , s1, s10

fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmuls s22 , s2, s10
fmuls s30 , s3, s11
fmuls s23 , s2, s11
@@ -218,17 +218,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_M1

fmacs s16 , s0, s8
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmacs s24 , s1, s9
fmacs s17 , s0, s9
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmacs s25 , s1, s8

fmacs s18 , s2, s8
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmacs s26 , s3, s9
fmacs s19 , s2, s9
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmacs s27 , s3, s8

fmacs s20 , s0, s10
@@ -250,19 +250,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ BO , #B_PRE ]
fmacs s24 , s5, s13
fmacs s17 , s4, s13
fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
fmacs s25 , s5, s12

fmacs s18 , s6, s12
fmacs s26 , s7, s13
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }
fmacs s19 , s6, s13
fmacs s27 , s7, s12

fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s20 , s4, s14
fmacs s28 , s5, s15
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s21 , s4, s15
fmacs s29 , s5, s14

@@ -300,16 +300,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL2x2_SUB

fldmias AO!, { s0 - s1 }
fldmias BO!, { s8 - s9 }
vldmia.f32 AO!, { s0 - s1 }
vldmia.f32 BO!, { s8 - s9 }

fmacs s16 , s0, s8
fmacs s24 , s1, s9
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s17 , s0, s9
fmacs s25 , s1, s8

fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s18 , s2, s8
fmacs s26 , s3, s9
fmacs s19 , s2, s9
@@ -338,8 +338,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias CO1, { s4 - s7 }
fldmias CO2, { s8 - s11 }
vldmia.f32 CO1, { s4 - s7 }
vldmia.f32 CO2, { s8 - s11 }

FADD_R s16, s24 , s16
FADD_I s17, s25 , s17
@@ -370,8 +370,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s10, s1 , s23
FMAC_I2 s11, s1 , s22

fstmias CO1, { s4 - s7 }
fstmias CO2, { s8 - s11 }
vstmia.f32 CO1, { s4 - s7 }
vstmia.f32 CO2, { s8 - s11 }

add CO1, CO1, #16

@@ -534,8 +534,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias CO1, { s4 - s5 }
fldmias CO2, { s8 - s9 }
vldmia.f32 CO1, { s4 - s5 }
vldmia.f32 CO2, { s8 - s9 }

FADD_R s16, s24 , s16
FADD_I s17, s25 , s17
@@ -552,8 +552,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s8 , s1 , s21
FMAC_I2 s9 , s1 , s20

fstmias CO1, { s4 - s5 }
fstmias CO2, { s8 - s9 }
vstmia.f32 CO1, { s4 - s5 }
vstmia.f32 CO2, { s8 - s9 }

add CO1, CO1, #8

@@ -716,7 +716,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias CO1, { s4 - s7 }
vldmia.f32 CO1, { s4 - s7 }

FADD_R s16, s24 , s16
FADD_I s17, s25 , s17
@@ -733,7 +733,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s19
FMAC_I2 s7 , s1 , s18

fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }

add CO1, CO1, #16

@@ -851,7 +851,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias CO1, { s4 - s5 }
vldmia.f32 CO1, { s4 - s5 }

FADD_R s16, s24 , s16
FADD_I s17, s25 , s17
@@ -861,7 +861,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s17
FMAC_I2 s5 , s1 , s16

fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }

add CO1, CO1, #8



+ 4
- 4
kernel/arm/cgemm_ncopy_2_vfp.S View File

@@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s6 , [ AO2, #8 ]
flds s7 , [ AO2, #12 ]

fstmias BO!, { s0 - s7 }
vstmia.f32 BO!, { s0 - s7 }
add AO2, AO2, #16

.endm
@@ -99,7 +99,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s3 , [ AO2, #4 ]

add AO1, AO1, #8
fstmias BO!, { s0 - s3 }
vstmia.f32 BO!, { s0 - s3 }
add AO2, AO2, #8

.endm
@@ -111,7 +111,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s2 , [ AO1, #8 ]
flds s3 , [ AO1, #12 ]

fstmias BO!, { s0 - s3 }
vstmia.f32 BO!, { s0 - s3 }
add AO1, AO1, #16

.endm
@@ -122,7 +122,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0 , [ AO1, #0 ]
flds s1 , [ AO1, #4 ]

fstmias BO!, { s0 - s1 }
vstmia.f32 BO!, { s0 - s1 }
add AO1, AO1, #8

.endm


+ 10
- 10
kernel/arm/cgemm_tcopy_2_vfp.S View File

@@ -73,12 +73,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**************************************************************************************/
.macro COPY2x2

fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }

add r3, AO1, LDA
fldmias r3, { s4 - s7 }
vldmia.f32 r3, { s4 - s7 }

fstmias BO1, { s0 - s7 }
vstmia.f32 BO1, { s0 - s7 }
add AO1, AO1, #16
add BO1, BO1, M4

@@ -86,12 +86,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x2

fldmias AO1, { s0 -s1 }
vldmia.f32 AO1, { s0 -s1 }

add r3, AO1, LDA
fldmias r3, { s2 - s3 }
vldmia.f32 r3, { s2 - s3 }

fstmias BO2, { s0 - s3 }
vstmia.f32 BO2, { s0 - s3 }
add AO1, AO1, #8
add BO2, BO2, #16

@@ -100,9 +100,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*************************************************************************************************************************/
.macro COPY2x1

fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }

fstmias BO1, { s0 - s3 }
vstmia.f32 BO1, { s0 - s3 }
add AO1, AO1, #16
add BO1, BO1, M4

@@ -110,9 +110,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x1

fldmias AO1, { s0 - s1 }
vldmia.f32 AO1, { s0 - s1 }

fstmias BO2, { s0 - s1 }
vstmia.f32 BO2, { s0 - s1 }
add AO1, AO1, #8
add BO2, BO2, #8



+ 16
- 16
kernel/arm/cgemv_n_vfp.S View File

@@ -201,7 +201,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
@@ -213,9 +213,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10

fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }

fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
@@ -227,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14

fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }

.endm

@@ -266,14 +266,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8

fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }

add YO, YO, #8

@@ -349,47 +349,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8

fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }

add YO, YO, INC_Y

fldmias YO, { s6 - s7 }
vldmia.f32 YO, { s6 - s7 }

FMAC_R1 s6 , s0 , s10
FMAC_I1 s7 , s0 , s11
FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10

fstmias YO, { s6 - s7 }
vstmia.f32 YO, { s6 - s7 }

add YO, YO, INC_Y

fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12

fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }

add YO, YO, INC_Y

fldmias YO, { s6 - s7 }
vldmia.f32 YO, { s6 - s7 }

FMAC_R1 s6 , s0 , s14
FMAC_I1 s7 , s0 , s15
FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14

fstmias YO, { s6 - s7 }
vstmia.f32 YO, { s6 - s7 }

add YO, YO, INC_Y

@@ -430,14 +430,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA_R
flds s1, ALPHA_I

fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }

FMAC_R1 s4 , s0 , s8
FMAC_I1 s5 , s0 , s9
FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8

fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }

add YO, YO, INC_Y



+ 20
- 20
kernel/arm/cgemv_t_vfp.S View File

@@ -150,9 +150,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F2X1

fldmias XO! , { s2 - s3 }
fldmias AO1!, { s4 - s5 }
fldmias AO2!, { s8 - s9 }
vldmia.f32 XO! , { s2 - s3 }
vldmia.f32 AO1!, { s4 - s5 }
vldmia.f32 AO2!, { s8 - s9 }

fmacs s12 , s4 , s2
fmacs s13 , s4 , s3
@@ -168,7 +168,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F2

fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
@@ -180,7 +180,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14

fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }

.endm

@@ -204,8 +204,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmias XO! , { s2 - s3 }
fldmias AO1!, { s4 - s5 }
vldmia.f32 XO! , { s2 - s3 }
vldmia.f32 AO1!, { s4 - s5 }

fmacs s12 , s4 , s2
fmacs s13 , s4 , s3
@@ -216,14 +216,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F1

fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12

fstmias YO!, { s4 - s5 }
vstmia.f32 YO!, { s4 - s5 }

.endm

@@ -249,9 +249,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S2X1

fldmias XO , { s2 - s3 }
fldmias AO1!, { s4 - s5 }
fldmias AO2!, { s8 - s9 }
vldmia.f32 XO , { s2 - s3 }
vldmia.f32 AO1!, { s4 - s5 }
vldmia.f32 AO2!, { s8 - s9 }

fmacs s12 , s4 , s2
fmacs s13 , s4 , s3
@@ -269,25 +269,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S2

fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12

fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }

add YO, YO, INC_Y

fldmias YO, { s6 - s7 }
vldmia.f32 YO, { s6 - s7 }

FMAC_R1 s6 , s0 , s14
FMAC_I1 s7 , s0 , s15
FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14

fstmias YO, { s6 - s7 }
vstmia.f32 YO, { s6 - s7 }

add YO, YO, INC_Y

@@ -313,8 +313,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmias XO , { s2 - s3 }
fldmias AO1!, { s4 - s5 }
vldmia.f32 XO , { s2 - s3 }
vldmia.f32 AO1!, { s4 - s5 }

fmacs s12 , s4 , s2
fmacs s13 , s4 , s3
@@ -327,14 +327,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }

FMAC_R1 s4 , s0 , s12
FMAC_I1 s5 , s0 , s13
FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12

fstmias YO, { s4 - s5 }
vstmia.f32 YO, { s4 - s5 }

add YO, YO, INC_Y



+ 16
- 16
kernel/arm/ctrmm_kernel_2x2_vfp.S View File

@@ -165,9 +165,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_I

pld [ AO, #A_PRE ]
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
pld [ BO, #B_PRE ]
fldmias BO!, { s4 - s7 }
vldmia.f32 BO!, { s4 - s7 }


fmuls s8 , s0, s4
@@ -197,9 +197,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_M1

pld [ AO, #A_PRE ]
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
pld [ BO, #B_PRE ]
fldmias BO!, { s4 - s7 }
vldmia.f32 BO!, { s4 - s7 }

fmacs s8 , s0, s4
fmacs s9 , s0, s5
@@ -225,8 +225,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL2x2_M2

fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }

fmacs s8 , s0, s4
fmacs s9 , s0, s5
@@ -254,8 +254,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL2x2_E

fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }

fmacs s8 , s0, s4
fmacs s9 , s0, s5
@@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL2x2_SUB

fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s7 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s7 }

fmacs s8 , s0, s4
fmacs s9 , s0, s5
@@ -331,7 +331,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10

fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }

flds s4, FP_ZERO
vmov.f32 s5, s4
@@ -348,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s15
FMAC_I2 s7 , s1 , s14

fstmias CO2, { s4 - s7 }
vstmia.f32 CO2, { s4 - s7 }

add CO1, CO1, #16

@@ -513,7 +513,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8

fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }

flds s4, FP_ZERO
vmov.f32 s5, s4
@@ -523,7 +523,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s13
FMAC_I2 s5 , s1 , s12

fstmias CO2, { s4 - s5 }
vstmia.f32 CO2, { s4 - s5 }

add CO1, CO1, #8

@@ -693,7 +693,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s11
FMAC_I2 s7 , s1 , s10

fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }

add CO1, CO1, #16

@@ -818,7 +818,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s9
FMAC_I2 s5 , s1 , s8

fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }

add CO1, CO1, #8



+ 26
- 26
kernel/arm/ctrmm_kernel_2x2_vfpv3.S View File

@@ -170,30 +170,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_I
pld [ AO , #A_PRE ]
pld [ BO , #B_PRE ]
fldmias AO!, { s0 - s1 }
fldmias BO!, { s8 - s9 }
vldmia.f32 AO!, { s0 - s1 }
vldmia.f32 BO!, { s8 - s9 }

fmuls s16 , s0, s8
fmuls s24 , s1, s9
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmuls s17 , s0, s9
fmuls s25 , s1, s8

fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmuls s18 , s2, s8
fmuls s26 , s3, s9
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmuls s19 , s2, s9
fmuls s27 , s3, s8

fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmuls s20 , s0, s10
fmuls s28 , s1, s11
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmuls s21 , s0, s11
fmuls s29 , s1, s10

fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmuls s22 , s2, s10
fmuls s30 , s3, s11
fmuls s23 , s2, s11
@@ -206,17 +206,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL2x2_M1

fmacs s16 , s0, s8
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmacs s24 , s1, s9
fmacs s17 , s0, s9
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmacs s25 , s1, s8

fmacs s18 , s2, s8
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmacs s26 , s3, s9
fmacs s19 , s2, s9
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmacs s27 , s3, s8

fmacs s20 , s0, s10
@@ -238,19 +238,19 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ BO , #B_PRE ]
fmacs s24 , s5, s13
fmacs s17 , s4, s13
fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
fmacs s25 , s5, s12

fmacs s18 , s6, s12
fmacs s26 , s7, s13
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }
fmacs s19 , s6, s13
fmacs s27 , s7, s12

fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s20 , s4, s14
fmacs s28 , s5, s15
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s21 , s4, s15
fmacs s29 , s5, s14

@@ -288,16 +288,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL2x2_SUB

fldmias AO!, { s0 - s1 }
fldmias BO!, { s8 - s9 }
vldmia.f32 AO!, { s0 - s1 }
vldmia.f32 BO!, { s8 - s9 }

fmacs s16 , s0, s8
fmacs s24 , s1, s9
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s17 , s0, s9
fmacs s25 , s1, s8

fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s18 , s2, s8
fmacs s26 , s3, s9
fmacs s19 , s2, s9
@@ -354,8 +354,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s10, s1 , s23
FMAC_I2 s11, s1 , s22

fstmias CO1, { s4 - s7 }
fstmias CO2, { s8 - s11 }
vstmia.f32 CO1, { s4 - s7 }
vstmia.f32 CO2, { s8 - s11 }

add CO1, CO1, #16

@@ -532,8 +532,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s8 , s1 , s21
FMAC_I2 s9 , s1 , s20

fstmias CO1, { s4 - s5 }
fstmias CO2, { s8 - s9 }
vstmia.f32 CO1, { s4 - s5 }
vstmia.f32 CO2, { s8 - s9 }

add CO1, CO1, #8

@@ -710,7 +710,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s6 , s1 , s19
FMAC_I2 s7 , s1 , s18

fstmias CO1, { s4 - s7 }
vstmia.f32 CO1, { s4 - s7 }

add CO1, CO1, #16

@@ -835,7 +835,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 s4 , s1 , s17
FMAC_I2 s5 , s1 , s16

fstmias CO1, { s4 - s5 }
vstmia.f32 CO1, { s4 - s5 }

add CO1, CO1, #8



+ 14
- 14
kernel/arm/dcopy_vfp.S View File

@@ -65,15 +65,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_F4

pld [ X, #X_PRE ]
fldmiad X!, { d0 - d3 }
fstmiad Y!, { d0 - d3 }
vldmia.f64 X!, { d0 - d3 }
vstmia.f64 Y!, { d0 - d3 }

.endm

.macro COPY_F1

fldmiad X!, { d0 }
fstmiad Y!, { d0 }
vldmia.f64 X!, { d0 }
vstmia.f64 Y!, { d0 }

.endm

@@ -83,23 +83,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_S4

nop
fldmiad X, { d0 }
fstmiad Y, { d0 }
vldmia.f64 X, { d0 }
vstmia.f64 Y, { d0 }
add X, X, INC_X
add Y, Y, INC_Y

fldmiad X, { d1 }
fstmiad Y, { d1 }
vldmia.f64 X, { d1 }
vstmia.f64 Y, { d1 }
add X, X, INC_X
add Y, Y, INC_Y

fldmiad X, { d0 }
fstmiad Y, { d0 }
vldmia.f64 X, { d0 }
vstmia.f64 Y, { d0 }
add X, X, INC_X
add Y, Y, INC_Y

fldmiad X, { d1 }
fstmiad Y, { d1 }
vldmia.f64 X, { d1 }
vstmia.f64 Y, { d1 }
add X, X, INC_X
add Y, Y, INC_Y

@@ -108,8 +108,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY_S1

fldmiad X, { d0 }
fstmiad Y, { d0 }
vldmia.f64 X, { d0 }
vstmia.f64 Y, { d0 }
add X, X, INC_X
add Y, Y, INC_Y



+ 20
- 20
kernel/arm/ddot_vfp.S View File

@@ -67,26 +67,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4

pld [ X, #X_PRE ]
fldmiad X!, { d8 }
vldmia.f64 X!, { d8 }
pld [ Y, #X_PRE ]
fldmiad Y!, { d4 }
fldmiad Y!, { d5 }
vldmia.f64 Y!, { d4 }
vldmia.f64 Y!, { d5 }
fmacd d0 , d4, d8
fldmiad X!, { d9 }
fldmiad Y!, { d6 }
vldmia.f64 X!, { d9 }
vldmia.f64 Y!, { d6 }
fmacd d1 , d5, d9
fldmiad X!, { d10 }
fldmiad X!, { d11 }
vldmia.f64 X!, { d10 }
vldmia.f64 X!, { d11 }
fmacd d0 , d6, d10
fldmiad Y!, { d7 }
vldmia.f64 Y!, { d7 }
fmacd d1 , d7, d11

.endm

.macro KERNEL_F1

fldmiad X!, { d4 }
fldmiad Y!, { d8 }
vldmia.f64 X!, { d4 }
vldmia.f64 Y!, { d8 }
fmacd d0 , d4, d8

.endm
@@ -97,26 +97,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S4

nop
fldmiad X, { d4 }
fldmiad Y, { d8 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d8 }
add X, X, INC_X
add Y, Y, INC_Y
fmacd d0 , d4, d8

fldmiad X, { d5 }
fldmiad Y, { d9 }
vldmia.f64 X, { d5 }
vldmia.f64 Y, { d9 }
add X, X, INC_X
add Y, Y, INC_Y
fmacd d1 , d5, d9

fldmiad X, { d6 }
fldmiad Y, { d10 }
vldmia.f64 X, { d6 }
vldmia.f64 Y, { d10 }
add X, X, INC_X
add Y, Y, INC_Y
fmacd d0 , d6, d10

fldmiad X, { d7 }
fldmiad Y, { d11 }
vldmia.f64 X, { d7 }
vldmia.f64 Y, { d11 }
add X, X, INC_X
add Y, Y, INC_Y
fmacd d1 , d7, d11
@@ -126,8 +126,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmiad X, { d4 }
fldmiad Y, { d8 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d8 }
add X, X, INC_X
fmacd d0 , d4, d8
add Y, Y, INC_Y


+ 4
- 4
kernel/arm/dgemm_kernel_4x4_vfpv3.S View File

@@ -331,7 +331,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add r4 , CO2, r3
pld [ CO2 , #C_PRE ]

fldmiad CO1, { d8 - d11 }
vldmia.f64 CO1, { d8 - d11 }
pld [ r4 , #C_PRE ]

fmacd d8 , d0 , d16
@@ -352,7 +352,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmacd d15, d0 , d23
fstd d11, [CO1, #24 ]

fldmiad r4, { d8 - d11 }
vldmia.f64 r4, { d8 - d11 }

fmacd d8 , d0 , d24
fstd d12, [CO2]
@@ -367,7 +367,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ CO2 , #C_PRE ]

fldmiad CO2, { d12 - d15 }
vldmia.f64 CO2, { d12 - d15 }

fstd d8 , [r4 ]
fmacd d12, d0 , d28
@@ -378,7 +378,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fstd d11, [r4 , #24 ]
fmacd d15, d0 , d31

fstmiad CO2, { d12 - d15 }
vstmia.f64 CO2, { d12 - d15 }

add CO1, CO1, #32



+ 4
- 4
kernel/arm/dgemm_ncopy_2_vfp.S View File

@@ -73,7 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d3 , [ AO2, #8 ]

add AO1, AO1, #16
fstmiad BO!, { d0 - d3 }
vstmia.f64 BO!, { d0 - d3 }
add AO2, AO2, #16

.endm
@@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d1 , [ AO2, #0 ]
add AO1, AO1, #8

fstmiad BO!, { d0 - d1 }
vstmia.f64 BO!, { d0 - d1 }
add AO2, AO2, #8

.endm
@@ -95,7 +95,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0 , [ AO1, #0 ]
fldd d1 , [ AO1, #8 ]

fstmiad BO!, { d0 - d1 }
vstmia.f64 BO!, { d0 - d1 }
add AO1, AO1, #16

.endm
@@ -105,7 +105,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fldd d0 , [ AO1, #0 ]

fstmiad BO!, { d0 }
vstmia.f64 BO!, { d0 }
add AO1, AO1, #8

.endm


+ 8
- 8
kernel/arm/dgemm_ncopy_4_vfp.S View File

@@ -105,10 +105,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d11, [ AO4, #16 ]
fldd d15, [ AO4, #24 ]

fstmiad BO!, { d0 - d3 }
vstmia.f64 BO!, { d0 - d3 }
add AO4, AO4, #32
fstmiad BO!, { d4 - d7 }
fstmiad BO!, { d8 - d15 }
vstmia.f64 BO!, { d4 - d7 }
vstmia.f64 BO!, { d8 - d15 }

.endm

@@ -122,7 +122,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d3 , [ AO4, #0 ]

add AO3, AO3, #8
fstmiad BO!, { d0 - d3 }
vstmia.f64 BO!, { d0 - d3 }
add AO4, AO4, #8

.endm
@@ -140,7 +140,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d5 , [ AO2, #16 ]
fldd d7 , [ AO2, #24 ]

fstmiad BO!, { d0 - d7 }
vstmia.f64 BO!, { d0 - d7 }
add AO2, AO2, #32

.endm
@@ -152,7 +152,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d1 , [ AO2, #0 ]
add AO1, AO1, #8

fstmiad BO!, { d0 - d1 }
vstmia.f64 BO!, { d0 - d1 }
add AO2, AO2, #8

.endm
@@ -164,7 +164,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d2 , [ AO1, #16 ]
fldd d3 , [ AO1, #24 ]

fstmiad BO!, { d0 - d3 }
vstmia.f64 BO!, { d0 - d3 }
add AO1, AO1, #32

.endm
@@ -174,7 +174,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

fldd d0 , [ AO1, #0 ]

fstmiad BO!, { d0 }
vstmia.f64 BO!, { d0 }
add AO1, AO1, #8

.endm


+ 30
- 30
kernel/arm/dgemm_tcopy_4_vfp.S View File

@@ -76,21 +76,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY4x4

pld [ AO1, #A_PRE ]
fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }

add r3, AO1, LDA
pld [ r3, #A_PRE ]
fldmiad r3, { d4 - d7 }
vldmia.f64 r3, { d4 - d7 }

add r3, r3, LDA
pld [ r3, #A_PRE ]
fldmiad r3, { d8 - d11 }
vldmia.f64 r3, { d8 - d11 }

add r3, r3, LDA
pld [ r3, #A_PRE ]
fldmiad r3, { d12 - d15 }
vldmia.f64 r3, { d12 - d15 }

fstmiad BO1, { d0 - d15 }
vstmia.f64 BO1, { d0 - d15 }
add AO1, AO1, #32
add BO1, BO1, M4

@@ -98,18 +98,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY2x4

fldmiad AO1, { d0 - d1 }
vldmia.f64 AO1, { d0 - d1 }

add r3, AO1, LDA
fldmiad r3, { d2 - d3 }
vldmia.f64 r3, { d2 - d3 }

add r3, r3, LDA
fldmiad r3, { d4 - d5 }
vldmia.f64 r3, { d4 - d5 }

add r3, r3, LDA
fldmiad r3, { d6 - d7 }
vldmia.f64 r3, { d6 - d7 }

fstmiad BO2, { d0 - d7 }
vstmia.f64 BO2, { d0 - d7 }
add AO1, AO1, #16
add BO2, BO2, #64

@@ -117,18 +117,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x4

fldmiad AO1, { d0 }
vldmia.f64 AO1, { d0 }

add r3, AO1, LDA
fldmiad r3, { d1 }
vldmia.f64 r3, { d1 }

add r3, r3, LDA
fldmiad r3, { d2 }
vldmia.f64 r3, { d2 }

add r3, r3, LDA
fldmiad r3, { d3 }
vldmia.f64 r3, { d3 }

fstmiad BO3, { d0 - d3 }
vstmia.f64 BO3, { d0 - d3 }
add AO1, AO1, #8
add BO3, BO3, #32

@@ -139,13 +139,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY4x2

pld [ AO1, #A_PRE ]
fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }

add r3, AO1, LDA
pld [ r3, #A_PRE ]
fldmiad r3, { d4 - d7 }
vldmia.f64 r3, { d4 - d7 }

fstmiad BO1, { d0 - d7 }
vstmia.f64 BO1, { d0 - d7 }
add AO1, AO1, #32
add BO1, BO1, M4

@@ -153,12 +153,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY2x2

fldmiad AO1, { d0 - d1 }
vldmia.f64 AO1, { d0 - d1 }

add r3, AO1, LDA
fldmiad r3, { d2 - d3 }
vldmia.f64 r3, { d2 - d3 }

fstmiad BO2, { d0 - d3 }
vstmia.f64 BO2, { d0 - d3 }
add AO1, AO1, #16
add BO2, BO2, #32

@@ -166,12 +166,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x2

fldmiad AO1, { d0 }
vldmia.f64 AO1, { d0 }

add r3, AO1, LDA
fldmiad r3, { d1 }
vldmia.f64 r3, { d1 }

fstmiad BO3, { d0 - d1 }
vstmia.f64 BO3, { d0 - d1 }
add AO1, AO1, #8
add BO3, BO3, #16

@@ -182,9 +182,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY4x1

pld [ AO1, #A_PRE ]
fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }

fstmiad BO1, { d0 - d3 }
vstmia.f64 BO1, { d0 - d3 }
add AO1, AO1, #32
add BO1, BO1, M4

@@ -192,9 +192,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY2x1

fldmiad AO1, { d0 - d1 }
vldmia.f64 AO1, { d0 - d1 }

fstmiad BO2, { d0 - d1 }
vstmia.f64 BO2, { d0 - d1 }
add AO1, AO1, #16
add BO2, BO2, #16

@@ -202,9 +202,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x1

fldmiad AO1, { d0 }
vldmia.f64 AO1, { d0 }

fstmiad BO3, { d0 }
vstmia.f64 BO3, { d0 }
add AO1, AO1, #8
add BO3, BO3, #8



+ 13
- 13
kernel/arm/dtrmm_kernel_4x4_vfpv3.S View File

@@ -128,10 +128,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d8 , [ BO ]

pld [ AO , #A_PRE ]
fldmiad AO!, { d0 - d1}
vldmia.f64 AO!, { d0 - d1}

fmuld d16 , d0, d8
fldmiad AO!, { d2 - d3}
vldmia.f64 AO!, { d2 - d3}
fmuld d17 , d1, d8
fldd d9 , [ BO, #8 ]
fmuld d18 , d2, d8
@@ -148,10 +148,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmuld d23 , d3, d9

fmuld d24 , d0, d10
fldmiad AO!, { d4 - d5 }
vldmia.f64 AO!, { d4 - d5 }
fmuld d25 , d1, d10
fmuld d26 , d2, d10
fldmiad AO!, { d6 - d7 }
vldmia.f64 AO!, { d6 - d7 }
fmuld d27 , d3, d10

fldd d13, [ BO, #8 ]
@@ -173,10 +173,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d8 , [ BO ]

pld [ AO , #A_PRE ]
fldmiad AO!, { d0 - d1}
vldmia.f64 AO!, { d0 - d1}

fmacd d16 , d0, d8
fldmiad AO!, { d2 - d3}
vldmia.f64 AO!, { d2 - d3}
fmacd d17 , d1, d8
fldd d9 , [ BO, #8 ]
fmacd d18 , d2, d8
@@ -193,10 +193,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmacd d23 , d3, d9

fmacd d24 , d0, d10
fldmiad AO!, { d4 - d5 }
vldmia.f64 AO!, { d4 - d5 }
fmacd d25 , d1, d10
fmacd d26 , d2, d10
fldmiad AO!, { d6 - d7 }
vldmia.f64 AO!, { d6 - d7 }
fmacd d27 , d3, d10

fldd d13, [ BO, #8 ]
@@ -225,11 +225,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d8 , [ BO ]
fmacd d21 , d5, d13
fmacd d22 , d6, d13
fldmiad AO!, { d0 - d1 }
vldmia.f64 AO!, { d0 - d1 }
fmacd d23 , d7, d13

fmacd d24 , d4, d14
fldmiad AO!, { d2 - d3 }
vldmia.f64 AO!, { d2 - d3 }
fmacd d25 , d5, d14
fldd d9 , [ BO, #8 ]
fmacd d26 , d6, d14
@@ -257,10 +257,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmacd d19 , d3, d8

fmacd d20 , d0, d9
fldmiad AO!, { d4 - d5 }
vldmia.f64 AO!, { d4 - d5 }
fmacd d21 , d1, d9
fmacd d22 , d2, d9
fldmiad AO!, { d6 - d7 }
vldmia.f64 AO!, { d6 - d7 }
fmacd d23 , d3, d9

fmacd d24 , d0, d10
@@ -390,7 +390,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fstd d11, [r4 , #24 ]
fmuld d15, d0 , d31

fstmiad CO2, { d12 - d15 }
vstmia.f64 CO2, { d12 - d15 }

add CO1, CO1, #32



+ 50
- 50
kernel/arm/gemv_n_vfp.S View File

@@ -139,8 +139,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F8X1

pld [ AO2 , #A_PRE ]
fldmiad XO! , { d2 }
fldmiad AO1 , { d4 - d7 }
vldmia.f64 XO! , { d2 }
vldmia.f64 AO1 , { d4 - d7 }

vmla.f64 d8 , d2 , d4
pld [ AO2 , #4*SIZE ]
@@ -150,7 +150,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmla.f64 d11 , d2 , d7


fldmiad r3 , { d4 - d7 }
vldmia.f64 r3 , { d4 - d7 }

vmla.f64 d12 , d2 , d4
vmla.f64 d13 , d2 , d5
@@ -164,23 +164,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F8

fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }

vmla.f64 d4 , d0, d8
vmla.f64 d5 , d0, d9
vmla.f64 d6 , d0, d10
vmla.f64 d7 , d0, d11

fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }

fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }

vmla.f64 d4 , d0, d12
vmla.f64 d5 , d0, d13
vmla.f64 d6 , d0, d14
vmla.f64 d7 , d0, d15

fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }

.endm

@@ -195,8 +195,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmiad XO! , { d2 }
fldmiad AO1 , { d8 }
vldmia.f64 XO! , { d2 }
vldmia.f64 AO1 , { d8 }
vmla.f64 d12 , d2 , d8
add AO1, AO1, LDA

@@ -204,9 +204,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F1

fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d12
fstmiad YO!, { d4 }
vstmia.f64 YO!, { d4 }

.endm

@@ -234,8 +234,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S4X1

pld [ AO2 , #A_PRE ]
fldmiad XO , { d2 }
fldmiad AO1 , { d8 - d11 }
vldmia.f64 XO , { d2 }
vldmia.f64 AO1 , { d8 - d11 }

vmla.f64 d12 , d2 , d8
add AO1, AO1, LDA
@@ -249,24 +249,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S4

fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4 , d0, d12
fstmiad YO, { d4 }
vstmia.f64 YO, { d4 }
add YO, YO, INC_Y

fldmiad YO, { d5 }
vldmia.f64 YO, { d5 }
vmla.f64 d5 , d0, d13
fstmiad YO, { d5 }
vstmia.f64 YO, { d5 }
add YO, YO, INC_Y

fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4 , d0, d14
fstmiad YO, { d4 }
vstmia.f64 YO, { d4 }
add YO, YO, INC_Y

fldmiad YO, { d5 }
vldmia.f64 YO, { d5 }
vmla.f64 d5 , d0, d15
fstmiad YO, { d5 }
vstmia.f64 YO, { d5 }
add YO, YO, INC_Y

.endm
@@ -282,8 +282,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmiad XO , { d2 }
fldmiad AO1 , { d8 }
vldmia.f64 XO , { d2 }
vldmia.f64 AO1 , { d8 }
vmla.f64 d12 , d2 , d8
add AO1, AO1, LDA
add XO, XO , INC_X
@@ -292,9 +292,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d12
fstmiad YO , { d4 }
vstmia.f64 YO , { d4 }
add YO, YO, INC_Y

.endm
@@ -338,8 +338,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F8X1

pld [ AO2, #A_PRE ]
fldmias XO! , { s2 }
fldmias AO1 , { s4 - s7 }
vldmia.f32 XO! , { s2 }
vldmia.f32 AO1 , { s4 - s7 }

vmla.f32 s8 , s2 , s4
vmla.f32 s9 , s2 , s5
@@ -348,7 +348,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

add r3, AO1, #4*SIZE

fldmias r3 , { s4 - s7 }
vldmia.f32 r3 , { s4 - s7 }

vmla.f32 s12 , s2 , s4
vmla.f32 s13 , s2 , s5
@@ -362,24 +362,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F8

fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }

vmla.f32 s4 , s0, s8
vmla.f32 s5 , s0, s9
vmla.f32 s6 , s0, s10
vmla.f32 s7 , s0, s11

fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }


fldmias YO, { s4 - s7 }
vldmia.f32 YO, { s4 - s7 }

vmla.f32 s4 , s0, s12
vmla.f32 s5 , s0, s13
vmla.f32 s6 , s0, s14
vmla.f32 s7 , s0, s15

fstmias YO!, { s4 - s7 }
vstmia.f32 YO!, { s4 - s7 }

.endm

@@ -394,8 +394,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmias XO! , { s2 }
fldmias AO1 , { s8 }
vldmia.f32 XO! , { s2 }
vldmia.f32 AO1 , { s8 }
vmla.f32 s12 , s2 , s8
add AO1, AO1, LDA

@@ -403,9 +403,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F1

fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s12
fstmias YO!, { s4 }
vstmia.f32 YO!, { s4 }

.endm

@@ -434,8 +434,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S4X1

fldmias XO , { s2 }
fldmias AO1 , { s8 - s11 }
vldmia.f32 XO , { s2 }
vldmia.f32 AO1 , { s8 - s11 }

vmla.f32 s12 , s2 , s8
vmla.f32 s13 , s2 , s9
@@ -449,24 +449,24 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S4

fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4 , s0, s12
fstmias YO, { s4 }
vstmia.f32 YO, { s4 }
add YO, YO, INC_Y

fldmias YO, { s5 }
vldmia.f32 YO, { s5 }
vmla.f32 s5 , s0, s13
fstmias YO, { s5 }
vstmia.f32 YO, { s5 }
add YO, YO, INC_Y

fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4 , s0, s14
fstmias YO, { s4 }
vstmia.f32 YO, { s4 }
add YO, YO, INC_Y

fldmias YO, { s5 }
vldmia.f32 YO, { s5 }
vmla.f32 s5 , s0, s15
fstmias YO, { s5 }
vstmia.f32 YO, { s5 }
add YO, YO, INC_Y

.endm
@@ -482,8 +482,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmias XO , { s2 }
fldmias AO1 , { s8 }
vldmia.f32 XO , { s2 }
vldmia.f32 AO1 , { s8 }
vmla.f32 s12 , s2 , s8
add AO1, AO1, LDA
add XO, XO , INC_X
@@ -492,9 +492,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s12
fstmias YO , { s4 }
vstmia.f32 YO , { s4 }
add YO, YO, INC_Y

.endm


+ 60
- 60
kernel/arm/gemv_n_vfpv3.S View File

@@ -138,8 +138,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F8X1

fldmiad XO! , { d4 }
fldmiad AO1 , { d8 - d15 }
vldmia.f64 XO! , { d4 }
vldmia.f64 AO1 , { d8 - d15 }

vmla.f64 d24 , d4 , d8
pld [ AO2 , #A_PRE ]
@@ -158,7 +158,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F8

fldmiad YO, { d16 - d23 }
vldmia.f64 YO, { d16 - d23 }

vmla.f64 d16, d0, d24
vmla.f64 d17, d0, d25
@@ -169,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmla.f64 d22, d0, d30
vmla.f64 d23, d0, d31

fstmiad YO!, { d16 - d23 }
vstmia.f64 YO!, { d16 - d23 }

.endm

@@ -184,8 +184,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmiad XO! , { d4 }
fldmiad AO1 , { d8 }
vldmia.f64 XO! , { d4 }
vldmia.f64 AO1 , { d8 }
vmla.f64 d24 , d4 , d8
add AO1, AO1, LDA

@@ -193,9 +193,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F1

fldmiad YO, { d16 }
vldmia.f64 YO, { d16 }
vmla.f64 d16, d0, d24
fstmiad YO!, { d16 }
vstmia.f64 YO!, { d16 }

.endm

@@ -234,8 +234,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ AO2 , #A_PRE ]
pld [ AO2 , #A_PRE+32 ]
fldmiad XO , { d4 }
fldmiad AO1 , { d8 - d15 }
vldmia.f64 XO , { d4 }
vldmia.f64 AO1 , { d8 - d15 }

vmla.f64 d24 , d4 , d8
vmla.f64 d25 , d4 , d9
@@ -253,44 +253,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S8

fldmiad YO, { d16 }
vldmia.f64 YO, { d16 }
vmla.f64 d16, d0, d24
fstmiad YO, { d16 }
vstmia.f64 YO, { d16 }
add YO, YO, INC_Y

fldmiad YO, { d17 }
vldmia.f64 YO, { d17 }
vmla.f64 d17, d0, d25
fstmiad YO, { d17 }
vstmia.f64 YO, { d17 }
add YO, YO, INC_Y

fldmiad YO, { d18 }
vldmia.f64 YO, { d18 }
vmla.f64 d18, d0, d26
fstmiad YO, { d18 }
vstmia.f64 YO, { d18 }
add YO, YO, INC_Y

fldmiad YO, { d19 }
vldmia.f64 YO, { d19 }
vmla.f64 d19, d0, d27
fstmiad YO, { d19 }
vstmia.f64 YO, { d19 }
add YO, YO, INC_Y

fldmiad YO, { d20 }
vldmia.f64 YO, { d20 }
vmla.f64 d20, d0, d28
fstmiad YO, { d20 }
vstmia.f64 YO, { d20 }
add YO, YO, INC_Y

fldmiad YO, { d21 }
vldmia.f64 YO, { d21 }
vmla.f64 d21, d0, d29
fstmiad YO, { d21 }
vstmia.f64 YO, { d21 }
add YO, YO, INC_Y

fldmiad YO, { d22 }
vldmia.f64 YO, { d22 }
vmla.f64 d22, d0, d30
fstmiad YO, { d22 }
vstmia.f64 YO, { d22 }
add YO, YO, INC_Y

fldmiad YO, { d23 }
vldmia.f64 YO, { d23 }
vmla.f64 d23, d0, d31
fstmiad YO, { d23 }
vstmia.f64 YO, { d23 }
add YO, YO, INC_Y

.endm
@@ -306,8 +306,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmiad XO , { d4 }
fldmiad AO1 , { d8 }
vldmia.f64 XO , { d4 }
vldmia.f64 AO1 , { d8 }
vmla.f64 d24 , d4 , d8
add AO1, AO1, LDA
add XO, XO, INC_X
@@ -316,9 +316,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmiad YO, { d16 }
vldmia.f64 YO, { d16 }
vmla.f64 d16, d0, d24
fstmiad YO, { d16 }
vstmia.f64 YO, { d16 }
add YO, YO, INC_Y

.endm
@@ -361,8 +361,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F8X1

pld [ AO2 , #A_PRE ]
fldmias XO! , { s4 }
fldmias AO1 , { s8 - s15 }
vldmia.f32 XO! , { s4 }
vldmia.f32 AO1 , { s8 - s15 }

vmla.f32 s24 , s4 , s8
vmla.f32 s25 , s4 , s9
@@ -379,7 +379,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F8

fldmias YO, { s16 - s23 }
vldmia.f32 YO, { s16 - s23 }

vmla.f32 s16, s0, s24
vmla.f32 s17, s0, s25
@@ -390,7 +390,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
vmla.f32 s22, s0, s30
vmla.f32 s23, s0, s31

fstmias YO!, { s16 - s23 }
vstmia.f32 YO!, { s16 - s23 }

.endm

@@ -405,8 +405,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmias XO! , { s4 }
fldmias AO1 , { s8 }
vldmia.f32 XO! , { s4 }
vldmia.f32 AO1 , { s8 }
vmla.f32 s24 , s4 , s8
add AO1, AO1, LDA

@@ -414,9 +414,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F1

fldmias YO, { s16 }
vldmia.f32 YO, { s16 }
vmla.f32 s16, s0, s24
fstmias YO!, { s16 }
vstmia.f32 YO!, { s16 }

.endm

@@ -454,8 +454,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S8X1

pld [ AO2 , #A_PRE ]
fldmias XO , { s4 }
fldmias AO1 , { s8 - s15 }
vldmia.f32 XO , { s4 }
vldmia.f32 AO1 , { s8 - s15 }

vmla.f32 s24 , s4 , s8
vmla.f32 s25 , s4 , s9
@@ -473,44 +473,44 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S8

fldmias YO, { s16 }
vldmia.f32 YO, { s16 }
vmla.f32 s16, s0, s24
fstmias YO, { s16 }
vstmia.f32 YO, { s16 }
add YO, YO, INC_Y

fldmias YO, { s17 }
vldmia.f32 YO, { s17 }
vmla.f32 s17, s0, s25
fstmias YO, { s17 }
vstmia.f32 YO, { s17 }
add YO, YO, INC_Y

fldmias YO, { s18 }
vldmia.f32 YO, { s18 }
vmla.f32 s18, s0, s26
fstmias YO, { s18 }
vstmia.f32 YO, { s18 }
add YO, YO, INC_Y

fldmias YO, { s19 }
vldmia.f32 YO, { s19 }
vmla.f32 s19, s0, s27
fstmias YO, { s19 }
vstmia.f32 YO, { s19 }
add YO, YO, INC_Y

fldmias YO, { s20 }
vldmia.f32 YO, { s20 }
vmla.f32 s20, s0, s28
fstmias YO, { s20 }
vstmia.f32 YO, { s20 }
add YO, YO, INC_Y

fldmias YO, { s21 }
vldmia.f32 YO, { s21 }
vmla.f32 s21, s0, s29
fstmias YO, { s21 }
vstmia.f32 YO, { s21 }
add YO, YO, INC_Y

fldmias YO, { s22 }
vldmia.f32 YO, { s22 }
vmla.f32 s22, s0, s30
fstmias YO, { s22 }
vstmia.f32 YO, { s22 }
add YO, YO, INC_Y

fldmias YO, { s23 }
vldmia.f32 YO, { s23 }
vmla.f32 s23, s0, s31
fstmias YO, { s23 }
vstmia.f32 YO, { s23 }
add YO, YO, INC_Y

.endm
@@ -526,8 +526,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmias XO , { s4 }
fldmias AO1 , { s8 }
vldmia.f32 XO , { s4 }
vldmia.f32 AO1 , { s8 }
vmla.f32 s24 , s4 , s8
add AO1, AO1, LDA
add XO, XO, INC_X
@@ -536,9 +536,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmias YO, { s16 }
vldmia.f32 YO, { s16 }
vmla.f32 s16, s0, s24
fstmias YO, { s16 }
vstmia.f32 YO, { s16 }
add YO, YO, INC_Y

.endm


+ 84
- 84
kernel/arm/gemv_t_vfp.S View File

@@ -112,13 +112,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F2X4

pld [ XO , #X_PRE ]
fldmiad XO! , { d12 - d15 }
vldmia.f64 XO! , { d12 - d15 }
pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
pld [ AO2 , #A_PRE ]
fldmiad AO2!, { d4 - d5 }
fldmiad AO1!, { d10 - d11 }
fldmiad AO2!, { d6 - d7 }
vldmia.f64 AO2!, { d4 - d5 }
vldmia.f64 AO1!, { d10 - d11 }
vldmia.f64 AO2!, { d6 - d7 }

vmla.f64 d2 , d12 , d8
vmla.f64 d3 , d12 , d4
@@ -133,9 +133,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F2X1

fldmiad XO! , { d1 }
fldmiad AO1!, { d8 }
fldmiad AO2!, { d4 }
vldmia.f64 XO! , { d1 }
vldmia.f64 AO1!, { d8 }
vldmia.f64 AO2!, { d4 }
vmla.f64 d2 , d1 , d8
vmla.f64 d3 , d1 , d4

@@ -143,10 +143,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F2

fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }
vmla.f64 d4, d0, d2
vmla.f64 d5, d0, d3
fstmiad YO!, { d4 - d5 }
vstmia.f64 YO!, { d4 - d5 }

.endm

@@ -160,10 +160,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F1X4

pld [ XO , #X_PRE ]
fldmiad XO! , { d12 - d15 }
vldmia.f64 XO! , { d12 - d15 }
pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d2 , d12 , d8
vmla.f64 d2 , d13 , d9
vmla.f64 d2 , d14, d10
@@ -173,17 +173,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmiad XO! , { d1 }
fldmiad AO1!, { d8 }
vldmia.f64 XO! , { d1 }
vldmia.f64 AO1!, { d8 }
vmla.f64 d2 , d1 , d8

.endm

.macro SAVE_F1

fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d2
fstmiad YO!, { d4 }
vstmia.f64 YO!, { d4 }

.endm

@@ -197,23 +197,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S2X4

fldmiad XO , { d12 }
vldmia.f64 XO , { d12 }
add XO, XO, INC_X

pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
pld [ AO2 , #A_PRE ]
fldmiad AO2!, { d4 - d5 }
vldmia.f64 AO2!, { d4 - d5 }

fldmiad XO , { d13 }
vldmia.f64 XO , { d13 }
add XO, XO, INC_X
fldmiad AO1!, { d10 - d11 }
fldmiad AO2!, { d6 - d7 }
vldmia.f64 AO1!, { d10 - d11 }
vldmia.f64 AO2!, { d6 - d7 }

fldmiad XO , { d14 }
vldmia.f64 XO , { d14 }
add XO, XO, INC_X

fldmiad XO , { d15 }
vldmia.f64 XO , { d15 }
add XO, XO, INC_X

vmla.f64 d2 , d12 , d8
@@ -229,9 +229,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S2X1

fldmiad XO , { d1 }
fldmiad AO1!, { d8 }
fldmiad AO2!, { d4 }
vldmia.f64 XO , { d1 }
vldmia.f64 AO1!, { d8 }
vldmia.f64 AO2!, { d4 }
vmla.f64 d2 , d1 , d8
add XO, XO, INC_X
vmla.f64 d3 , d1 , d4
@@ -240,14 +240,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S2

fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d2
fstmiad YO, { d4 }
vstmia.f64 YO, { d4 }
add YO, YO, INC_Y

fldmiad YO, { d5 }
vldmia.f64 YO, { d5 }
vmla.f64 d5, d0, d3
fstmiad YO, { d5 }
vstmia.f64 YO, { d5 }
add YO, YO, INC_Y

.endm
@@ -261,20 +261,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X4

fldmiad XO , { d12 }
vldmia.f64 XO , { d12 }
add XO, XO, INC_X

pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }

fldmiad XO , { d13 }
vldmia.f64 XO , { d13 }
add XO, XO, INC_X
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }

fldmiad XO , { d14 }
vldmia.f64 XO , { d14 }
add XO, XO, INC_X

fldmiad XO , { d15 }
vldmia.f64 XO , { d15 }
add XO, XO, INC_X

vmla.f64 d2 , d12 , d8
@@ -286,8 +286,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmiad XO , { d1 }
fldmiad AO1!, { d8 }
vldmia.f64 XO , { d1 }
vldmia.f64 AO1!, { d8 }
vmla.f64 d2 , d1 , d8
add XO, XO, INC_X

@@ -295,9 +295,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmiad YO, { d4 }
vldmia.f64 YO, { d4 }
vmla.f64 d4, d0, d2
fstmiad YO, { d4 }
vstmia.f64 YO, { d4 }
add YO, YO, INC_Y

.endm
@@ -315,11 +315,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F2X4

fldmias XO! , { s12 - s15 }
fldmias AO1!, { s8 - s9 }
fldmias AO2!, { s4 - s5 }
fldmias AO1!, { s10 - s11 }
fldmias AO2!, { s6 - s7 }
vldmia.f32 XO! , { s12 - s15 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO2!, { s4 - s5 }
vldmia.f32 AO1!, { s10 - s11 }
vldmia.f32 AO2!, { s6 - s7 }

vmla.f32 s2 , s12 , s8
vmla.f32 s3 , s12 , s4
@@ -334,9 +334,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F2X1

fldmias XO! , { s1 }
fldmias AO1!, { s8 }
fldmias AO2!, { s4 }
vldmia.f32 XO! , { s1 }
vldmia.f32 AO1!, { s8 }
vldmia.f32 AO2!, { s4 }
vmla.f32 s2 , s1 , s8
vmla.f32 s3 , s1 , s4

@@ -344,10 +344,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F2

fldmias YO, { s4 - s5 }
vldmia.f32 YO, { s4 - s5 }
vmla.f32 s4, s0, s2
vmla.f32 s5, s0, s3
fstmias YO!, { s4 - s5 }
vstmia.f32 YO!, { s4 - s5 }

.endm

@@ -359,9 +359,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X4

fldmias XO! , { s12 - s15 }
fldmias AO1!, { s8 - s9 }
fldmias AO1!, { s10 - s11 }
vldmia.f32 XO! , { s12 - s15 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s2 , s12 , s8
vmla.f32 s2 , s13 , s9
vmla.f32 s2 , s14, s10
@@ -371,17 +371,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmias XO! , { s1 }
fldmias AO1!, { s8 }
vldmia.f32 XO! , { s1 }
vldmia.f32 AO1!, { s8 }
vmla.f32 s2 , s1 , s8

.endm

.macro SAVE_F1

fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s2
fstmias YO!, { s4 }
vstmia.f32 YO!, { s4 }

.endm

@@ -395,21 +395,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S2X4

fldmias XO , { s12 }
vldmia.f32 XO , { s12 }
add XO, XO, INC_X

fldmias AO1!, { s8 - s9 }
fldmias AO2!, { s4 - s5 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO2!, { s4 - s5 }

fldmias XO , { s13 }
vldmia.f32 XO , { s13 }
add XO, XO, INC_X
fldmias AO1!, { s10 - s11 }
fldmias AO2!, { s6 - s7 }
vldmia.f32 AO1!, { s10 - s11 }
vldmia.f32 AO2!, { s6 - s7 }

fldmias XO , { s14 }
vldmia.f32 XO , { s14 }
add XO, XO, INC_X

fldmias XO , { s15 }
vldmia.f32 XO , { s15 }
add XO, XO, INC_X

vmla.f32 s2 , s12 , s8
@@ -425,9 +425,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S2X1

fldmias XO , { s1 }
fldmias AO1!, { s8 }
fldmias AO2!, { s4 }
vldmia.f32 XO , { s1 }
vldmia.f32 AO1!, { s8 }
vldmia.f32 AO2!, { s4 }
vmla.f32 s2 , s1 , s8
add XO, XO, INC_X
vmla.f32 s3 , s1 , s4
@@ -436,14 +436,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S2

fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s2
fstmias YO, { s4 }
vstmia.f32 YO, { s4 }
add YO, YO, INC_Y

fldmias YO, { s5 }
vldmia.f32 YO, { s5 }
vmla.f32 s5, s0, s3
fstmias YO, { s5 }
vstmia.f32 YO, { s5 }
add YO, YO, INC_Y

.endm
@@ -456,20 +456,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X4

fldmias XO , { s12 }
vldmia.f32 XO , { s12 }
add XO, XO, INC_X

pld [ AO1 , #A_PRE ]
fldmias AO1!, { s8 - s9 }
vldmia.f32 AO1!, { s8 - s9 }

fldmias XO , { s13 }
vldmia.f32 XO , { s13 }
add XO, XO, INC_X
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }

fldmias XO , { s14 }
vldmia.f32 XO , { s14 }
add XO, XO, INC_X

fldmias XO , { s15 }
vldmia.f32 XO , { s15 }
add XO, XO, INC_X

vmla.f32 s2 , s12 , s8
@@ -481,8 +481,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmias XO , { s1 }
fldmias AO1!, { s8 }
vldmia.f32 XO , { s1 }
vldmia.f32 AO1!, { s8 }
vmla.f32 s2 , s1 , s8
add XO, XO, INC_X

@@ -490,9 +490,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmias YO, { s4 }
vldmia.f32 YO, { s4 }
vmla.f32 s4, s0, s2
fstmias YO, { s4 }
vstmia.f32 YO, { s4 }
add YO, YO, INC_Y

.endm


+ 84
- 84
kernel/arm/gemv_t_vfpv3.S View File

@@ -108,17 +108,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F2X4

pld [ XO , #X_PRE ]
fldmiad XO! , { d28 - d31 }
vldmia.f64 XO! , { d28 - d31 }
pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
pld [ AO2 , #A_PRE ]
fldmiad AO2!, { d16 - d17 }
vldmia.f64 AO2!, { d16 - d17 }
vmla.f64 d4 , d28 , d8
vmla.f64 d5 , d28 , d16
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d4 , d29 , d9
vmla.f64 d5 , d29 , d17
fldmiad AO2!, { d18 - d19 }
vldmia.f64 AO2!, { d18 - d19 }
vmla.f64 d4 , d30, d10
vmla.f64 d5 , d30, d18
vmla.f64 d4 , d31, d11
@@ -129,9 +129,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F2X1

fldmiad XO! , { d2 }
fldmiad AO1!, { d8 }
fldmiad AO2!, { d16 }
vldmia.f64 XO! , { d2 }
vldmia.f64 AO1!, { d8 }
vldmia.f64 AO2!, { d16 }
vmla.f64 d4 , d2 , d8
vmla.f64 d5 , d2 , d16

@@ -139,10 +139,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F2

fldmiad YO, { d24 - d25 }
vldmia.f64 YO, { d24 - d25 }
vmla.f64 d24, d0, d4
vmla.f64 d25, d0, d5
fstmiad YO!, { d24 - d25 }
vstmia.f64 YO!, { d24 - d25 }

.endm

@@ -156,23 +156,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S2X4

pld [ AO1 , #A_PRE ]
fldmiad XO , { d28 }
vldmia.f64 XO , { d28 }
add XO, XO, INC_X
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
pld [ AO2 , #A_PRE ]
fldmiad AO2!, { d16 - d17 }
vldmia.f64 AO2!, { d16 - d17 }
vmla.f64 d4 , d28 , d8
fldmiad XO , { d29 }
vldmia.f64 XO , { d29 }
add XO, XO, INC_X
vmla.f64 d5 , d28 , d16
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d4 , d29 , d9
fldmiad XO , { d30 }
vldmia.f64 XO , { d30 }
add XO, XO, INC_X
vmla.f64 d5 , d29 , d17
fldmiad AO2!, { d18 - d19 }
vldmia.f64 AO2!, { d18 - d19 }
vmla.f64 d4 , d30, d10
fldmiad XO , { d31 }
vldmia.f64 XO , { d31 }
add XO, XO, INC_X
vmla.f64 d5 , d30, d18
vmla.f64 d4 , d31, d11
@@ -183,10 +183,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S2X1

fldmiad XO , { d2 }
fldmiad AO1!, { d8 }
vldmia.f64 XO , { d2 }
vldmia.f64 AO1!, { d8 }
add XO, XO, INC_X
fldmiad AO2!, { d16 }
vldmia.f64 AO2!, { d16 }
vmla.f64 d4 , d2 , d8
vmla.f64 d5 , d2 , d16

@@ -194,14 +194,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S2

fldmiad YO, { d24 }
vldmia.f64 YO, { d24 }
vmla.f64 d24, d0, d4
fstmiad YO, { d24 }
vstmia.f64 YO, { d24 }
add YO, YO, INC_Y

fldmiad YO, { d24 }
vldmia.f64 YO, { d24 }
vmla.f64 d24, d0, d5
fstmiad YO, { d24 }
vstmia.f64 YO, { d24 }
add YO, YO, INC_Y

.endm
@@ -215,11 +215,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F1X4

pld [ XO , #X_PRE ]
fldmiad XO! , { d28 - d31 }
vldmia.f64 XO! , { d28 - d31 }
pld [ AO1 , #A_PRE ]
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
vmla.f64 d4 , d28 , d8
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d4 , d29 , d9
vmla.f64 d4 , d30, d10
vmla.f64 d4 , d31, d11
@@ -229,17 +229,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmiad XO! , { d2 }
fldmiad AO1!, { d8 }
vldmia.f64 XO! , { d2 }
vldmia.f64 AO1!, { d8 }
vmla.f64 d4 , d2 , d8

.endm

.macro SAVE_F1

fldmiad YO, { d24 }
vldmia.f64 YO, { d24 }
vmla.f64 d24, d0, d4
fstmiad YO!, { d24 }
vstmia.f64 YO!, { d24 }

.endm

@@ -252,18 +252,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S1X4

pld [ AO1 , #A_PRE ]
fldmiad XO , { d28 }
vldmia.f64 XO , { d28 }
add XO, XO, INC_X
fldmiad AO1!, { d8 - d9 }
vldmia.f64 AO1!, { d8 - d9 }
vmla.f64 d4 , d28 , d8
fldmiad XO , { d29 }
vldmia.f64 XO , { d29 }
add XO, XO, INC_X
fldmiad AO1!, { d10 - d11 }
vldmia.f64 AO1!, { d10 - d11 }
vmla.f64 d4 , d29 , d9
fldmiad XO , { d30 }
vldmia.f64 XO , { d30 }
add XO, XO, INC_X
vmla.f64 d4 , d30, d10
fldmiad XO , { d31 }
vldmia.f64 XO , { d31 }
add XO, XO, INC_X
vmla.f64 d4 , d31, d11

@@ -272,8 +272,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmiad XO , { d2 }
fldmiad AO1!, { d8 }
vldmia.f64 XO , { d2 }
vldmia.f64 AO1!, { d8 }
add XO, XO, INC_X
vmla.f64 d4 , d2 , d8

@@ -281,9 +281,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmiad YO, { d24 }
vldmia.f64 YO, { d24 }
vmla.f64 d24, d0, d4
fstmiad YO, { d24 }
vstmia.f64 YO, { d24 }
add YO, YO, INC_Y

.endm
@@ -300,15 +300,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F2X4

fldmias XO! , { s28 - s31 }
fldmias AO1!, { s8 - s9 }
fldmias AO2!, { s16 - s17 }
vldmia.f32 XO! , { s28 - s31 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO2!, { s16 - s17 }
vmla.f32 s4 , s28 , s8
vmla.f32 s5 , s28 , s16
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s4 , s29 , s9
vmla.f32 s5 , s29 , s17
fldmias AO2!, { s18 - s19 }
vldmia.f32 AO2!, { s18 - s19 }
vmla.f32 s4 , s30, s10
vmla.f32 s5 , s30, s18
vmla.f32 s4 , s31, s11
@@ -319,9 +319,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F2X1

fldmias XO! , { s2 }
fldmias AO1!, { s8 }
fldmias AO2!, { s16 }
vldmia.f32 XO! , { s2 }
vldmia.f32 AO1!, { s8 }
vldmia.f32 AO2!, { s16 }
vmla.f32 s4 , s2 , s8
vmla.f32 s5 , s2 , s16

@@ -329,10 +329,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F2

fldmias YO, { s24 - s25 }
vldmia.f32 YO, { s24 - s25 }
vmla.f32 s24, s0, s4
vmla.f32 s25, s0, s5
fstmias YO!, { s24 - s25 }
vstmia.f32 YO!, { s24 - s25 }

.endm

@@ -345,22 +345,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S2X4

fldmias XO , { s28 }
vldmia.f32 XO , { s28 }
add XO, XO, INC_X
fldmias AO1!, { s8 - s9 }
fldmias AO2!, { s16 - s17 }
vldmia.f32 AO1!, { s8 - s9 }
vldmia.f32 AO2!, { s16 - s17 }
vmla.f32 s4 , s28 , s8
fldmias XO , { s29 }
vldmia.f32 XO , { s29 }
add XO, XO, INC_X
vmla.f32 s5 , s28 , s16
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s4 , s29 , s9
fldmias XO , { s30 }
vldmia.f32 XO , { s30 }
add XO, XO, INC_X
vmla.f32 s5 , s29 , s17
fldmias AO2!, { s18 - s19 }
vldmia.f32 AO2!, { s18 - s19 }
vmla.f32 s4 , s30, s10
fldmias XO , { s31 }
vldmia.f32 XO , { s31 }
add XO, XO, INC_X
vmla.f32 s5 , s30, s18
vmla.f32 s4 , s31, s11
@@ -371,10 +371,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S2X1

fldmias XO , { s2 }
fldmias AO1!, { s8 }
vldmia.f32 XO , { s2 }
vldmia.f32 AO1!, { s8 }
add XO, XO, INC_X
fldmias AO2!, { s16 }
vldmia.f32 AO2!, { s16 }
vmla.f32 s4 , s2 , s8
vmla.f32 s5 , s2 , s16

@@ -382,14 +382,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S2

fldmias YO, { s24 }
vldmia.f32 YO, { s24 }
vmla.f32 s24, s0, s4
fstmias YO, { s24 }
vstmia.f32 YO, { s24 }
add YO, YO, INC_Y

fldmias YO, { s24 }
vldmia.f32 YO, { s24 }
vmla.f32 s24, s0, s5
fstmias YO, { s24 }
vstmia.f32 YO, { s24 }
add YO, YO, INC_Y

.endm
@@ -402,10 +402,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X4

fldmias XO! , { s28 - s31 }
fldmias AO1!, { s8 - s9 }
vldmia.f32 XO! , { s28 - s31 }
vldmia.f32 AO1!, { s8 - s9 }
vmla.f32 s4 , s28 , s8
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s4 , s29 , s9
vmla.f32 s4 , s30, s10
vmla.f32 s4 , s31, s11
@@ -415,17 +415,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmias XO! , { s2 }
fldmias AO1!, { s8 }
vldmia.f32 XO! , { s2 }
vldmia.f32 AO1!, { s8 }
vmla.f32 s4 , s2 , s8

.endm

.macro SAVE_F1

fldmias YO, { s24 }
vldmia.f32 YO, { s24 }
vmla.f32 s24, s0, s4
fstmias YO!, { s24 }
vstmia.f32 YO!, { s24 }

.endm

@@ -437,18 +437,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X4

fldmias XO , { s28 }
vldmia.f32 XO , { s28 }
add XO, XO, INC_X
fldmias AO1!, { s8 - s9 }
vldmia.f32 AO1!, { s8 - s9 }
vmla.f32 s4 , s28 , s8
fldmias XO , { s29 }
vldmia.f32 XO , { s29 }
add XO, XO, INC_X
fldmias AO1!, { s10 - s11 }
vldmia.f32 AO1!, { s10 - s11 }
vmla.f32 s4 , s29 , s9
fldmias XO , { s30 }
vldmia.f32 XO , { s30 }
add XO, XO, INC_X
vmla.f32 s4 , s30, s10
fldmias XO , { s31 }
vldmia.f32 XO , { s31 }
add XO, XO, INC_X
vmla.f32 s4 , s31, s11

@@ -457,8 +457,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmias XO , { s2 }
fldmias AO1!, { s8 }
vldmia.f32 XO , { s2 }
vldmia.f32 AO1!, { s8 }
add XO, XO, INC_X
vmla.f32 s4 , s2 , s8

@@ -466,9 +466,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmias YO, { s24 }
vldmia.f32 YO, { s24 }
vmla.f32 s24, s0, s4
fstmias YO, { s24 }
vstmia.f32 YO, { s24 }
add YO, YO, INC_Y

.endm


+ 16
- 16
kernel/arm/iamax_vfp.S View File

@@ -114,7 +114,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F

fldmiad X!, { d0 }
vldmia.f64 X!, { d0 }
VABS( d0, d0 )
mov Z, #1
mov INDEX, Z
@@ -123,7 +123,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
add Z, Z, #1
VABS( d4, d4 )
vcmpe.f64 d4, d0
@@ -135,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S

fldmiad X, { d0 }
vldmia.f64 X, { d0 }
VABS( d0, d0 )
mov Z, #1
mov INDEX, Z
@@ -146,7 +146,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
add Z, Z, #1
VABS( d4, d4 )
vcmpe.f64 d4, d0
@@ -161,7 +161,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F

fldmias X!, { s0 }
vldmia.f32 X!, { s0 }
VABS( s0, s0 )
mov Z, #1
mov INDEX, Z
@@ -170,7 +170,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
add Z, Z, #1
VABS( s4, s4 )
vcmpe.f32 s4, s0
@@ -182,7 +182,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S

fldmias X, { s0 }
vldmia.f32 X, { s0 }
VABS( s0, s0 )
mov Z, #1
mov INDEX, Z
@@ -193,7 +193,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmias X, { s4 }
vldmia.f32 X, { s4 }
add Z, Z, #1
VABS( s4, s4 )
vcmpe.f32 s4, s0
@@ -215,7 +215,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F

fldmiad X!, { d0 -d1 }
vldmia.f64 X!, { d0 -d1 }
vabs.f64 d0, d0
vabs.f64 d1, d1
vadd.f64 d0 , d0, d1
@@ -227,7 +227,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }
add Z, Z, #1
vabs.f64 d4, d4
vabs.f64 d5, d5
@@ -241,7 +241,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S

fldmiad X, { d0 -d1 }
vldmia.f64 X, { d0 -d1 }
vabs.f64 d0, d0
vabs.f64 d1, d1
vadd.f64 d0 , d0, d1
@@ -255,7 +255,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
add Z, Z, #1
vabs.f64 d4, d4
vabs.f64 d5, d5
@@ -272,7 +272,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_F

fldmias X!, { s0 -s1 }
vldmia.f32 X!, { s0 -s1 }
vabs.f32 s0, s0
vabs.f32 s1, s1
vadd.f32 s0 , s0, s1
@@ -284,7 +284,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }
add Z, Z, #1
vabs.f32 s4, s4
vabs.f32 s5, s5
@@ -298,7 +298,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro INIT_S

fldmias X, { s0 -s1 }
vldmia.f32 X, { s0 -s1 }
vabs.f32 s0, s0
vabs.f32 s1, s1
vadd.f32 s0 , s0, s1
@@ -312,7 +312,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
add Z, Z, #1
vabs.f32 s4, s4
vabs.f32 s5, s5


+ 8
- 8
kernel/arm/nrm2_vfp.S View File

@@ -58,7 +58,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
beq KERNEL_F1_NEXT_\@
@@ -95,7 +95,7 @@ KERNEL_F1_NEXT_\@:

.macro KERNEL_S1

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
beq KERNEL_S1_NEXT
@@ -121,7 +121,7 @@ KERNEL_S1_NEXT:

.macro KERNEL_F1

fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
beq KERNEL_F1_NEXT_\@
@@ -158,7 +158,7 @@ KERNEL_F1_NEXT_\@:

.macro KERNEL_S1

fldmias X, { s4 }
vldmia.f32 X, { s4 }
vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
beq KERNEL_S1_NEXT
@@ -191,7 +191,7 @@ KERNEL_S1_NEXT:

.macro KERNEL_F1

fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }

vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
@@ -249,7 +249,7 @@ KERNEL_F1_END_\@:

.macro KERNEL_S1

fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }

vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
@@ -294,7 +294,7 @@ KERNEL_S1_END_\@:

.macro KERNEL_F1

fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }

vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
@@ -350,7 +350,7 @@ KERNEL_F1_END_\@:

.macro KERNEL_S1

fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }

vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr


+ 8
- 8
kernel/arm/nrm2_vfpv3.S View File

@@ -58,7 +58,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmiad X!, { d4 }
vldmia.f64 X!, { d4 }
vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
beq KERNEL_F1_NEXT_\@
@@ -95,7 +95,7 @@ KERNEL_F1_NEXT_\@:

.macro KERNEL_S1

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
beq KERNEL_S1_NEXT
@@ -121,7 +121,7 @@ KERNEL_S1_NEXT:

.macro KERNEL_F1

fldmias X!, { s4 }
vldmia.f32 X!, { s4 }
vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
beq KERNEL_F1_NEXT_\@
@@ -158,7 +158,7 @@ KERNEL_F1_NEXT_\@:

.macro KERNEL_S1

fldmias X, { s4 }
vldmia.f32 X, { s4 }
vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
beq KERNEL_S1_NEXT
@@ -191,7 +191,7 @@ KERNEL_S1_NEXT:

.macro KERNEL_F1

fldmiad X!, { d4 - d5 }
vldmia.f64 X!, { d4 - d5 }

vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
@@ -249,7 +249,7 @@ KERNEL_F1_END_\@:

.macro KERNEL_S1

fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }

vcmpe.f64 d4, d6 // compare with 0.0
vmrs APSR_nzcv, fpscr
@@ -294,7 +294,7 @@ KERNEL_S1_END_\@:

.macro KERNEL_F1

fldmias X!, { s4 - s5 }
vldmia.f32 X!, { s4 - s5 }

vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr
@@ -350,7 +350,7 @@ KERNEL_F1_END_\@:

.macro KERNEL_S1

fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }

vcmpe.f32 s4, s6 // compare with 0.0
vmrs APSR_nzcv, fpscr


+ 112
- 112
kernel/arm/rot_vfp.S View File

@@ -77,68 +77,68 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]

fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }

fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }

fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }

fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }

.endm


.macro KERNEL_F1

fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }

.endm

.macro KERNEL_S1

fldmiad X, { d4 }
fldmiad Y, { d5 }
vldmia.f64 X, { d4 }
vldmia.f64 Y, { d5 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d5
vmul.f64 d3 , d0, d5
vmls.f64 d3 , d1, d4
fstmiad X, { d2 }
fstmiad Y, { d3 }
vstmia.f64 X, { d2 }
vstmia.f64 Y, { d3 }

add X, X, INC_X
add Y, Y, INC_Y
@@ -149,68 +149,68 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F4

fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }

fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }

fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }

fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }

.endm


.macro KERNEL_F1

fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }

.endm

.macro KERNEL_S1

fldmias X, { s4 }
fldmias Y, { s5 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s5 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s5
vmul.f32 s3 , s0, s5
vmls.f32 s3 , s1, s4
fstmias X, { s2 }
fstmias Y, { s3 }
vstmia.f32 X, { s2 }
vstmia.f32 Y, { s3 }

add X, X, INC_X
add Y, Y, INC_Y
@@ -230,96 +230,96 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]

fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }

fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }

pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]

fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }

fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }

.endm


.macro KERNEL_F1

fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6
vmls.f64 d3 , d1, d4
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }
vmul.f64 d2 , d0, d5
fmacd d2 , d1, d7
vmul.f64 d3 , d0, d7
vmls.f64 d3 , d1, d5
fstmiad X!, { d2 }
fstmiad Y!, { d3 }
vstmia.f64 X!, { d2 }
vstmia.f64 Y!, { d3 }


.endm

.macro KERNEL_S1

fldmiad X, { d4 - d5 }
fldmiad Y, { d6 - d7 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d6 - d7 }
vmul.f64 d2 , d0, d4
fmacd d2 , d1, d6
vmul.f64 d3 , d0, d6
@@ -347,96 +347,96 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]

fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }

fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }

pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]

fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }

fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }

.endm


.macro KERNEL_F1

fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6
vmls.f32 s3 , s1, s4
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }
vmul.f32 s2 , s0, s5
fmacs s2 , s1, s7
vmul.f32 s3 , s0, s7
vmls.f32 s3 , s1, s5
fstmias X!, { s2 }
fstmias Y!, { s3 }
vstmia.f32 X!, { s2 }
vstmia.f32 Y!, { s3 }


.endm

.macro KERNEL_S1

fldmias X, { s4 - s5 }
fldmias Y, { s6 - s7 }
vldmia.f32 X, { s4 - s5 }
vldmia.f32 Y, { s6 - s7 }
vmul.f32 s2 , s0, s4
fmacs s2 , s1, s6
vmul.f32 s3 , s0, s6


+ 38
- 38
kernel/arm/scal_vfp.S View File

@@ -64,30 +64,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_F4

pld [ X, #X_PRE ]
fldmiad X, { d4 - d7 }
vldmia.f64 X, { d4 - d7 }
vmul.f64 d4, d4, d0
vmul.f64 d5, d5, d0
vmul.f64 d6, d6, d0
fstmiad X!, { d4 - d5 }
vstmia.f64 X!, { d4 - d5 }
vmul.f64 d7, d7, d0
fstmiad X!, { d6 - d7 }
vstmia.f64 X!, { d6 - d7 }

.endm


.macro KERNEL_F1

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vmul.f64 d4, d4, d0
fstmiad X!, { d4 }
vstmia.f64 X!, { d4 }

.endm

.macro KERNEL_S1

fldmiad X, { d4 }
vldmia.f64 X, { d4 }
vmul.f64 d4, d4, d0
fstmiad X, { d4 }
vstmia.f64 X, { d4 }
add X, X, INC_X

.endm
@@ -96,30 +96,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F4

fldmias X, { s4 - s7 }
vldmia.f32 X, { s4 - s7 }
vmul.f32 s4, s4, s0
vmul.f32 s5, s5, s0
vmul.f32 s6, s6, s0
fstmias X!, { s4 - s5 }
vstmia.f32 X!, { s4 - s5 }
vmul.f32 s7, s7, s0
fstmias X!, { s6 - s7 }
vstmia.f32 X!, { s6 - s7 }

.endm


.macro KERNEL_F1

fldmias X, { s4 }
vldmia.f32 X, { s4 }
vmul.f32 s4, s4, s0
fstmias X!, { s4 }
vstmia.f32 X!, { s4 }

.endm

.macro KERNEL_S1

fldmias X, { s4 }
vldmia.f32 X, { s4 }
vmul.f32 s4, s4, s0
fstmias X, { s4 }
vstmia.f32 X, { s4 }
add X, X, INC_X

.endm
@@ -136,58 +136,58 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ X, #X_PRE ]

fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5
fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }

fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5
fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }

pld [ X, #X_PRE ]

fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5
fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }

fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5
fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }

.endm


.macro KERNEL_F1

fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5
fmacd d3, d1, d4
fstmiad X!, { d2 - d3 }
vstmia.f64 X!, { d2 - d3 }

.endm

.macro KERNEL_S1

fldmiad X, { d4 - d5 }
vldmia.f64 X, { d4 - d5 }
vmul.f64 d2, d0, d4
vmls.f64 d2, d1, d5
vmul.f64 d3, d0, d5
fmacd d3, d1, d4
fstmiad X, { d2 - d3 }
vstmia.f64 X, { d2 - d3 }
add X, X, INC_X

.endm
@@ -199,56 +199,56 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ X, #X_PRE ]

fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5
fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }

fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5
fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }

fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5
fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }

fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5
fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }

.endm


.macro KERNEL_F1

fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5
fmacs s3, s1, s4
fstmias X!, { s2 - s3 }
vstmia.f32 X!, { s2 - s3 }

.endm

.macro KERNEL_S1

fldmias X, { s4 - s5 }
vldmia.f32 X, { s4 - s5 }
vmul.f32 s2, s0, s4
vmls.f32 s2, s1, s5
vmul.f32 s3, s0, s5
fmacs s3, s1, s4
fstmias X, { s2 - s3 }
vstmia.f32 X, { s2 - s3 }
add X, X, INC_X

.endm


+ 16
- 16
kernel/arm/scopy_vfp.S View File

@@ -65,17 +65,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_F8

pld [ X, #X_PRE ]
fldmias X!, { s0 - s3 }
fldmias X!, { s4 - s7 }
fstmias Y!, { s0 - s3 }
fstmias Y!, { s4 - s7 }
vldmia.f32 X!, { s0 - s3 }
vldmia.f32 X!, { s4 - s7 }
vstmia.f32 Y!, { s0 - s3 }
vstmia.f32 Y!, { s4 - s7 }

.endm

.macro COPY_F1

fldmias X!, { s0 }
fstmias Y!, { s0 }
vldmia.f32 X!, { s0 }
vstmia.f32 Y!, { s0 }

.endm

@@ -85,23 +85,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_S4

nop
fldmias X, { s0 }
fstmias Y, { s0 }
vldmia.f32 X, { s0 }
vstmia.f32 Y, { s0 }
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s1 }
fstmias Y, { s1 }
vldmia.f32 X, { s1 }
vstmia.f32 Y, { s1 }
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s0 }
fstmias Y, { s0 }
vldmia.f32 X, { s0 }
vstmia.f32 Y, { s0 }
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s1 }
fstmias Y, { s1 }
vldmia.f32 X, { s1 }
vstmia.f32 Y, { s1 }
add X, X, INC_X
add Y, Y, INC_Y

@@ -110,8 +110,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY_S1

fldmias X, { s0 }
fstmias Y, { s0 }
vldmia.f32 X, { s0 }
vstmia.f32 Y, { s0 }
add X, X, INC_X
add Y, Y, INC_Y



+ 36
- 36
kernel/arm/sdot_vfp.S View File

@@ -68,26 +68,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F4

fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4

fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4

fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4

fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4
@@ -96,8 +96,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmias X!, { s14 }
fldmias Y!, { s15 }
vldmia.f32 X!, { s14 }
vldmia.f32 Y!, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4
@@ -109,32 +109,32 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

nop

fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4
add X, X, INC_X
add Y, Y, INC_Y

fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4
@@ -146,8 +146,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmias X, { s14 }
fldmias Y, { s15 }
vldmia.f32 X, { s14 }
vldmia.f32 Y, { s15 }
vmul.f32 s15, s14, s15
vcvt.f64.f32 d4, s15
vadd.f64 d0 , d0, d4
@@ -162,12 +162,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F4

fldmias X!, { s8 - s9 }
fldmias Y!, { s4 - s5}
vldmia.f32 X!, { s8 - s9 }
vldmia.f32 Y!, { s4 - s5}
fmacs s0 , s4, s8
fldmias X!, { s10 - s11 }
vldmia.f32 X!, { s10 - s11 }
fmacs s1 , s5, s9
fldmias Y!, { s6 - s7 }
vldmia.f32 Y!, { s6 - s7 }
fmacs s0 , s6, s10
fmacs s1 , s7, s11

@@ -175,8 +175,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmias X!, { s4 }
fldmias Y!, { s8 }
vldmia.f32 X!, { s4 }
vldmia.f32 Y!, { s8 }
fmacs s0 , s4, s8

.endm
@@ -185,26 +185,26 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL_S4

nop
fldmias X, { s4 }
fldmias Y, { s8 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s8 }
add X, X, INC_X
add Y, Y, INC_Y
fmacs s0 , s4, s8

fldmias X, { s5 }
fldmias Y, { s9 }
vldmia.f32 X, { s5 }
vldmia.f32 Y, { s9 }
add X, X, INC_X
add Y, Y, INC_Y
fmacs s1 , s5, s9

fldmias X, { s6 }
fldmias Y, { s10 }
vldmia.f32 X, { s6 }
vldmia.f32 Y, { s10 }
add X, X, INC_X
add Y, Y, INC_Y
fmacs s0 , s6, s10

fldmias X, { s7 }
fldmias Y, { s11 }
vldmia.f32 X, { s7 }
vldmia.f32 Y, { s11 }
add X, X, INC_X
add Y, Y, INC_Y
fmacs s1 , s7, s11
@@ -214,8 +214,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmias X, { s4 }
fldmias Y, { s8 }
vldmia.f32 X, { s4 }
vldmia.f32 Y, { s8 }
add X, X, INC_X
fmacs s0 , s4, s8
add Y, Y, INC_Y


+ 2
- 2
kernel/arm/sgemm_kernel_4x2_vfp.S View File

@@ -112,8 +112,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL4x2_SUB

fldmias AO! , { s0 - s3 }
fldmias BO! , { s4 - s5 }
vldmia.f32 AO! , { s0 - s3 }
vldmia.f32 BO! , { s4 - s5 }

fmacs s8 , s0, s4
fmacs s9 , s1, s4


+ 20
- 20
kernel/arm/sgemm_kernel_4x4_vfpv3.S View File

@@ -136,29 +136,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL4x4_I

pld [ AO , #A_PRE ]
fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
pld [ BO , #B_PRE ]
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }

fmuls s16 , s0, s8
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmuls s17 , s1, s8
fmuls s18 , s2, s8
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmuls s19 , s3, s8

fmuls s20 , s0, s9
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmuls s21 , s1, s9
fmuls s22 , s2, s9
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmuls s23 , s3, s9

fmuls s24 , s0, s10
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmuls s25 , s1, s10
fmuls s26 , s2, s10
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmuls s27 , s3, s10

fmuls s28 , s0, s11
@@ -174,20 +174,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ AO , #A_PRE ]
fmacs s16 , s4, s12
fmacs s17 , s5, s12
fldmias AO!, { s0 - s3 }
vldmia.f32 AO!, { s0 - s3 }
fmacs s18 , s6, s12
pld [ BO , #B_PRE ]
fmacs s19 , s7, s12

fmacs s20 , s4, s13
fldmias BO!, { s8 - s11 }
vldmia.f32 BO!, { s8 - s11 }
fmacs s21 , s5, s13
fmacs s22 , s6, s13
//fldmias AO!, { s2 - s3 }
//vldmia.f32 AO!, { s2 - s3 }
fmacs s23 , s7, s13

fmacs s24 , s4, s14
//fldmias BO!, { s10 - s11 }
//vldmia.f32 BO!, { s10 - s11 }
fmacs s25 , s5, s14
fmacs s26 , s6, s14
fmacs s27 , s7, s14
@@ -203,17 +203,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL4x4_M1

fmacs s16 , s0, s8
fldmias AO!, { s4 - s7 }
vldmia.f32 AO!, { s4 - s7 }
fmacs s17 , s1, s8
fmacs s18 , s2, s8
fldmias BO!, { s12 - s15 }
//fldmias AO!, { s6 - s7 }
vldmia.f32 BO!, { s12 - s15 }
//vldmia.f32 AO!, { s6 - s7 }
fmacs s19 , s3, s8

fmacs s20 , s0, s9
fmacs s21 , s1, s9
fmacs s22 , s2, s9
//fldmias BO!, { s14 - s15 }
//vldmia.f32 BO!, { s14 - s15 }
fmacs s23 , s3, s9

fmacs s24 , s0, s10
@@ -300,7 +300,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0, ALPHA
add r4 , CO2, r3

fldmias CO1, { s8 - s11 }
vldmia.f32 CO1, { s8 - s11 }

fmacs s8 , s0 , s16
flds s12, [CO2]
@@ -322,7 +322,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ CO1 , #C_PRE ]

fldmias r4, { s8 - s11 }
vldmia.f32 r4, { s8 - s11 }

fmacs s8 , s0 , s24
fsts s12, [CO2]
@@ -338,7 +338,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add CO2, r4 , r3


fldmias CO2, { s12 - s15 }
vldmia.f32 CO2, { s12 - s15 }

fsts s8 , [r4 ]
fmacs s12, s0 , s28
@@ -350,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fmacs s15, s0 , s31

pld [ r4 , #C_PRE ]
fstmias CO2, { s12 - s15 }
vstmia.f32 CO2, { s12 - s15 }
pld [ CO2 , #C_PRE ]

add CO1, CO1, #16


+ 4
- 4
kernel/arm/sgemm_ncopy_2_vfp.S View File

@@ -73,7 +73,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s3 , [ AO2, #4 ]

add AO1, AO1, #8
fstmias BO!, { s0 - s3 }
vstmia.f32 BO!, { s0 - s3 }
add AO2, AO2, #8

.endm
@@ -85,7 +85,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s1 , [ AO2, #0 ]
add AO1, AO1, #4

fstmias BO!, { s0 - s1 }
vstmia.f32 BO!, { s0 - s1 }
add AO2, AO2, #4

.endm
@@ -95,7 +95,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s0 , [ AO1, #0 ]
flds s1 , [ AO1, #4 ]

fstmias BO!, { s0 - s1 }
vstmia.f32 BO!, { s0 - s1 }
add AO1, AO1, #8

.endm
@@ -105,7 +105,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

flds s0 , [ AO1, #0 ]

fstmias BO!, { s0 }
vstmia.f32 BO!, { s0 }
add AO1, AO1, #4

.endm


+ 8
- 8
kernel/arm/sgemm_ncopy_4_vfp.S View File

@@ -100,10 +100,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s11, [ AO4, #8 ]
flds s15, [ AO4, #12 ]

fstmias BO!, { s0 - s3 }
vstmia.f32 BO!, { s0 - s3 }
add AO4, AO4, #16
fstmias BO!, { s4 - s7 }
fstmias BO!, { s8 - s15 }
vstmia.f32 BO!, { s4 - s7 }
vstmia.f32 BO!, { s8 - s15 }

.endm

@@ -117,7 +117,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s3 , [ AO4, #0 ]

add AO3, AO3, #4
fstmias BO!, { s0 - s3 }
vstmia.f32 BO!, { s0 - s3 }
add AO4, AO4, #4

.endm
@@ -135,7 +135,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s5 , [ AO2, #8 ]
flds s7 , [ AO2, #12 ]

fstmias BO!, { s0 - s7 }
vstmia.f32 BO!, { s0 - s7 }
add AO2, AO2, #16

.endm
@@ -147,7 +147,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s1 , [ AO2, #0 ]
add AO1, AO1, #4

fstmias BO!, { s0 - s1 }
vstmia.f32 BO!, { s0 - s1 }
add AO2, AO2, #4

.endm
@@ -159,7 +159,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
flds s2 , [ AO1, #8 ]
flds s3 , [ AO1, #12 ]

fstmias BO!, { s0 - s3 }
vstmia.f32 BO!, { s0 - s3 }
add AO1, AO1, #16

.endm
@@ -169,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

flds s0 , [ AO1, #0 ]

fstmias BO!, { s0 }
vstmia.f32 BO!, { s0 }
add AO1, AO1, #4

.endm


+ 35
- 35
kernel/arm/sgemm_tcopy_4_vfp.S View File

@@ -76,21 +76,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY4x4_1

pld [ AO1, #A_PRE ]
fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }

add r3, AO1, LDA
pld [ r3, #A_PRE ]
fldmias r3, { s4 - s7 }
vldmia.f32 r3, { s4 - s7 }

add r3, r3, LDA
pld [ r3, #A_PRE ]
fldmias r3, { s8 - s11 }
vldmia.f32 r3, { s8 - s11 }

add r3, r3, LDA
pld [ r3, #A_PRE ]
fldmias r3, { s12 - s15 }
vldmia.f32 r3, { s12 - s15 }

fstmias BO1, { s0 - s15 }
vstmia.f32 BO1, { s0 - s15 }
add AO1, AO1, #16
add BO1, BO1, M4

@@ -98,18 +98,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY4x4_2

fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }

add r3, AO1, LDA
fldmias r3, { s4 - s7 }
vldmia.f32 r3, { s4 - s7 }

add r3, r3, LDA
fldmias r3, { s8 - s11 }
vldmia.f32 r3, { s8 - s11 }

add r3, r3, LDA
fldmias r3, { s12 - s15 }
vldmia.f32 r3, { s12 - s15 }

fstmias BO1, { s0 - s15 }
vstmia.f32 BO1, { s0 - s15 }
add AO1, AO1, #16
add BO1, BO1, M4

@@ -118,18 +118,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY2x4

fldmias AO1, { s0 - s1 }
vldmia.f32 AO1, { s0 - s1 }

add r3, AO1, LDA
fldmias r3, { s2 - s3 }
vldmia.f32 r3, { s2 - s3 }

add r3, r3, LDA
fldmias r3, { s4 - s5 }
vldmia.f32 r3, { s4 - s5 }

add r3, r3, LDA
fldmias r3, { s6 - s7 }
vldmia.f32 r3, { s6 - s7 }

fstmias BO2, { s0 - s7 }
vstmia.f32 BO2, { s0 - s7 }
add AO1, AO1, #8
add BO2, BO2, #32

@@ -137,18 +137,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x4

fldmias AO1, { s0 }
vldmia.f32 AO1, { s0 }

add r3, AO1, LDA
fldmias r3, { s1 }
vldmia.f32 r3, { s1 }

add r3, r3, LDA
fldmias r3, { s2 }
vldmia.f32 r3, { s2 }

add r3, r3, LDA
fldmias r3, { s3 }
vldmia.f32 r3, { s3 }

fstmias BO3, { s0 - s3 }
vstmia.f32 BO3, { s0 - s3 }
add AO1, AO1, #4
add BO3, BO3, #16

@@ -158,12 +158,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY4x2

fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }

add r3, AO1, LDA
fldmias r3, { s4 - s7 }
vldmia.f32 r3, { s4 - s7 }

fstmias BO1, { s0 - s7 }
vstmia.f32 BO1, { s0 - s7 }
add AO1, AO1, #16
add BO1, BO1, M4

@@ -171,12 +171,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY2x2

fldmias AO1, { s0 - s1 }
vldmia.f32 AO1, { s0 - s1 }

add r3, AO1, LDA
fldmias r3, { s2 - s3 }
vldmia.f32 r3, { s2 - s3 }

fstmias BO2, { s0 - s3 }
vstmia.f32 BO2, { s0 - s3 }
add AO1, AO1, #8
add BO2, BO2, #16

@@ -184,12 +184,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x2

fldmias AO1, { s0 }
vldmia.f32 AO1, { s0 }

add r3, AO1, LDA
fldmias r3, { s1 }
vldmia.f32 r3, { s1 }

fstmias BO3, { s0 - s1 }
vstmia.f32 BO3, { s0 - s1 }
add AO1, AO1, #4
add BO3, BO3, #8

@@ -199,9 +199,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY4x1

fldmias AO1, { s0 - s3 }
vldmia.f32 AO1, { s0 - s3 }

fstmias BO1, { s0 - s3 }
vstmia.f32 BO1, { s0 - s3 }
add AO1, AO1, #16
add BO1, BO1, M4

@@ -209,9 +209,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY2x1

fldmias AO1, { s0 - s1 }
vldmia.f32 AO1, { s0 - s1 }

fstmias BO2, { s0 - s1 }
vstmia.f32 BO2, { s0 - s1 }
add AO1, AO1, #8
add BO2, BO2, #8

@@ -219,9 +219,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x1

fldmias AO1, { s0 }
vldmia.f32 AO1, { s0 }

fstmias BO3, { s0 }
vstmia.f32 BO3, { s0 }
add AO1, AO1, #4
add BO3, BO3, #4



+ 2
- 2
kernel/arm/strmm_kernel_4x2_vfp.S View File

@@ -118,8 +118,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL4x2_SUB

fldmias AO!, { s0 - s3 }
fldmias BO!, { s4 - s5 }
vldmia.f32 AO!, { s0 - s3 }
vldmia.f32 BO!, { s4 - s5 }

fmacs s8 , s0, s4
fmacs s9 , s1, s4


+ 17
- 17
kernel/arm/strmm_kernel_4x4_vfpv3.S View File

@@ -122,30 +122,30 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL4x4_I

fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
pld [ AO , #A_PRE-8 ]
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }
pld [ BO , #B_PRE-8 ]

fmuls s16 , s0, s8
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmuls s17 , s1, s8
fmuls s18 , s2, s8
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmuls s19 , s3, s8

fmuls s20 , s0, s9
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmuls s21 , s1, s9
fmuls s22 , s2, s9
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmuls s23 , s3, s9

fmuls s24 , s0, s10
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmuls s25 , s1, s10
fmuls s26 , s2, s10
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmuls s27 , s3, s10

fmuls s28 , s0, s11
@@ -161,20 +161,20 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ AO , #A_PRE ]
fmacs s16 , s4, s12
fmacs s17 , s5, s12
fldmias AO!, { s0 - s1 }
vldmia.f32 AO!, { s0 - s1 }
fmacs s18 , s6, s12
pld [ BO , #B_PRE ]
fmacs s19 , s7, s12

fmacs s20 , s4, s13
fldmias AO!, { s2 - s3 }
vldmia.f32 AO!, { s2 - s3 }
fmacs s21 , s5, s13
fmacs s22 , s6, s13
fldmias BO!, { s8 - s9 }
vldmia.f32 BO!, { s8 - s9 }
fmacs s23 , s7, s13

fmacs s24 , s4, s14
fldmias BO!, { s10 - s11 }
vldmia.f32 BO!, { s10 - s11 }
fmacs s25 , s5, s14
fmacs s26 , s6, s14
fmacs s27 , s7, s14
@@ -190,17 +190,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro KERNEL4x4_M1

fmacs s16 , s0, s8
fldmias AO!, { s4 - s5 }
vldmia.f32 AO!, { s4 - s5 }
fmacs s17 , s1, s8
fmacs s18 , s2, s8
fldmias AO!, { s6 - s7 }
vldmia.f32 AO!, { s6 - s7 }
fmacs s19 , s3, s8

fmacs s20 , s0, s9
fldmias BO!, { s12 - s13 }
vldmia.f32 BO!, { s12 - s13 }
fmacs s21 , s1, s9
fmacs s22 , s2, s9
fldmias BO!, { s14 - s15 }
vldmia.f32 BO!, { s14 - s15 }
fmacs s23 , s3, s9

fmacs s24 , s0, s10
@@ -325,7 +325,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fsts s11, [r4 , #12 ]
fmuls s15, s0 , s31

fstmias CO2, { s12 - s15 }
vstmia.f32 CO2, { s12 - s15 }

add CO1, CO1, #16



+ 56
- 56
kernel/arm/swap_vfp.S View File

@@ -103,29 +103,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]
fldmiad X, { d0 - d3 }
fldmiad Y, { d4 - d7 }
fstmiad Y!, { d0 - d3 }
fstmiad X!, { d4 - d7}
vldmia.f64 X, { d0 - d3 }
vldmia.f64 Y, { d4 - d7 }
vstmia.f64 Y!, { d0 - d3 }
vstmia.f64 X!, { d4 - d7}

.endm


.macro KERNEL_F1

fldmiad X, { d0 }
fldmiad Y, { d4 }
fstmiad Y!, { d0 }
fstmiad X!, { d4 }
vldmia.f64 X, { d0 }
vldmia.f64 Y, { d4 }
vstmia.f64 Y!, { d0 }
vstmia.f64 X!, { d4 }

.endm

.macro KERNEL_S1

fldmiad X, { d0 }
fldmiad Y, { d4 }
fstmiad Y, { d0 }
fstmiad X, { d4 }
vldmia.f64 X, { d0 }
vldmia.f64 Y, { d4 }
vstmia.f64 Y, { d0 }
vstmia.f64 X, { d4 }
add X, X, INC_X
add Y, Y, INC_Y

@@ -135,29 +135,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F4

fldmias X, { s0 - s3 }
fldmias Y, { s4 - s7 }
fstmias Y!, { s0 - s3 }
fstmias X!, { s4 - s7}
vldmia.f32 X, { s0 - s3 }
vldmia.f32 Y, { s4 - s7 }
vstmia.f32 Y!, { s0 - s3 }
vstmia.f32 X!, { s4 - s7}

.endm


.macro KERNEL_F1

fldmias X, { s0 }
fldmias Y, { s4 }
fstmias Y!, { s0 }
fstmias X!, { s4 }
vldmia.f32 X, { s0 }
vldmia.f32 Y, { s4 }
vstmia.f32 Y!, { s0 }
vstmia.f32 X!, { s4 }

.endm

.macro KERNEL_S1

fldmias X, { s0 }
fldmias Y, { s4 }
fstmias Y, { s0 }
fstmias X, { s4 }
vldmia.f32 X, { s0 }
vldmia.f32 Y, { s4 }
vstmia.f32 Y, { s0 }
vstmia.f32 X, { s4 }
add X, X, INC_X
add Y, Y, INC_Y

@@ -174,35 +174,35 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]
fldmiad X, { d0 - d3 }
fldmiad Y, { d4 - d7 }
fstmiad Y!, { d0 - d3 }
fstmiad X!, { d4 - d7}
vldmia.f64 X, { d0 - d3 }
vldmia.f64 Y, { d4 - d7 }
vstmia.f64 Y!, { d0 - d3 }
vstmia.f64 X!, { d4 - d7}

pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]
fldmiad X, { d0 - d3 }
fldmiad Y, { d4 - d7 }
fstmiad Y!, { d0 - d3 }
fstmiad X!, { d4 - d7}
vldmia.f64 X, { d0 - d3 }
vldmia.f64 Y, { d4 - d7 }
vstmia.f64 Y!, { d0 - d3 }
vstmia.f64 X!, { d4 - d7}

.endm

.macro KERNEL_F1

fldmiad X, { d0 - d1 }
fldmiad Y, { d4 - d5 }
fstmiad Y!, { d0 - d1 }
fstmiad X!, { d4 - d5 }
vldmia.f64 X, { d0 - d1 }
vldmia.f64 Y, { d4 - d5 }
vstmia.f64 Y!, { d0 - d1 }
vstmia.f64 X!, { d4 - d5 }

.endm

.macro KERNEL_S1

fldmiad X, { d0 - d1 }
fldmiad Y, { d4 - d5 }
fstmiad Y, { d0 - d1 }
fstmiad X, { d4 - d5 }
vldmia.f64 X, { d0 - d1 }
vldmia.f64 Y, { d4 - d5 }
vstmia.f64 Y, { d0 - d1 }
vstmia.f64 X, { d4 - d5 }
add X, X, INC_X
add Y, Y, INC_Y

@@ -215,33 +215,33 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]
fldmias X, { s0 - s3 }
fldmias Y, { s4 - s7 }
fstmias Y!, { s0 - s3 }
fstmias X!, { s4 - s7}
vldmia.f32 X, { s0 - s3 }
vldmia.f32 Y, { s4 - s7 }
vstmia.f32 Y!, { s0 - s3 }
vstmia.f32 X!, { s4 - s7}

fldmias X, { s0 - s3 }
fldmias Y, { s4 - s7 }
fstmias Y!, { s0 - s3 }
fstmias X!, { s4 - s7}
vldmia.f32 X, { s0 - s3 }
vldmia.f32 Y, { s4 - s7 }
vstmia.f32 Y!, { s0 - s3 }
vstmia.f32 X!, { s4 - s7}

.endm

.macro KERNEL_F1

fldmias X, { s0 - s1 }
fldmias Y, { s4 - s5 }
fstmias Y!, { s0 - s1 }
fstmias X!, { s4 - s5 }
vldmia.f32 X, { s0 - s1 }
vldmia.f32 Y, { s4 - s5 }
vstmia.f32 Y!, { s0 - s1 }
vstmia.f32 X!, { s4 - s5 }

.endm

.macro KERNEL_S1

fldmias X, { s0 - s1 }
fldmias Y, { s4 - s5 }
fstmias Y, { s0 - s1 }
fstmias X, { s4 - s5 }
vldmia.f32 X, { s0 - s1 }
vldmia.f32 Y, { s4 - s5 }
vstmia.f32 Y, { s0 - s1 }
vstmia.f32 X, { s4 - s5 }
add X, X, INC_X
add Y, Y, INC_Y



+ 14
- 14
kernel/arm/zcopy_vfp.S View File

@@ -66,15 +66,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ X, #X_PRE ]
pld [ X, #X_PRE+32 ]
fldmiad X!, { d0 - d7 }
fstmiad Y!, { d0 - d7 }
vldmia.f64 X!, { d0 - d7 }
vstmia.f64 Y!, { d0 - d7 }

.endm

.macro COPY_F1

fldmiad X!, { d0 - d1 }
fstmiad Y!, { d0 - d1 }
vldmia.f64 X!, { d0 - d1 }
vstmia.f64 Y!, { d0 - d1 }

.endm

@@ -84,23 +84,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY_S4

nop
fldmiad X, { d0 - d1 }
fstmiad Y, { d0 - d1 }
vldmia.f64 X, { d0 - d1 }
vstmia.f64 Y, { d0 - d1 }
add X, X, INC_X
add Y, Y, INC_Y

fldmiad X, { d2 - d3 }
fstmiad Y, { d2 - d3 }
vldmia.f64 X, { d2 - d3 }
vstmia.f64 Y, { d2 - d3 }
add X, X, INC_X
add Y, Y, INC_Y

fldmiad X, { d0 - d1 }
fstmiad Y, { d0 - d1 }
vldmia.f64 X, { d0 - d1 }
vstmia.f64 Y, { d0 - d1 }
add X, X, INC_X
add Y, Y, INC_Y

fldmiad X, { d2 - d3 }
fstmiad Y, { d2 - d3 }
vldmia.f64 X, { d2 - d3 }
vstmia.f64 Y, { d2 - d3 }
add X, X, INC_X
add Y, Y, INC_Y

@@ -109,8 +109,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY_S1

fldmiad X, { d0 - d1 }
fstmiad Y, { d0 - d1 }
vldmia.f64 X, { d0 - d1 }
vstmia.f64 Y, { d0 - d1 }
add X, X, INC_X
add Y, Y, INC_Y



+ 20
- 20
kernel/arm/zdot_vfp.S View File

@@ -76,15 +76,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
pld [ X, #X_PRE ]
pld [ Y, #X_PRE ]

fldmiad X!, { d4 - d5 }
fldmiad Y!, { d8 - d9 }
vldmia.f64 X!, { d4 - d5 }
vldmia.f64 Y!, { d8 - d9 }
fmacd d0 , d4, d8
fmacd d1 , d4, d9
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
fmacd d2 , d5, d9
fmacd d3 , d5, d8

fldmiad Y!, { d10 - d11 }
vldmia.f64 Y!, { d10 - d11 }
fmacd d0 , d6, d10
fmacd d1 , d6, d11
pld [ X, #X_PRE ]
@@ -93,15 +93,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

pld [ Y, #X_PRE ]

fldmiad X!, { d4 - d5 }
fldmiad Y!, { d8 - d9 }
vldmia.f64 X!, { d4 - d5 }
vldmia.f64 Y!, { d8 - d9 }
fmacd d0 , d4, d8
fmacd d1 , d4, d9
fldmiad X!, { d6 - d7 }
vldmia.f64 X!, { d6 - d7 }
fmacd d2 , d5, d9
fmacd d3 , d5, d8

fldmiad Y!, { d10 - d11 }
vldmia.f64 Y!, { d10 - d11 }
fmacd d0 , d6, d10
fmacd d1 , d6, d11
fmacd d2 , d7, d11
@@ -111,8 +111,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1

fldmiad X!, { d4 - d5 }
fldmiad Y!, { d8 - d9 }
vldmia.f64 X!, { d4 - d5 }
vldmia.f64 Y!, { d8 - d9 }
fmacd d0 , d4, d8
fmacd d1 , d4, d9
fmacd d2 , d5, d9
@@ -127,8 +127,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

nop

fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8
fmacd d1 , d4, d9
fmacd d2 , d5, d9
@@ -136,8 +136,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X
add Y, Y, INC_Y

fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8
fmacd d1 , d4, d9
fmacd d2 , d5, d9
@@ -145,8 +145,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X
add Y, Y, INC_Y

fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8
fmacd d1 , d4, d9
fmacd d2 , d5, d9
@@ -154,8 +154,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
add X, X, INC_X
add Y, Y, INC_Y

fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8
fmacd d1 , d4, d9
fmacd d2 , d5, d9
@@ -168,8 +168,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1

fldmiad X, { d4 - d5 }
fldmiad Y, { d8 - d9 }
vldmia.f64 X, { d4 - d5 }
vldmia.f64 Y, { d8 - d9 }
fmacd d0 , d4, d8
fmacd d1 , d4, d9
fmacd d2 , d5, d9


+ 12
- 12
kernel/arm/zgemm_kernel_2x2_vfp.S View File

@@ -360,7 +360,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad CO1, { d4 - d7 }
vldmia.f64 CO1, { d4 - d7 }

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -372,9 +372,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10

fstmiad CO1, { d4 - d7 }
vstmia.f64 CO1, { d4 - d7 }

fldmiad CO2, { d4 - d7 }
vldmia.f64 CO2, { d4 - d7 }

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
@@ -386,7 +386,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14

fstmiad CO2, { d4 - d7 }
vstmia.f64 CO2, { d4 - d7 }

add CO1, CO1, #32

@@ -543,23 +543,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad CO1, { d4 - d5 }
vldmia.f64 CO1, { d4 - d5 }

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8

fstmiad CO1, { d4 - d5 }
vstmia.f64 CO1, { d4 - d5 }

fldmiad CO2, { d4 - d5 }
vldmia.f64 CO2, { d4 - d5 }

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12

fstmiad CO2, { d4 - d5 }
vstmia.f64 CO2, { d4 - d5 }

add CO1, CO1, #16

@@ -714,7 +714,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad CO1, { d4 - d7 }
vldmia.f64 CO1, { d4 - d7 }

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -726,7 +726,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10

fstmiad CO1, { d4 - d7 }
vstmia.f64 CO1, { d4 - d7 }

add CO1, CO1, #32

@@ -843,14 +843,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad CO1, { d4 - d5 }
vldmia.f64 CO1, { d4 - d5 }

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8

fstmiad CO1, { d4 - d5 }
vstmia.f64 CO1, { d4 - d5 }

add CO1, CO1, #16



+ 12
- 12
kernel/arm/zgemm_kernel_2x2_vfpv3.S View File

@@ -374,8 +374,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad CO1, { d4 - d7 }
fldmiad CO2, { d8 - d11 }
vldmia.f64 CO1, { d4 - d7 }
vldmia.f64 CO2, { d8 - d11 }

FADD_R d16, d24 , d16
FADD_I d17, d25 , d17
@@ -406,8 +406,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d10, d1 , d23
FMAC_I2 d11, d1 , d22

fstmiad CO1, { d4 - d7 }
fstmiad CO2, { d8 - d11 }
vstmia.f64 CO1, { d4 - d7 }
vstmia.f64 CO2, { d8 - d11 }

add CO1, CO1, #32

@@ -570,8 +570,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad CO1, { d4 - d5 }
fldmiad CO2, { d8 - d9 }
vldmia.f64 CO1, { d4 - d5 }
vldmia.f64 CO2, { d8 - d9 }

FADD_R d16, d24 , d16
FADD_I d17, d25 , d17
@@ -588,8 +588,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d8 , d1 , d21
FMAC_I2 d9 , d1 , d20

fstmiad CO1, { d4 - d5 }
fstmiad CO2, { d8 - d9 }
vstmia.f64 CO1, { d4 - d5 }
vstmia.f64 CO2, { d8 - d9 }

add CO1, CO1, #16

@@ -752,7 +752,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad CO1, { d4 - d7 }
vldmia.f64 CO1, { d4 - d7 }

FADD_R d16, d24 , d16
FADD_I d17, d25 , d17
@@ -769,7 +769,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d19
FMAC_I2 d7 , d1 , d18

fstmiad CO1, { d4 - d7 }
vstmia.f64 CO1, { d4 - d7 }

add CO1, CO1, #32

@@ -887,7 +887,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad CO1, { d4 - d5 }
vldmia.f64 CO1, { d4 - d5 }

FADD_R d16, d24 , d16
FADD_I d17, d25 , d17
@@ -897,7 +897,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d4 , d1 , d17
FMAC_I2 d5 , d1 , d16

fstmiad CO1, { d4 - d5 }
vstmia.f64 CO1, { d4 - d5 }

add CO1, CO1, #16



+ 4
- 4
kernel/arm/zgemm_ncopy_2_vfp.S View File

@@ -87,7 +87,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d6 , [ AO2, #16 ]
fldd d7 , [ AO2, #24 ]

fstmiad BO!, { d0 - d7 }
vstmia.f64 BO!, { d0 - d7 }
add AO2, AO2, #32

.endm
@@ -101,7 +101,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d3 , [ AO2, #8 ]

add AO1, AO1, #16
fstmiad BO!, { d0 - d3 }
vstmia.f64 BO!, { d0 - d3 }
add AO2, AO2, #16

.endm
@@ -113,7 +113,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d2 , [ AO1, #16 ]
fldd d3 , [ AO1, #24 ]

fstmiad BO!, { d0 - d3 }
vstmia.f64 BO!, { d0 - d3 }
add AO1, AO1, #32

.endm
@@ -124,7 +124,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0 , [ AO1, #0 ]
fldd d1 , [ AO1, #8 ]

fstmiad BO!, { d0 - d1 }
vstmia.f64 BO!, { d0 - d1 }
add AO1, AO1, #16

.endm


+ 10
- 10
kernel/arm/zgemm_tcopy_2_vfp.S View File

@@ -74,13 +74,13 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.macro COPY2x2

pld [ AO1, #A_PRE ]
fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }

add r3, AO1, LDA
pld [ r3, #A_PRE ]
fldmiad r3, { d4 - d7 }
vldmia.f64 r3, { d4 - d7 }

fstmiad BO1, { d0 - d7 }
vstmia.f64 BO1, { d0 - d7 }
add AO1, AO1, #32
add BO1, BO1, M4

@@ -88,12 +88,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x2

fldmiad AO1, { d0 -d1 }
vldmia.f64 AO1, { d0 -d1 }

add r3, AO1, LDA
fldmiad r3, { d2 - d3 }
vldmia.f64 r3, { d2 - d3 }

fstmiad BO2, { d0 - d3 }
vstmia.f64 BO2, { d0 - d3 }
add AO1, AO1, #16
add BO2, BO2, #32

@@ -102,9 +102,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/*************************************************************************************************************************/
.macro COPY2x1

fldmiad AO1, { d0 - d3 }
vldmia.f64 AO1, { d0 - d3 }

fstmiad BO1, { d0 - d3 }
vstmia.f64 BO1, { d0 - d3 }
add AO1, AO1, #32
add BO1, BO1, M4

@@ -112,9 +112,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro COPY1x1

fldmiad AO1, { d0 - d1 }
vldmia.f64 AO1, { d0 - d1 }

fstmiad BO2, { d0 - d1 }
vstmia.f64 BO2, { d0 - d1 }
add AO1, AO1, #16
add BO2, BO2, #16



+ 16
- 16
kernel/arm/zgemv_n_vfp.S View File

@@ -204,7 +204,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
@@ -216,9 +216,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10

fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }

fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
@@ -230,7 +230,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14

fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }

.endm

@@ -269,14 +269,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8

fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }

add YO, YO, #16

@@ -352,47 +352,47 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8

fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }

add YO, YO, INC_Y

fldmiad YO, { d6 - d7 }
vldmia.f64 YO, { d6 - d7 }

FMAC_R1 d6 , d0 , d10
FMAC_I1 d7 , d0 , d11
FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10

fstmiad YO, { d6 - d7 }
vstmia.f64 YO, { d6 - d7 }

add YO, YO, INC_Y

fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12

fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }

add YO, YO, INC_Y

fldmiad YO, { d6 - d7 }
vldmia.f64 YO, { d6 - d7 }

FMAC_R1 d6 , d0 , d14
FMAC_I1 d7 , d0 , d15
FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14

fstmiad YO, { d6 - d7 }
vstmia.f64 YO, { d6 - d7 }

add YO, YO, INC_Y

@@ -433,14 +433,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
fldd d0, ALPHA_R
fldd d1, ALPHA_I

fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }

FMAC_R1 d4 , d0 , d8
FMAC_I1 d5 , d0 , d9
FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8

fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }

add YO, YO, INC_Y



+ 20
- 20
kernel/arm/zgemv_t_vfp.S View File

@@ -151,12 +151,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F2X1

fldmiad XO! , { d2 - d3 }
fldmiad AO1!, { d4 - d5 }
vldmia.f64 XO! , { d2 - d3 }
vldmia.f64 AO1!, { d4 - d5 }

fmacd d12 , d4 , d2
fmacd d13 , d4 , d3
fldmiad AO2!, { d8 - d9 }
vldmia.f64 AO2!, { d8 - d9 }
KMAC_R d12 , d5 , d3
KMAC_I d13 , d5 , d2

@@ -169,7 +169,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F2

fldmiad YO, { d4 - d7 }
vldmia.f64 YO, { d4 - d7 }

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
@@ -181,7 +181,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14

fstmiad YO!, { d4 - d7 }
vstmia.f64 YO!, { d4 - d7 }

.endm

@@ -205,8 +205,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_F1X1

fldmiad XO! , { d2 - d3 }
fldmiad AO1!, { d4 - d5 }
vldmia.f64 XO! , { d2 - d3 }
vldmia.f64 AO1!, { d4 - d5 }

fmacd d12 , d4 , d2
fmacd d13 , d4 , d3
@@ -217,14 +217,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_F1

fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12

fstmiad YO!, { d4 - d5 }
vstmia.f64 YO!, { d4 - d5 }

.endm

@@ -250,9 +250,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S2X1

fldmiad XO , { d2 - d3 }
fldmiad AO1!, { d4 - d5 }
fldmiad AO2!, { d8 - d9 }
vldmia.f64 XO , { d2 - d3 }
vldmia.f64 AO1!, { d4 - d5 }
vldmia.f64 AO2!, { d8 - d9 }

fmacd d12 , d4 , d2
fmacd d13 , d4 , d3
@@ -270,25 +270,25 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S2

fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12

fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }

add YO, YO, INC_Y

fldmiad YO, { d6 - d7 }
vldmia.f64 YO, { d6 - d7 }

FMAC_R1 d6 , d0 , d14
FMAC_I1 d7 , d0 , d15
FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14

fstmiad YO, { d6 - d7 }
vstmia.f64 YO, { d6 - d7 }

add YO, YO, INC_Y

@@ -314,8 +314,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro KERNEL_S1X1

fldmiad XO , { d2 - d3 }
fldmiad AO1!, { d4 - d5 }
vldmia.f64 XO , { d2 - d3 }
vldmia.f64 AO1!, { d4 - d5 }

fmacd d12 , d4 , d2
fmacd d13 , d4 , d3
@@ -328,14 +328,14 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

.macro SAVE_S1

fldmiad YO, { d4 - d5 }
vldmia.f64 YO, { d4 - d5 }

FMAC_R1 d4 , d0 , d12
FMAC_I1 d5 , d0 , d13
FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12

fstmiad YO, { d4 - d5 }
vstmia.f64 YO, { d4 - d5 }

add YO, YO, INC_Y



+ 6
- 6
kernel/arm/ztrmm_kernel_2x2_vfp.S View File

@@ -385,7 +385,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10

fstmiad CO1, { d4 - d7 }
vstmia.f64 CO1, { d4 - d7 }

fldd d4 , FP_ZERO
vmov.f64 d5 , d4
@@ -402,7 +402,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d15
FMAC_I2 d7 , d1 , d14

fstmiad CO2, { d4 - d7 }
vstmia.f64 CO2, { d4 - d7 }

add CO1, CO1, #32

@@ -567,7 +567,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8

fstmiad CO1, { d4 - d5 }
vstmia.f64 CO1, { d4 - d5 }

fldd d4 , FP_ZERO
vmov.f64 d5 , d4
@@ -577,7 +577,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d4 , d1 , d13
FMAC_I2 d5 , d1 , d12

fstmiad CO2, { d4 - d5 }
vstmia.f64 CO2, { d4 - d5 }

add CO1, CO1, #16

@@ -747,7 +747,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d11
FMAC_I2 d7 , d1 , d10

fstmiad CO1, { d4 - d7 }
vstmia.f64 CO1, { d4 - d7 }

add CO1, CO1, #32

@@ -872,7 +872,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d4 , d1 , d9
FMAC_I2 d5 , d1 , d8

fstmiad CO1, { d4 - d5 }
vstmia.f64 CO1, { d4 - d5 }

add CO1, CO1, #16



+ 6
- 6
kernel/arm/ztrmm_kernel_2x2_vfpv3.S View File

@@ -391,8 +391,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d10, d1 , d23
FMAC_I2 d11, d1 , d22

fstmiad CO1, { d4 - d7 }
fstmiad CO2, { d8 - d11 }
vstmia.f64 CO1, { d4 - d7 }
vstmia.f64 CO2, { d8 - d11 }

add CO1, CO1, #32

@@ -569,8 +569,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d8 , d1 , d21
FMAC_I2 d9 , d1 , d20

fstmiad CO1, { d4 - d5 }
fstmiad CO2, { d8 - d9 }
vstmia.f64 CO1, { d4 - d5 }
vstmia.f64 CO2, { d8 - d9 }

add CO1, CO1, #16

@@ -747,7 +747,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d6 , d1 , d19
FMAC_I2 d7 , d1 , d18

fstmiad CO1, { d4 - d7 }
vstmia.f64 CO1, { d4 - d7 }

add CO1, CO1, #32

@@ -872,7 +872,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
FMAC_R2 d4 , d1 , d17
FMAC_I2 d5 , d1 , d16

fstmiad CO1, { d4 - d5 }
vstmia.f64 CO1, { d4 - d5 }

add CO1, CO1, #16



+ 14
- 0
kernel/mips64/axpy_loongson3a.S View File

@@ -270,6 +270,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
.align 5

.L20:
beqz INCY, .L27
dsra I, N, 3
move YY, Y

@@ -450,5 +451,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

j $31
NOP
.align 3
.L27:
LD b1, 0 * SIZE(Y)

.L28:
daddiu N, N, -1
LD a1, 0 * SIZE(X)
daddu X, X, INCX
bgtz N, .L28
MADD b1, b1, ALPHA, a1

j .L999
ST b1, 0 * SIZE(Y)
EPILOGUE

+ 14
- 0
kernel/mips64/daxpy_loongson3a_simd.S View File

@@ -562,6 +562,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

//INCX!=1 or INCY != 1
.L20:
beq INCY, $0, .L27
dsra I, N, 3
move YY, Y

@@ -754,5 +755,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

j $31
NOP
.align 3
.L27:
LD b1, 0 * SIZE(Y)

.L28:
daddiu N, N, -1
LD a1, 0 * SIZE(X)
daddu X, X, INCX
bgtz N, .L28
MADD b1, b1, ALPHA, a1

j .L999
ST b1, 0 * SIZE(Y)
EPILOGUE

+ 5
- 11
kernel/x86_64/KERNEL.SKYLAKEX View File

@@ -2,18 +2,12 @@ include $(KERNELDIR)/KERNEL.HASWELL

SGEMMKERNEL = sgemm_kernel_16x4_skylakex.S

DGEMMKERNEL = dgemm_kernel_4x8_skylakex.c

#DTRMMKERNEL = ../generic/trmmkernel_16x2.c
#DGEMMKERNEL = dgemm_kernel_16x2_skylakex.S
#DGEMMINCOPY = ../generic/gemm_ncopy_16.c
#DGEMMITCOPY = ../generic/gemm_tcopy_16.c
#DGEMMONCOPY = ../generic/gemm_ncopy_2.c
#DGEMMOTCOPY = ../generic/gemm_tcopy_2.c
#DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX)
#DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX)
#DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
#DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)

DGEMMINCOPY = ../generic/gemm_ncopy_8.c
DGEMMITCOPY = ../generic/gemm_tcopy_8.c
DGEMMONCOPY = ../generic/gemm_ncopy_8.c
DGEMMOTCOPY = ../generic/gemm_tcopy_8.c

SGEMM_BETA = ../generic/gemm_beta.c
DGEMM_BETA = ../generic/gemm_beta.c

+ 1642
- 0
kernel/x86_64/dgemm_kernel_4x8_skylakex.c
File diff suppressed because it is too large
View File


+ 1
- 1
utest/test_fork.c View File

@@ -31,10 +31,10 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

**********************************************************************************/

#include "openblas_utest.h"
#include <sys/types.h>
#include <sys/wait.h>
#include <cblas.h>
#include "openblas_utest.h"

void* xmalloc(size_t n)
{


Loading…
Cancel
Save