Browse Source

Merge pull request #897 from ksraste/develop

STRSM optimized for MSA
tags/v0.2.19^2
Zhang Xianyi GitHub 9 years ago
parent
commit
4a30a2584a
6 changed files with 1362 additions and 2964 deletions
  1. +0
    -4
      kernel/Makefile.L3
  2. +5
    -1
      kernel/mips/macros_msa.h
  3. +325
    -672
      kernel/mips/strsm_kernel_LN_8x8_msa.c
  4. +290
    -695
      kernel/mips/strsm_kernel_LT_8x8_msa.c
  5. +347
    -805
      kernel/mips/strsm_kernel_RN_8x8_msa.c
  6. +395
    -787
      kernel/mips/strsm_kernel_RT_8x8_msa.c

+ 0
- 4
kernel/Makefile.L3 View File

@@ -12,10 +12,6 @@ ifeq ($(ARCH), ia64)
USE_GEMM3M = 1
endif

ifeq ($(ARCH), MIPS)
USE_GEMM3M = 1
endif

ifeq ($(ARCH), arm)
USE_TRMM = 1
endif


+ 5
- 1
kernel/mips/macros_msa.h View File

@@ -42,6 +42,11 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ST_D(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
#define ST_DP(...) ST_D(v2f64, __VA_ARGS__)

/* Description : Copy a float value into a vector (presumably broadcasting it
                 to all elements via the splat of element 0 - confirm against
                 the MSA intrinsic reference)
   Arguments   : Inputs  - a (value passed to __msa_cast_to_vector_float)
                 Outputs - b (assigned twice: first the cast of 'a', then the
                              __msa_splati_w of its own element 0, cast back
                              to v4f32)
   Note        : Expands to two separate ';'-terminated statements and is not
                 wrapped in do { } while (0). Used as the sole body of an
                 unbraced if/else/loop, only the first assignment would be
                 guarded. 'b' is expanded three times, so it should be a plain
                 lvalue without side effects.
*/
#define COPY_FLOAT_TO_VECTOR(a, b) \
b = __msa_cast_to_vector_float(a); \
b = (v4f32) __msa_splati_w((v4i32) b, 0);


/* Description : Load 2 vectors of single precision floating point elements with stride
Arguments : Inputs - psrc, stride
Outputs - out0, out1
@@ -178,7 +183,6 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
out2 = (RTYPE) __msa_ilvr_d((v2i64) s3_m, (v2i64) s1_m); \
out3 = (RTYPE) __msa_ilvl_d((v2i64) s3_m, (v2i64) s1_m); \
}

/* Convenience alias: forwards all of its arguments to TRANSPOSE4x4_W with
   v4f32 supplied as the leading (type) argument. */
#define TRANSPOSE4x4_SP_SP(...) TRANSPOSE4x4_W(v4f32, __VA_ARGS__)

#endif /* __MACROS_MSA_H__ */

+ 325
- 672
kernel/mips/strsm_kernel_LN_8x8_msa.c
File diff suppressed because it is too large
View File


+ 290
- 695
kernel/mips/strsm_kernel_LT_8x8_msa.c
File diff suppressed because it is too large
View File


+ 347
- 805
kernel/mips/strsm_kernel_RN_8x8_msa.c
File diff suppressed because it is too large
View File


+ 395
- 787
kernel/mips/strsm_kernel_RT_8x8_msa.c
File diff suppressed because it is too large
View File


Loading…
Cancel
Save