Browse Source

THUNDERX2T99: Add Optimized SGEMM Implementation

tags/v0.2.20^2
Ashwin Sekhar T K 8 years ago
parent
commit
f279ff4789
4 changed files with 2103 additions and 6 deletions
  1. +4
    -0
      driver/others/parameter.c
  2. +6
    -0
      kernel/arm64/KERNEL.THUNDERX2T99
  3. +2087
    -0
      kernel/arm64/sgemm_kernel_16x4_thunderx2t99.S
  4. +6
    -6
      param.h

+ 4
- 0
driver/others/parameter.c View File

@@ -743,6 +743,10 @@ void blas_set_parameter(void)
dgemm_q = 128;
dgemm_r = 4096;

+sgemm_p = 128;
+sgemm_q = 352;
+sgemm_r = 4096;

dgemm_prefetch_size_a = 3584;
dgemm_prefetch_size_b = 512;
dgemm_prefetch_size_c = 128;


+ 6
- 0
kernel/arm64/KERNEL.THUNDERX2T99 View File

@@ -6,3 +6,9 @@ else
DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S
endif

+ifeq ($(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N), 16x4)
+SGEMMKERNEL = sgemm_kernel_16x4_thunderx2t99.S
+else
+SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S
+endif


+ 2087
- 0
kernel/arm64/sgemm_kernel_16x4_thunderx2t99.S
File diff suppressed because it is too large
View File


+ 6
- 6
param.h View File

@@ -2326,17 +2326,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4

-#define SGEMM_DEFAULT_P 512
+#define SGEMM_DEFAULT_P sgemm_p
#define DGEMM_DEFAULT_P dgemm_p
#define CGEMM_DEFAULT_P 256
#define ZGEMM_DEFAULT_P 128

-#define SGEMM_DEFAULT_Q 1024
+#define SGEMM_DEFAULT_Q sgemm_q
#define DGEMM_DEFAULT_Q dgemm_q
#define CGEMM_DEFAULT_Q 512
#define ZGEMM_DEFAULT_Q 512

-#define SGEMM_DEFAULT_R 4096
+#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 2048
@@ -2482,17 +2482,17 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ZGEMM_DEFAULT_UNROLL_M 4
#define ZGEMM_DEFAULT_UNROLL_N 4

-#define SGEMM_DEFAULT_P 512
+#define SGEMM_DEFAULT_P sgemm_p
#define DGEMM_DEFAULT_P dgemm_p
#define CGEMM_DEFAULT_P 256
#define ZGEMM_DEFAULT_P 128

-#define SGEMM_DEFAULT_Q 1024
+#define SGEMM_DEFAULT_Q sgemm_q
#define DGEMM_DEFAULT_Q dgemm_q
#define CGEMM_DEFAULT_Q 512
#define ZGEMM_DEFAULT_Q 512

-#define SGEMM_DEFAULT_R 4096
+#define SGEMM_DEFAULT_R sgemm_r
#define DGEMM_DEFAULT_R dgemm_r
#define CGEMM_DEFAULT_R 4096
#define ZGEMM_DEFAULT_R 2048


Loading…
Cancel
Save