Browse Source

Merge pull request #3029 from RajalakshmiSR/axpyp10

POWER10: Improve axpy performance
tags/v0.3.13^2
Martin Kroeker GitHub 5 years ago
parent
commit
043128cbe5
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 38 additions and 17 deletions
  1. kernel/power/caxpy_microk_power10.c (+16, -8)
  2. kernel/power/daxpy_power10.c (+12, -5)
  3. kernel/power/saxpy_power10.c (+10, -4)

kernel/power/caxpy_microk_power10.c (+16, -8)

@@ -112,10 +112,14 @@ static void caxpy_kernel_8 (long n, float *x, float *y,
"xvmaddasp 38, 58, 33 \n\t" "xvmaddasp 38, 58, 33 \n\t"
"xvmaddasp 39, 59, 33 \n\t" "xvmaddasp 39, 59, 33 \n\t"


"stxvp 48, 0(%4) \n\t"
"stxvp 50, 32(%4) \n\t"
"stxvp 34, 64(%4) \n\t"
"stxvp 38, 96(%4) \n\t"
"stxv 49, 0(%4) \n\t"
"stxv 48, 16(%4) \n\t"
"stxv 51, 32(%4) \n\t"
"stxv 50, 48(%4) \n\t"
"stxv 35, 64(%4) \n\t"
"stxv 34, 80(%4) \n\t"
"stxv 39, 96(%4) \n\t"
"stxv 38, 112(%4) \n\t"


"addi %4, %4, 128 \n\t" "addi %4, %4, 128 \n\t"
"xxperm 52, 40, %x10 \n\t" // exchange real and imag part "xxperm 52, 40, %x10 \n\t" // exchange real and imag part
@@ -159,10 +163,14 @@ static void caxpy_kernel_8 (long n, float *x, float *y,
"xvmaddasp 38, 58, 33 \n\t" "xvmaddasp 38, 58, 33 \n\t"
"xvmaddasp 39, 59, 33 \n\t" "xvmaddasp 39, 59, 33 \n\t"


"stxvp 48, 0(%4) \n\t"
"stxvp 50, 32(%4) \n\t"
"stxvp 34, 64(%4) \n\t"
"stxvp 38, 96(%4) \n\t"
"stxv 49, 0(%4) \n\t"
"stxv 48, 16(%4) \n\t"
"stxv 51, 32(%4) \n\t"
"stxv 50, 48(%4) \n\t"
"stxv 35, 64(%4) \n\t"
"stxv 34, 80(%4) \n\t"
"stxv 39, 96(%4) \n\t"
"stxv 38, 112(%4) \n\t"


"#n=%1 x=%5=%2 y=%0=%3 alpha=(%7,%8) mvecp=%6=%9 ytmp=%4\n" "#n=%1 x=%5=%2 y=%0=%3 alpha=(%7,%8) mvecp=%6=%9 ytmp=%4\n"
: :


kernel/power/daxpy_power10.c (+12, -5)

@@ -66,12 +66,19 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
if ( (inc_x == 1) && (inc_y == 1) ) if ( (inc_x == 1) && (inc_y == 1) )
{ {


BLASLONG n1 = n & -16;
if ( n >= 16 )
{
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 3) & 0x3;
for (i = 0; i < align; i++) {
y[i] += da * x[i] ;
}
}
BLASLONG n1 = (n-i) & -16;
if ( n1 )
daxpy_kernel_8(n1, &x[i], &y[i], da);

i += n1;


if ( n1 )
daxpy_kernel_8(n1, x, y, da);

i = n1;
while(i < n) while(i < n)
{ {




kernel/power/saxpy_power10.c (+10, -4)

@@ -64,12 +64,18 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
if ( (inc_x == 1) && (inc_y == 1) ) if ( (inc_x == 1) && (inc_y == 1) )
{ {


BLASLONG n1 = n & -64;

if ( n >= 64 )
{
BLASLONG align = ((32 - ((uintptr_t)y & (uintptr_t)0x1F)) >> 2) & 0x7;
for (i = 0; i < align; i++) {
y[i] += da * x[i] ;
}
}
BLASLONG n1 = (n-i) & -64;
if ( n1 ) if ( n1 )
saxpy_kernel_64(n1, x, y, da);
saxpy_kernel_64(n1, &x[i], &y[i], da);


i = n1;
i += n1;
while(i < n) while(i < n)
{ {




Loading…
Cancel
Save