Browse Source

Optimise casum for POWER9

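Use the lxvd2x instruction instead of lxvw4x for the vector loads.
lxvd2x performs far better on the new POWER architecture than lxvw4x.
(The two loads can order the single-precision elements differently within a register on little-endian targets, but casum only sums absolute values, so the element order within a register should not matter here.)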
tags/v0.2.20^2
Matt Brown 8 years ago
parent
commit
19bdf9d52b
1 changed file with 16 additions and 16 deletions
  1. +16
    -16
      kernel/power/casum_microk_power8.c

+ 16
- 16
kernel/power/casum_microk_power8.c View File

@@ -56,14 +56,14 @@ static float casum_kernel_16 (long n, float *x)
 "xxlxor 38, 38, 38 \n\t"
 "xxlxor 39, 39, 39 \n\t"

-"lxvw4x 40, 0, %2 \n\t"
-"lxvw4x 41, %8, %2 \n\t"
-"lxvw4x 42, %9, %2 \n\t"
-"lxvw4x 43, %10, %2 \n\t"
-"lxvw4x 44, %11, %2 \n\t"
-"lxvw4x 45, %12, %2 \n\t"
-"lxvw4x 46, %13, %2 \n\t"
-"lxvw4x 47, %14, %2 \n\t"
+"lxvd2x 40, 0, %2 \n\t"
+"lxvd2x 41, %8, %2 \n\t"
+"lxvd2x 42, %9, %2 \n\t"
+"lxvd2x 43, %10, %2 \n\t"
+"lxvd2x 44, %11, %2 \n\t"
+"lxvd2x 45, %12, %2 \n\t"
+"lxvd2x 46, %13, %2 \n\t"
+"lxvd2x 47, %14, %2 \n\t"

 "addi %2, %2, 128 \n\t"

@@ -78,26 +78,26 @@ static float casum_kernel_16 (long n, float *x)
 "xvabssp 50, 42 \n\t"
 "xvabssp 51, 43 \n\t"

-"lxvw4x 40, 0, %2 \n\t"
-"lxvw4x 41, %8, %2 \n\t"
+"lxvd2x 40, 0, %2 \n\t"
+"lxvd2x 41, %8, %2 \n\t"

 "xvabssp %x3, 44 \n\t"
 "xvabssp %x4, 45 \n\t"

-"lxvw4x 42, %9, %2 \n\t"
-"lxvw4x 43, %10, %2 \n\t"
+"lxvd2x 42, %9, %2 \n\t"
+"lxvd2x 43, %10, %2 \n\t"

 "xvabssp %x5, 46 \n\t"
 "xvabssp %x6, 47 \n\t"

-"lxvw4x 44, %11, %2 \n\t"
-"lxvw4x 45, %12, %2 \n\t"
+"lxvd2x 44, %11, %2 \n\t"
+"lxvd2x 45, %12, %2 \n\t"

 "xvaddsp 32, 32, 48 \n\t"
 "xvaddsp 33, 33, 49 \n\t"

-"lxvw4x 46, %13, %2 \n\t"
-"lxvw4x 47, %14, %2 \n\t"
+"lxvd2x 46, %13, %2 \n\t"
+"lxvd2x 47, %14, %2 \n\t"

 "xvaddsp 34, 34, 50 \n\t"
 "xvaddsp 35, 35, 51 \n\t"


Loading…
Cancel
Save