Browse Source

Fix declaration of input arguments in the x86_64 microkernels for DOT and AXPY (#1965)

* Tag operands 0 and 1 as both input and output

For #1964 (basically a continuation of coding problems first seen in #1292)
tags/v0.3.6^2
Martin Kroeker GitHub 6 years ago
parent
commit
d5e6940253
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 202 additions and 202 deletions
  1. +7
    -7
      kernel/x86_64/caxpy_microk_bulldozer-2.c
  2. +3
    -3
      kernel/x86_64/caxpy_microk_haswell-2.c
  3. +4
    -4
      kernel/x86_64/caxpy_microk_sandy-2.c
  4. +7
    -7
      kernel/x86_64/caxpy_microk_steamroller-2.c
  5. +7
    -7
      kernel/x86_64/cdot_microk_bulldozer-2.c
  6. +3
    -3
      kernel/x86_64/cdot_microk_haswell-2.c
  7. +4
    -4
      kernel/x86_64/cdot_microk_sandy-2.c
  8. +7
    -7
      kernel/x86_64/cdot_microk_steamroller-2.c
  9. +3
    -3
      kernel/x86_64/daxpy_microk_bulldozer-2.c
  10. +4
    -4
      kernel/x86_64/daxpy_microk_haswell-2.c
  11. +3
    -3
      kernel/x86_64/daxpy_microk_nehalem-2.c
  12. +8
    -8
      kernel/x86_64/daxpy_microk_piledriver-2.c
  13. +4
    -4
      kernel/x86_64/daxpy_microk_sandy-2.c
  14. +8
    -8
      kernel/x86_64/daxpy_microk_steamroller-2.c
  15. +4
    -4
      kernel/x86_64/ddot_microk_bulldozer-2.c
  16. +3
    -3
      kernel/x86_64/ddot_microk_haswell-2.c
  17. +4
    -4
      kernel/x86_64/ddot_microk_nehalem-2.c
  18. +8
    -8
      kernel/x86_64/ddot_microk_piledriver-2.c
  19. +4
    -4
      kernel/x86_64/ddot_microk_sandy-2.c
  20. +4
    -4
      kernel/x86_64/ddot_microk_steamroller-2.c
  21. +4
    -4
      kernel/x86_64/saxpy_microk_haswell-2.c
  22. +3
    -3
      kernel/x86_64/saxpy_microk_nehalem-2.c
  23. +8
    -8
      kernel/x86_64/saxpy_microk_piledriver-2.c
  24. +4
    -4
      kernel/x86_64/saxpy_microk_sandy-2.c
  25. +4
    -4
      kernel/x86_64/sdot_microk_bulldozer-2.c
  26. +4
    -4
      kernel/x86_64/sdot_microk_haswell-2.c
  27. +4
    -4
      kernel/x86_64/sdot_microk_nehalem-2.c
  28. +4
    -4
      kernel/x86_64/sdot_microk_sandy-2.c
  29. +8
    -8
      kernel/x86_64/sdot_microk_steamroller-2.c
  30. +8
    -8
      kernel/x86_64/zaxpy_microk_bulldozer-2.c
  31. +4
    -4
      kernel/x86_64/zaxpy_microk_haswell-2.c
  32. +8
    -8
      kernel/x86_64/zaxpy_microk_sandy-2.c
  33. +8
    -8
      kernel/x86_64/zaxpy_microk_steamroller-2.c
  34. +8
    -8
      kernel/x86_64/zdot_microk_bulldozer-2.c
  35. +8
    -8
      kernel/x86_64/zdot_microk_haswell-2.c
  36. +8
    -8
      kernel/x86_64/zdot_microk_sandy-2.c
  37. +8
    -8
      kernel/x86_64/zdot_microk_steamroller-2.c

+ 7
- 7
kernel/x86_64/caxpy_microk_bulldozer-2.c View File

@@ -114,9 +114,9 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4
@@ -180,10 +180,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4


+ 3
- 3
kernel/x86_64/caxpy_microk_haswell-2.c View File

@@ -112,9 +112,9 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4


+ 4
- 4
kernel/x86_64/caxpy_microk_sandy-2.c View File

@@ -95,10 +95,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4


+ 7
- 7
kernel/x86_64/caxpy_microk_steamroller-2.c View File

@@ -113,10 +113,10 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4
@@ -181,9 +181,9 @@ static void caxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4


+ 7
- 7
kernel/x86_64/cdot_microk_bulldozer-2.c View File

@@ -97,9 +97,9 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4
@@ -175,10 +175,10 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 3
- 3
kernel/x86_64/cdot_microk_haswell-2.c View File

@@ -98,9 +98,9 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 4
- 4
kernel/x86_64/cdot_microk_sandy-2.c View File

@@ -105,10 +105,10 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 7
- 7
kernel/x86_64/cdot_microk_steamroller-2.c View File

@@ -97,9 +97,9 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4
@@ -175,10 +175,10 @@ static void cdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 3
- 3
kernel/x86_64/daxpy_microk_bulldozer-2.c View File

@@ -64,9 +64,9 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 4
- 4
kernel/x86_64/daxpy_microk_haswell-2.c View File

@@ -59,10 +59,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 3
- 3
kernel/x86_64/daxpy_microk_nehalem-2.c View File

@@ -73,9 +73,9 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 8
- 8
kernel/x86_64/daxpy_microk_piledriver-2.c View File

@@ -78,10 +78,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"subq $16, %1 \n\t"
"jnz 1b \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4
@@ -140,10 +140,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"subq $16, %1 \n\t"
"jnz 1b \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 4
- 4
kernel/x86_64/daxpy_microk_sandy-2.c View File

@@ -99,10 +99,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)

"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 8
- 8
kernel/x86_64/daxpy_microk_steamroller-2.c View File

@@ -78,10 +78,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"subq $16, %1 \n\t"
"jnz 1b \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4
@@ -140,10 +140,10 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"subq $16, %1 \n\t"
"jnz 1b \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 4
- 4
kernel/x86_64/ddot_microk_bulldozer-2.c View File

@@ -65,10 +65,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)

"vmovsd %%xmm4, (%4) \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 3
- 3
kernel/x86_64/ddot_microk_haswell-2.c View File

@@ -77,9 +77,9 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 4
- 4
kernel/x86_64/ddot_microk_nehalem-2.c View File

@@ -75,10 +75,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)

"movsd %%xmm4, (%4) \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 8
- 8
kernel/x86_64/ddot_microk_piledriver-2.c View File

@@ -81,10 +81,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovsd %%xmm4, (%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4
@@ -145,10 +145,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovsd %%xmm4, (%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 4
- 4
kernel/x86_64/ddot_microk_sandy-2.c View File

@@ -81,10 +81,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovsd %%xmm4, (%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 4
- 4
kernel/x86_64/ddot_microk_steamroller-2.c View File

@@ -78,10 +78,10 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovsd %%xmm4, (%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 4
- 4
kernel/x86_64/saxpy_microk_haswell-2.c View File

@@ -59,10 +59,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 3
- 3
kernel/x86_64/saxpy_microk_nehalem-2.c View File

@@ -73,9 +73,9 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 8
- 8
kernel/x86_64/saxpy_microk_piledriver-2.c View File

@@ -78,10 +78,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4
@@ -139,10 +139,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 4
- 4
kernel/x86_64/saxpy_microk_sandy-2.c View File

@@ -99,10 +99,10 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)

"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha) // 4


+ 4
- 4
kernel/x86_64/sdot_microk_bulldozer-2.c View File

@@ -66,10 +66,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)

"vmovss %%xmm4, (%4) \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 4
- 4
kernel/x86_64/sdot_microk_haswell-2.c View File

@@ -79,10 +79,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovss %%xmm4, (%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 4
- 4
kernel/x86_64/sdot_microk_nehalem-2.c View File

@@ -75,10 +75,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)

"movss %%xmm4, (%4) \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 4
- 4
kernel/x86_64/sdot_microk_sandy-2.c View File

@@ -82,10 +82,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovss %%xmm4, (%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 8
- 8
kernel/x86_64/sdot_microk_steamroller-2.c View File

@@ -80,10 +80,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)

"vmovss %%xmm4, (%4) \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4
@@ -143,10 +143,10 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)

"vmovss %%xmm4, (%4) \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 8
- 8
kernel/x86_64/zaxpy_microk_bulldozer-2.c View File

@@ -113,10 +113,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4
@@ -180,10 +180,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4


+ 4
- 4
kernel/x86_64/zaxpy_microk_haswell-2.c View File

@@ -111,10 +111,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4


+ 8
- 8
kernel/x86_64/zaxpy_microk_sandy-2.c View File

@@ -99,10 +99,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4
@@ -176,10 +176,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4


+ 8
- 8
kernel/x86_64/zaxpy_microk_steamroller-2.c View File

@@ -113,10 +113,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4
@@ -180,10 +180,10 @@ static void zaxpy_kernel_4( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"jnz 1b \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (alpha), // 4


+ 8
- 8
kernel/x86_64/zdot_microk_bulldozer-2.c View File

@@ -96,10 +96,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4
@@ -175,10 +175,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 8
- 8
kernel/x86_64/zdot_microk_haswell-2.c View File

@@ -101,10 +101,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4
@@ -186,10 +186,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 8
- 8
kernel/x86_64/zdot_microk_sandy-2.c View File

@@ -107,10 +107,10 @@ if ( n < 1280 )
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4
@@ -199,10 +199,10 @@ if ( n < 1280 )
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


+ 8
- 8
kernel/x86_64/zdot_microk_steamroller-2.c View File

@@ -95,10 +95,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4
@@ -172,10 +172,10 @@ static void zdot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vmovups %%xmm4, 16(%4) \n\t"
"vzeroupper \n\t"

- :
- :
- "r" (i), // 0
- "r" (n), // 1
+ :
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (dot) // 4


Loading…
Cancel
Save