Browse Source

Tag %1 and %2 as both input and output

The inline assembly modifies its input operands, so mark them as read-write outputs ("+r") to avoid surprises with optimization. Fixes #1292.
tags/v0.3.0
Martin Kroeker GitHub 7 years ago
parent
commit
723f396a20
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 61 additions and 61 deletions
  1. +12
    -12
      kernel/x86_64/cgemv_n_microk_haswell-4.c
  2. +9
    -9
      kernel/x86_64/cgemv_t_microk_haswell-4.c
  3. +6
    -6
      kernel/x86_64/dgemv_n_microk_haswell-4.c
  4. +3
    -3
      kernel/x86_64/dgemv_t_microk_haswell-4.c
  5. +7
    -7
      kernel/x86_64/sgemv_n_microk_haswell-4.c
  6. +3
    -3
      kernel/x86_64/sgemv_t_microk_haswell-4.c
  7. +12
    -12
      kernel/x86_64/zgemv_n_microk_haswell-4.c
  8. +9
    -9
      kernel/x86_64/zgemv_t_microk_haswell-4.c

+ 12
- 12
kernel/x86_64/cgemv_n_microk_haswell-4.c View File

@@ -159,9 +159,9 @@ static void cgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n1), // 1
+ "+r" (i), // 0
+ "+r" (n1) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -283,9 +283,9 @@ static void cgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n1), // 1
+ "+r" (i), // 0
+ "+r" (n1) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -390,9 +390,9 @@ static void cgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n1), // 1
+ "+r" (i), // 0
+ "+r" (n1) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap), // 4
@@ -520,9 +520,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n1), // 1
+ "+r" (i), // 0
+ "+r" (n1) // 1
+ :
"r" (src), // 2
"r" (dest), // 3
"r" (&alpha_r), // 4


+ 9
- 9
kernel/x86_64/cgemv_t_microk_haswell-4.c View File

@@ -230,9 +230,9 @@ static void cgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -391,9 +391,9 @@ static void cgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -519,9 +519,9 @@ static void cgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT *
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap), // 4


+ 6
- 6
kernel/x86_64/dgemv_n_microk_haswell-4.c View File

@@ -93,9 +93,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -172,9 +172,9 @@ static void dgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT


:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4


+ 3
- 3
kernel/x86_64/dgemv_t_microk_haswell-4.c View File

@@ -107,9 +107,9 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4


+ 7
- 7
kernel/x86_64/sgemv_n_microk_haswell-4.c View File

@@ -153,10 +153,10 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
- "r" (x), // 2
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
+ "r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
"r" (ap[1]), // 5
@@ -276,9 +276,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4


+ 3
- 3
kernel/x86_64/sgemv_t_microk_haswell-4.c View File

@@ -128,9 +128,9 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4


+ 12
- 12
kernel/x86_64/zgemv_n_microk_haswell-4.c View File

@@ -115,9 +115,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -203,9 +203,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -277,9 +277,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y)
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap) // 4
@@ -379,9 +379,9 @@ static void add_y(BLASLONG n, FLOAT *src, FLOAT *dest, BLASLONG inc_dest,FLOAT a
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (src), // 2
"r" (dest), // 3
"r" (&alpha_r), // 4


+ 9
- 9
kernel/x86_64/zgemv_t_microk_haswell-4.c View File

@@ -181,9 +181,9 @@ static void zgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -308,9 +308,9 @@ static void zgemv_kernel_4x2( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap[0]), // 4
@@ -407,9 +407,9 @@ static void zgemv_kernel_4x1( BLASLONG n, FLOAT *ap, FLOAT *x, FLOAT *y, FLOAT *
"vzeroupper \n\t"

:
- :
- "r" (i), // 0
- "r" (n), // 1
+ "+r" (i), // 0
+ "+r" (n) // 1
+ :
"r" (x), // 2
"r" (y), // 3
"r" (ap), // 4


Loading…
Cancel
Save