Browse Source

Merge pull request #1473 from martin-frbg/p2align

Replace .align with .p2align in dscal.c and the Nehalem microkernels as well
tags/v0.3.0
Martin Kroeker (committed via GitHub) 7 years ago
parent
commit
719b68f077
No known key found for this signature in database. GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 16 additions and 16 deletions
  1. +1
    -1
      kernel/x86_64/daxpy_microk_nehalem-2.c
  2. +1
    -1
      kernel/x86_64/ddot_microk_nehalem-2.c
  3. +1
    -1
      kernel/x86_64/dgemv_n_microk_nehalem-4.c
  4. +1
    -1
      kernel/x86_64/dscal.c
  5. +1
    -1
      kernel/x86_64/dsymv_L_microk_nehalem-2.c
  6. +1
    -1
      kernel/x86_64/dsymv_U_microk_nehalem-2.c
  7. +1
    -1
      kernel/x86_64/saxpy_microk_nehalem-2.c
  8. +1
    -1
      kernel/x86_64/sdot_microk_nehalem-2.c
  9. +5
    -5
      kernel/x86_64/sgemv_n_microk_nehalem-4.c
  10. +1
    -1
      kernel/x86_64/sgemv_t_microk_nehalem-4.c
  11. +1
    -1
      kernel/x86_64/ssymv_L_microk_nehalem-2.c
  12. +1
    -1
      kernel/x86_64/ssymv_U_microk_nehalem-2.c

+ 1
- 1
kernel/x86_64/daxpy_microk_nehalem-2.c View File

@@ -39,7 +39,7 @@ static void daxpy_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"movsd (%4), %%xmm0 \n\t" // alpha "movsd (%4), %%xmm0 \n\t" // alpha
"shufpd $0, %%xmm0, %%xmm0 \n\t" "shufpd $0, %%xmm0, %%xmm0 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
// "prefetcht0 192(%2,%0,8) \n\t" // "prefetcht0 192(%2,%0,8) \n\t"
// "prefetcht0 192(%3,%0,8) \n\t" // "prefetcht0 192(%3,%0,8) \n\t"


+ 1
- 1
kernel/x86_64/ddot_microk_nehalem-2.c View File

@@ -41,7 +41,7 @@ static void ddot_kernel_8( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"xorpd %%xmm6, %%xmm6 \n\t" "xorpd %%xmm6, %%xmm6 \n\t"
"xorpd %%xmm7, %%xmm7 \n\t" "xorpd %%xmm7, %%xmm7 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"


"movups (%2,%0,8), %%xmm12 \n\t" // 2 * x "movups (%2,%0,8), %%xmm12 \n\t" // 2 * x


+ 1
- 1
kernel/x86_64/dgemv_n_microk_nehalem-4.c View File

@@ -62,7 +62,7 @@ static void dgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"subq $4 , %1 \n\t" "subq $4 , %1 \n\t"
"jz 2f \n\t" "jz 2f \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"


"xorpd %%xmm4 , %%xmm4 \n\t" "xorpd %%xmm4 , %%xmm4 \n\t"


+ 1
- 1
kernel/x86_64/dscal.c View File

@@ -99,7 +99,7 @@ static void dscal_kernel_inc_8(BLASLONG n, FLOAT *alpha, FLOAT *x, BLASLONG inc_


"leaq (%1,%4,4), %2 \n\t" "leaq (%1,%4,4), %2 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"


"1: \n\t" "1: \n\t"
"movsd (%1) , %%xmm4 \n\t" "movsd (%1) , %%xmm4 \n\t"


+ 1
- 1
kernel/x86_64/dsymv_L_microk_nehalem-2.c View File

@@ -47,7 +47,7 @@ static void dsymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, FL
"shufpd $0, %%xmm6, %%xmm6 \n\t" "shufpd $0, %%xmm6, %%xmm6 \n\t"
"shufpd $0, %%xmm7, %%xmm7 \n\t" "shufpd $0, %%xmm7, %%xmm7 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movups (%4,%0,8), %%xmm12 \n\t" // 2 * a "movups (%4,%0,8), %%xmm12 \n\t" // 2 * a
"movups (%2,%0,8), %%xmm8 \n\t" // 2 * x "movups (%2,%0,8), %%xmm8 \n\t" // 2 * x


+ 1
- 1
kernel/x86_64/dsymv_U_microk_nehalem-2.c View File

@@ -50,7 +50,7 @@ static void dsymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT


"xorq %0,%0 \n\t" "xorq %0,%0 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movups (%4,%0,8), %%xmm12 \n\t" // 2 * a "movups (%4,%0,8), %%xmm12 \n\t" // 2 * a
"movups (%2,%0,8), %%xmm8 \n\t" // 2 * x "movups (%2,%0,8), %%xmm8 \n\t" // 2 * x


+ 1
- 1
kernel/x86_64/saxpy_microk_nehalem-2.c View File

@@ -39,7 +39,7 @@ static void saxpy_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha)
"movss (%4), %%xmm0 \n\t" // alpha "movss (%4), %%xmm0 \n\t" // alpha
"shufps $0, %%xmm0, %%xmm0 \n\t" "shufps $0, %%xmm0, %%xmm0 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
// "prefetcht0 192(%2,%0,4) \n\t" // "prefetcht0 192(%2,%0,4) \n\t"
// "prefetcht0 192(%3,%0,4) \n\t" // "prefetcht0 192(%3,%0,4) \n\t"


+ 1
- 1
kernel/x86_64/sdot_microk_nehalem-2.c View File

@@ -41,7 +41,7 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"xorps %%xmm6, %%xmm6 \n\t" "xorps %%xmm6, %%xmm6 \n\t"
"xorps %%xmm7, %%xmm7 \n\t" "xorps %%xmm7, %%xmm7 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movups (%2,%0,4), %%xmm12 \n\t" // 4 * x "movups (%2,%0,4), %%xmm12 \n\t" // 4 * x
"movups (%3,%0,4), %%xmm8 \n\t" // 4 * x "movups (%3,%0,4), %%xmm8 \n\t" // 4 * x


+ 5
- 5
kernel/x86_64/sgemv_n_microk_nehalem-4.c View File

@@ -59,18 +59,18 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"shufps $0, %%xmm6 , %%xmm6 \n\t" "shufps $0, %%xmm6 , %%xmm6 \n\t"




".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"xorps %%xmm4 , %%xmm4 \n\t" "xorps %%xmm4 , %%xmm4 \n\t"
"xorps %%xmm5 , %%xmm5 \n\t" "xorps %%xmm5 , %%xmm5 \n\t"
"movups (%3,%0,4), %%xmm7 \n\t" // 4 * y "movups (%3,%0,4), %%xmm7 \n\t" // 4 * y


".align 2 \n\t"
".p2align 1 \n\t"
"movups (%4,%0,4), %%xmm8 \n\t" "movups (%4,%0,4), %%xmm8 \n\t"
"movups (%5,%0,4), %%xmm9 \n\t" "movups (%5,%0,4), %%xmm9 \n\t"
"movups (%6,%0,4), %%xmm10 \n\t" "movups (%6,%0,4), %%xmm10 \n\t"
"movups (%7,%0,4), %%xmm11 \n\t" "movups (%7,%0,4), %%xmm11 \n\t"
".align 2 \n\t"
".p2align 1 \n\t"
"mulps %%xmm12, %%xmm8 \n\t" "mulps %%xmm12, %%xmm8 \n\t"
"mulps %%xmm13, %%xmm9 \n\t" "mulps %%xmm13, %%xmm9 \n\t"
"mulps %%xmm14, %%xmm10 \n\t" "mulps %%xmm14, %%xmm10 \n\t"
@@ -84,7 +84,7 @@ static void sgemv_kernel_4x8( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, BLASLO
"movups (%5,%8,4), %%xmm9 \n\t" "movups (%5,%8,4), %%xmm9 \n\t"
"movups (%6,%8,4), %%xmm10 \n\t" "movups (%6,%8,4), %%xmm10 \n\t"
"movups (%7,%8,4), %%xmm11 \n\t" "movups (%7,%8,4), %%xmm11 \n\t"
".align 2 \n\t"
".p2align 1 \n\t"
"mulps %%xmm0 , %%xmm8 \n\t" "mulps %%xmm0 , %%xmm8 \n\t"
"mulps %%xmm1 , %%xmm9 \n\t" "mulps %%xmm1 , %%xmm9 \n\t"
"mulps %%xmm2 , %%xmm10 \n\t" "mulps %%xmm2 , %%xmm10 \n\t"
@@ -154,7 +154,7 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y, FLOAT
"movss (%8), %%xmm6 \n\t" // alpha "movss (%8), %%xmm6 \n\t" // alpha
"shufps $0, %%xmm6 , %%xmm6 \n\t" "shufps $0, %%xmm6 , %%xmm6 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"xorps %%xmm4 , %%xmm4 \n\t" "xorps %%xmm4 , %%xmm4 \n\t"
"movups (%3,%0,4), %%xmm7 \n\t" // 4 * y "movups (%3,%0,4), %%xmm7 \n\t" // 4 * y


+ 1
- 1
kernel/x86_64/sgemv_t_microk_nehalem-4.c View File

@@ -40,7 +40,7 @@ static void sgemv_kernel_4x4( BLASLONG n, FLOAT **ap, FLOAT *x, FLOAT *y)
"xorps %%xmm6 , %%xmm6 \n\t" "xorps %%xmm6 , %%xmm6 \n\t"
"xorps %%xmm7 , %%xmm7 \n\t" "xorps %%xmm7 , %%xmm7 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"


"movups (%2,%0,4), %%xmm12 \n\t" // 4 * x "movups (%2,%0,4), %%xmm12 \n\t" // 4 * x


+ 1
- 1
kernel/x86_64/ssymv_L_microk_nehalem-2.c View File

@@ -47,7 +47,7 @@ static void ssymv_kernel_4x4(BLASLONG from, BLASLONG to, FLOAT **a, FLOAT *x, F
"shufps $0, %%xmm6, %%xmm6 \n\t" "shufps $0, %%xmm6, %%xmm6 \n\t"
"shufps $0, %%xmm7, %%xmm7 \n\t" "shufps $0, %%xmm7, %%xmm7 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movups (%2,%0,4), %%xmm8 \n\t" // 4 * x "movups (%2,%0,4), %%xmm8 \n\t" // 4 * x
"movups (%3,%0,4), %%xmm9 \n\t" // 4 * y "movups (%3,%0,4), %%xmm9 \n\t" // 4 * y


+ 1
- 1
kernel/x86_64/ssymv_U_microk_nehalem-2.c View File

@@ -50,7 +50,7 @@ static void ssymv_kernel_4x4(BLASLONG n, FLOAT *a0, FLOAT *a1, FLOAT *a2, FLOAT


"xorq %0,%0 \n\t" "xorq %0,%0 \n\t"


".align 16 \n\t"
".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movups (%2,%0,4), %%xmm8 \n\t" // 4 * x "movups (%2,%0,4), %%xmm8 \n\t" // 4 * x
"movups (%3,%0,4), %%xmm9 \n\t" // 4 * y "movups (%3,%0,4), %%xmm9 \n\t" // 4 * y


Loading…
Cancel
Save