Browse Source

Merge pull request #1419 from brada4/develop

Initialize uninitialized values for repeated calls
tags/v0.3.0
Martin Kroeker GitHub 7 years ago
parent
commit
e388459a27
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
24 changed files with 137 additions and 225 deletions
  1. +1
    -0
      driver/others/init.c
  2. +1
    -1
      driver/others/memory.c
  3. +4
    -2
      kernel/generic/laswp_ncopy_2.c
  4. +7
    -19
      kernel/generic/trmm_ltcopy_2.c
  5. +9
    -28
      kernel/generic/trmm_ltcopy_4.c
  6. +19
    -26
      kernel/generic/trmm_utcopy_16.c
  7. +10
    -19
      kernel/generic/trmm_utcopy_2.c
  8. +10
    -15
      kernel/generic/trmm_utcopy_4.c
  9. +2
    -0
      kernel/generic/trmmkernel_16x2.c
  10. +2
    -0
      kernel/generic/trmmkernel_2x2.c
  11. +2
    -0
      kernel/generic/trmmkernel_8x2.c
  12. +9
    -9
      kernel/generic/zlaswp_ncopy_2.c
  13. +4
    -2
      kernel/generic/zlaswp_ncopy_4.c
  14. +7
    -37
      kernel/generic/ztrmm_ltcopy_2.c
  15. +11
    -20
      kernel/generic/ztrmm_utcopy_1.c
  16. +11
    -22
      kernel/generic/ztrmm_utcopy_2.c
  17. +9
    -12
      kernel/generic/ztrmm_utcopy_8.c
  18. +2
    -0
      kernel/generic/ztrmmkernel_2x2.c
  19. +2
    -0
      kernel/generic/ztrmmkernel_4x4.c
  20. +1
    -1
      kernel/mips/cgemv_n_msa.c
  21. +2
    -2
      kernel/mips/dgemv_n_msa.c
  22. +2
    -2
      kernel/mips/sgemv_n_msa.c
  23. +6
    -6
      kernel/x86_64/dtrmm_kernel_4x8_haswell.c
  24. +4
    -2
      lapack/laswp/generic/laswp_k_4.c

+ 1
- 0
driver/others/init.c View File

@@ -903,6 +903,7 @@ void gotoblas_affinity_init(void) {
}
#else
common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
}
#endif

#endif


+ 1
- 1
driver/others/memory.c View File

@@ -177,7 +177,7 @@ int get_num_procs(void) {
cpu_set_t *cpusetp;
size_t size;
int ret;
int i,n;
// int i,n;

if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
#if !defined(OS_LINUX)


+ 4
- 2
kernel/generic/laswp_ncopy_2.c View File

@@ -116,7 +116,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
*b2 = A2;
*b4 = A4;
}
} else
} else {
if (b1 == a2) {
if (b2 == a2) {
*(buffer + 0) = A2;
@@ -139,7 +139,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
*(buffer + 3) = A4;
*b1 = A1;
*b3 = A3;
} else
} else {
if (b2 == b1) {
*(buffer + 0) = B1;
*(buffer + 1) = B3;
@@ -157,6 +157,8 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
*b3 = A3;
*b4 = A4;
}
}
}
}

buffer += 4;


+ 7
- 19
kernel/generic/trmm_ltcopy_2.c View File

@@ -120,30 +120,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
ao2 += 1; */
b += 2;
} else
#ifdef UNIT
if (X < posY) {
data01 = *(ao1 + 0);
data02 = *(ao1 + 1);

b[ 0] = data01;
b[ 1] = data02;
// ao1 += lda;
b += 2;
} else {
#endif
b[ 0] = *(ao1 + 0);
#ifdef UNIT
data02 = *(ao1 + 1);
} else {

b[ 0] = ONE;
b[ 1] = data02;
#else
data01 = *(ao1 + 0);
data02 = *(ao1 + 1);

b[ 0] = data01;
b[ 1] = data02;
#endif
// ao1 += 2;
b += 2;
}
#endif
b[ 1] = *(ao1 + 1);
b += 2;
}

posY += 2;


+ 9
- 28
kernel/generic/trmm_ltcopy_4.c View File

@@ -410,36 +410,17 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON

i = (m & 1);
if (i) {

if (X > posY) {
/* ao1 += 1;
ao2 += 1; */

b += 2;
} else
if (X < posY) {
data01 = *(ao1 + 0);
data02 = *(ao1 + 1);

b[ 0] = data01;
b[ 1] = data02;
// ao1 += lda;
b += 2;
} else {
#ifdef UNIT
data02 = *(ao1 + 1);

b[ 0] = ONE;
b[ 1] = data02;
#else
data01 = *(ao1 + 0);
data02 = *(ao1 + 1);

b[ 0] = data01;
b[ 1] = data02;
if (X < posY) {
#endif
b += 2;
}
b[ 0] = *(ao1 + 0);
#ifdef UNIT
} else {
b[ 0] = ONE;
}
#endif
b[ 1] = *(ao1 + 1);
b += 2;
}
posY += 2;
}


+ 19
- 26
kernel/generic/trmm_utcopy_16.c View File

@@ -1487,23 +1487,19 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (X < posY) {
a01 ++;
a02 ++;
b += 2;
} else
} else {
#ifdef UNIT
if (X > posY) {
#endif
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
// a01 += lda;
b += 2;
} else {
#ifdef UNIT
} else {
b[ 0] = ONE;
b[ 1] = *(a01 + 1);
#else
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
#endif
b += 2;
}
#endif
b[ 1] = *(a01 + 1);
}
b += 2;
}
posY += 2;
}
@@ -1522,25 +1518,22 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
if (i > 0) {
do {
if (X < posY) {
a01 += 1;
b ++;
} else
a01 ++;
} else {
#ifdef UNIT
if (X > posY) {
#endif
b[ 0] = *(a01 + 0);
a01 += lda;
b ++;
} else {
#ifdef UNIT
} else {
b[ 0] = ONE;
#else
b[ 0] = *(a01 + 0);
#endif
a01 += lda;
b ++;
}

X += 1;
i --;
#endif
a01 += lda;
}
b ++;
X ++;
i --;
} while (i > 0);
}
// posY += 1;


+ 10
- 19
kernel/generic/trmm_utcopy_2.c View File

@@ -161,27 +161,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
i = m;
if (m > 0) {
do {
if (X < posY) {
b += 1;
ao1 += 1;
} else
if (X > posY) {
data01 = *(ao1 + 0);
b[ 0] = data01;
b += 1;
ao1 += lda;
} else {
#ifdef UNIT
b[ 0] = ONE;
#else
data01 = *(ao1 + 0);
b[ 0] = data01;
if (X > posY) {
#endif
b += 1;
ao1 += lda;
}

X += 1;
b[ 0] = *(ao1 + 0);
#ifdef UNIT
} else {
b[ 0] = ONE;
}
#endif
b ++;
ao1 += lda;
X ++;
i --;
} while (i > 0);
}


+ 10
- 15
kernel/generic/trmm_utcopy_4.c View File

@@ -443,26 +443,21 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
do {

if (X < posY) {
b += 1;
ao1 += 1;
} else
} else {
#ifdef UNIT
if (X > posY) {
data01 = *(ao1 + 0);
b[ 0] = data01;
ao1 += lda;
b += 1;
} else {
#endif
b[ 0] = *(ao1 + 0);
#ifdef UNIT
} else {
b[ 0] = ONE;
#else
data01 = *(ao1 + 0);
b[ 0] = data01;
#endif
ao1 += lda;
b += 1;
}

X += 1;
#endif
ao1 += lda;
}
b ++;
X ++;
i --;
} while (i > 0);
}


+ 2
- 0
kernel/generic/trmmkernel_16x2.c View File

@@ -52,6 +52,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL

#if !defined(LEFT)
off = -offset;
#else
off = 0;
#endif




+ 2
- 0
kernel/generic/trmmkernel_2x2.c View File

@@ -11,6 +11,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
BLASLONG off, temp;
#if defined(TRMMKERNEL) && !defined(LEFT)
off = -offset;
#else
off = 0;
#endif
for (j=0; j<bn/2; j+=1)
{


+ 2
- 0
kernel/generic/trmmkernel_8x2.c View File

@@ -34,6 +34,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL

#if !defined(LEFT)
off = -offset;
#else
off = 0;
#endif




+ 9
- 9
kernel/generic/zlaswp_ncopy_2.c View File

@@ -204,20 +204,20 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
}
}

buffer += 8;
buffer += 8;

b1 = a + ip1;
b2 = a + ip2;
b1 = a + ip1;
b2 = a + ip2;

b3 = b1 + lda;
b4 = b2 + lda;
b3 = b1 + lda;
b4 = b2 + lda;

a1 += 4;
a3 += 4;
a1 += 4;
a3 += 4;

i --;
i --;
} while (i > 0);
}



+ 4
- 2
kernel/generic/zlaswp_ncopy_4.c View File

@@ -462,7 +462,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
} else
} else {
if (b1 == a2) {
if (b2 == a2) {
*(buffer + 0) = A3;
@@ -503,7 +503,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
*(b1 + 1) = A2;
*(b3 + 0) = A5;
*(b3 + 1) = A6;
} else
} else {
if (b2 == b1) {
*(buffer + 0) = B1;
*(buffer + 1) = B2;
@@ -536,6 +536,8 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT *a, BLASLONG lda, blasint
*(b4 + 0) = A7;
*(b4 + 1) = A8;
}
}
}
}

buffer += 8;


+ 7
- 37
kernel/generic/ztrmm_ltcopy_2.c View File

@@ -139,48 +139,18 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
}

if (m & 1) {

if (X > posY) {
/* ao1 += 2;
ao2 += 2; */
b += 4;

} else
#ifdef UNIT
if (X < posY) {
data1 = *(ao1 + 0);
data2 = *(ao1 + 1);
data3 = *(ao1 + 2);
data4 = *(ao1 + 3);

b[ 0] = data1;
b[ 1] = data2;
b[ 2] = data3;
b[ 3] = data4;

// ao1 += lda;
b += 4;
} else {
#endif
b[ 0] = *(ao1 + 0);
b[ 1] = *(ao1 + 1);
#ifdef UNIT
data3 = *(ao1 + 2);
data4 = *(ao1 + 3);

} else {
b[ 0] = ONE;
b[ 1] = ZERO;
b[ 2] = data3;
b[ 3] = data4;
#else
data1 = *(ao1 + 0);
data2 = *(ao1 + 1);
data3 = *(ao1 + 2);
data4 = *(ao1 + 3);

b[ 0] = data1;
b[ 1] = data2;
b[ 2] = data3;
b[ 3] = data4;
#endif
b += 4;
}
#endif
b += 4;
}

posY += 2;


+ 11
- 20
kernel/generic/ztrmm_utcopy_1.c View File

@@ -44,7 +44,7 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
BLASLONG i, js;
BLASLONG X;

FLOAT data01, data02;
// FLOAT data01, data02;
FLOAT *ao1;

lda += lda;
@@ -65,30 +65,21 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
do {
if (X < posY) {
ao1 += 2;
b += 2;
} else
} else {
#ifdef UNIT
if (X > posY) {
data01 = *(ao1 + 0);
data02 = *(ao1 + 1);
b[ 0] = data01;
b[ 1] = data02;
ao1 += lda;
b += 2;

} else {
#endif
b[ 0] = *(ao1 + 0);
b[ 1] = *(ao1 + 1);
#ifdef UNIT
} else {
b[ 0] = ONE;
b[ 1] = ZERO;
#else
data01 = *(ao1 + 0);
data02 = *(ao1 + 1);
b[ 0] = data01;
b[ 1] = data02;
#endif
ao1 += lda;
b += 2;
}

#endif
ao1 += lda;
}
b += 2;
X ++;
i --;
} while (i > 0);


+ 11
- 22
kernel/generic/ztrmm_utcopy_2.c View File

@@ -203,33 +203,22 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
do {
if (X < posY) {
ao1 += 2;
b += 2;
} else
} else {
#ifdef UNIT
if (X > posY) {
data1 = *(ao1 + 0);
data2 = *(ao1 + 1);

b[ 0] = data1;
b[ 1] = data2;

ao1 += lda;
b += 2;
} else {
#endif
b[ 0] = *(ao1 + 0);
b[ 1] = *(ao1 + 1);
#ifdef UNIT
} else {
b[ 0] = ONE;
b[ 1] = ZERO;
#else
data1 = *(ao1 + 0);
data2 = *(ao1 + 1);

b[ 0] = data1;
b[ 1] = data2;
#endif
ao1 += lda;
b += 2;
}

X += 1;
#endif
ao1 += lda;
}
b += 2;
X ++;
i --;
} while (i > 0);
}


+ 9
- 12
kernel/generic/ztrmm_utcopy_8.c View File

@@ -856,25 +856,22 @@ int CNAME(BLASLONG m, BLASLONG n, FLOAT *a, BLASLONG lda, BLASLONG posX, BLASLON
do {
if (X < posY) {
a01 += 2;
b += 2;
} else
} else {
#ifdef UNIT
if (X > posY) {
#endif
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
a01 += lda;
b += 2;
} else {
#ifdef UNIT
} else {
b[ 0] = ONE;
b[ 1] = ZERO;
#else
b[ 0] = *(a01 + 0);
b[ 1] = *(a01 + 1);
#endif
a01 += lda;
b += 2;
}
X += 1;
#endif
a01 += lda;
}
b += 2;
X ++;
i --;
} while (i > 0);
}


+ 2
- 0
kernel/generic/ztrmmkernel_2x2.c View File

@@ -15,6 +15,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alphar,FLOAT alphai,FLOAT* b

#if defined(TRMMKERNEL) && !defined(LEFT)
off = -offset;
#else
off = 0;
#endif
for (j=0; j<bn/2; j+=1)
{


+ 2
- 0
kernel/generic/ztrmmkernel_4x4.c View File

@@ -53,6 +53,8 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha_r, FLOAT alpha_i,FLOAT

#if defined(TRMMKERNEL) && !defined(LEFT)
off = -offset;
#else
off = 0;
#endif

for (j=0; j<bn/4; j+=1) // do blocks of the Mx4 loops


+ 1
- 1
kernel/mips/cgemv_n_msa.c View File

@@ -511,7 +511,7 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha_r, FLOAT alpha_i,
BLASLONG i, j, k, k_pref, pref_offset;
FLOAT *y_org = y;
FLOAT *pa0, *pa1, *pa2, *pa3;
FLOAT temp_r, temp_i, res0, res1, temp0_r;
FLOAT temp_r = 0.0, temp_i = 0.0, res0, res1, temp0_r;
FLOAT temp0_i, temp1_r, temp1_i, temp2_r, temp2_i, temp3_r, temp3_i;
v4f32 alphar, alphai;
v4f32 x0, x1, y0, y1, y2, y3, x0r, x0i, y0r, y1r, y0i, y1i;


+ 2
- 2
kernel/mips/dgemv_n_msa.c View File

@@ -484,10 +484,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *A,
FLOAT *pa0, *pa1, *pa2, *pa3, *pa4, *pa5, *pa6, *pa7;
FLOAT temp, temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
v2f64 v_alpha;
v2f64 x0, x1, x2, x3, y0, y1, y2, y3;
v2f64 x0, x1, x2, x3, y0 = 0.0, y1 = 0.0, y2 = 0.0, y3 = 0.0;
v2f64 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
v2f64 t16, t17, t18, t19, t20, t21, t22, t23, t24, t25, t26, t27, t28, t29;
v2f64 t30, t31, tp0, tp1, tp2, tp3, tp4, tp5, tp6, tp7;
v2f64 t30, t31, tp0 = 0.0, tp1 = 0.0, tp2 = 0.0, tp3 = 0.0, tp4 = 0.0, tp5 = 0.0, tp6 = 0.0, tp7 = 0.0;

v_alpha = COPY_DOUBLE_TO_VECTOR(alpha);



+ 2
- 2
kernel/mips/sgemv_n_msa.c View File

@@ -423,9 +423,9 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1, FLOAT alpha, FLOAT *A,
FLOAT *y_org = y;
FLOAT *pa0, *pa1, *pa2, *pa3, *pa4, *pa5, *pa6, *pa7;
FLOAT temp, temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
v4f32 v_alpha, x0, x1, y0, y1;
v4f32 v_alpha, x0, x1, y0 = 0.0, y1 = 0.0;
v4f32 t0, t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13, t14, t15;
v4f32 tp0, tp1, tp2, tp3, tp4, tp5, tp6, tp7;
v4f32 tp0 = 0.0, tp1 = 0.0, tp2 = 0.0, tp3 = 0.0, tp4 = 0.0, tp5 = 0.0, tp6 = 0.0, tp7 = 0.0;

v_alpha = COPY_FLOAT_TO_VECTOR(alpha);



+ 6
- 6
kernel/x86_64/dtrmm_kernel_4x8_haswell.c View File

@@ -777,9 +777,9 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
res3_2 = 0;
res3_3 = 0;

temp = backwards ? bk-off :
left ? off + 4 : // number of values in A
off + 4; // number of values in B
temp = backwards ? bk-off : off + 4;
/* left ? off + 4 : // number of values in A
off + 4; // number of values in B */

for (k=0; k<temp; k++)
{
@@ -857,9 +857,9 @@ int CNAME(BLASLONG bm,BLASLONG bn,BLASLONG bk,FLOAT alpha,FLOAT* ba,FLOAT* bb,FL
C3[3] = res3_3;

if (!backwards) {
temp = bk-off;
temp = left ? temp - 4 : // number of values in A
temp - 4; // number of values in B
temp = bk-off - 4;
/* temp = left ? temp - 4 : // number of values in A
temp - 4; // number of values in B */

ptrba += temp*4; // number of values in A
ptrbb += temp*4; // number of values in B


+ 4
- 2
lapack/laswp/generic/laswp_k_4.c View File

@@ -174,7 +174,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*a8 = B8;
*b8 = A8;
}
} else
} else {
if (b1 == a2) {
if (b2 != a1) {
if (b2 == a2) {
@@ -225,7 +225,7 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b5 = A5;
*a7 = B7;
*b7 = A7;
} else
} else {
if (b2 == b1) {
*a1 = B1;
*a2 = A1;
@@ -257,6 +257,8 @@ int CNAME(BLASLONG n, BLASLONG k1, BLASLONG k2, FLOAT dummy1, FLOAT *a, BLASLONG
*b7 = A7;
*b8 = A8;
}
}
}
}

b1 = a + ip1;


Loading…
Cancel
Save