|
@@ -526,7 +526,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO |
|
|
BLASLONG width, i, j, k; |
|
|
BLASLONG width, i, j, k; |
|
|
BLASLONG n, n_from, n_to; |
|
|
BLASLONG n, n_from, n_to; |
|
|
int mode, mask; |
|
|
int mode, mask; |
|
|
double dnum; |
|
|
|
|
|
|
|
|
double dnum, di, dinum; |
|
|
|
|
|
|
|
|
if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) { |
|
|
if ((nthreads == 1) || (args -> n < nthreads * SWITCH_RATIO)) { |
|
|
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0); |
|
|
SYRK_LOCAL(args, range_m, range_n, sa, sb, 0); |
|
@@ -601,9 +601,14 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO |
|
|
|
|
|
|
|
|
if (nthreads - num_cpu > 1) { |
|
|
if (nthreads - num_cpu > 1) { |
|
|
|
|
|
|
|
|
double di = (double)i; |
|
|
|
|
|
|
|
|
di = (double)i; |
|
|
|
|
|
|
|
|
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1) ); |
|
|
|
|
|
|
|
|
dinum = di * di + dnum; |
|
|
|
|
|
|
|
|
|
|
|
if (dinum > 0) |
|
|
|
|
|
width = (((BLASLONG)((sqrt(dinum) - di) + mask)/(mask+1)) * (mask+1) ); |
|
|
|
|
|
else |
|
|
|
|
|
width = (((BLASLONG)(- di + mask)/(mask+1)) * (mask+1) ); |
|
|
|
|
|
|
|
|
if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) ); |
|
|
if (num_cpu == 0) width = n - (((n - width)/(mask+1)) * (mask+1) ); |
|
|
|
|
|
|
|
@@ -643,10 +648,15 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO |
|
|
|
|
|
|
|
|
if (nthreads - num_cpu > 1) { |
|
|
if (nthreads - num_cpu > 1) { |
|
|
|
|
|
|
|
|
double di = (double)i; |
|
|
|
|
|
|
|
|
di = (double)i; |
|
|
|
|
|
|
|
|
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1)); |
|
|
|
|
|
|
|
|
dinum = di * di +dnum; |
|
|
|
|
|
|
|
|
|
|
|
if (dinum > 0) |
|
|
|
|
|
width = (((BLASLONG)((sqrt(di * di + dnum) - di) + mask)/(mask+1)) * (mask+1)); |
|
|
|
|
|
else |
|
|
|
|
|
width = (((BLASLONG)(- di + mask)/(mask+1)) * (mask+1)); |
|
|
|
|
|
|
|
|
if ((width > n - i) || (width < mask)) width = n - i; |
|
|
if ((width > n - i) || (width < mask)) width = n - i; |
|
|
|
|
|
|
|
|
} else { |
|
|
} else { |
|
|