Browse Source

Prepared lapack/lauum for UNROLL values that are not a power of two.

tags/v0.2.20^2
Werner Saar 8 years ago
parent
commit
209b63197e
2 changed files with 2 additions and 2 deletions
  1. +1
    -1
      lapack/lauum/lauum_L_parallel.c
  2. +1
    -1
      lapack/lauum/lauum_U_parallel.c

+ 1
- 1
lapack/lauum/lauum_L_parallel.c View File

@@ -88,7 +88,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.beta = NULL;
newarg.nthreads = args -> nthreads;

- blocking = (n / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ blocking = ((n / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (blocking > GEMM_Q) blocking = GEMM_Q;

for (i = 0; i < n; i += blocking) {


+ 1
- 1
lapack/lauum/lauum_U_parallel.c View File

@@ -88,7 +88,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa,
newarg.beta = NULL;
newarg.nthreads = args -> nthreads;

- blocking = (n / 2 + GEMM_UNROLL_N - 1) & ~(GEMM_UNROLL_N - 1);
+ blocking = ((n / 2 + GEMM_UNROLL_N - 1)/GEMM_UNROLL_N) * GEMM_UNROLL_N;
if (blocking > GEMM_Q) blocking = GEMM_Q;

for (i = 0; i < n; i += blocking) {


Loading…
Cancel
Save