Browse Source

Merge pull request #5127 from Harishmcw/gesv-threshold

Refined GESV Parallelization Logic for Windows on ARM64
tags/v0.3.30
Martin Kroeker GitHub 7 months ago
parent
commit
a64b75a2e0
No known key found for this signature in database. GPG Key ID: B5690EEEBB952194
1 changed file with 19 additions and 7 deletions
  1. +19
    -7
      interface/lapack/gesv.c

+ 19
- 7
interface/lapack/gesv.c View File

@@ -107,21 +107,33 @@ int NAME(blasint *N, blasint *NRHS, FLOAT *a, blasint *ldA, blasint *ipiv,

#ifndef PPC440
buffer = (FLOAT *)blas_memory_alloc(1);
sa = (FLOAT *)((BLASLONG)buffer + GEMM_OFFSET_A);
sb = (FLOAT *)(((BLASLONG)sa + ((GEMM_P * GEMM_Q * COMPSIZE * SIZE + GEMM_ALIGN) & ~GEMM_ALIGN)) + GEMM_OFFSET_B);
#endif

#ifdef SMP
args.common = NULL;
#ifndef DOUBLE
if (args.m*args.n < 40000)

#if defined(_WIN64) && defined(_M_ARM64)
#ifdef COMPLEX
if (args.m * args.n > 600)
#else
if (args.m * args.n > 1000)
#endif
args.nthreads = num_cpu_avail(4);
else
args.nthreads = 1;
#else
if (args.m*args.n < 10000)
#ifndef DOUBLE
if (args.m * args.n < 40000)
#else
if (args.m * args.n < 10000)
#endif
args.nthreads = 1;
else
args.nthreads = num_cpu_avail(4);
#endif
args.nthreads=1;
else
args.nthreads = num_cpu_avail(4);

if (args.nthreads == 1) {
#endif


Loading…
Cancel
Save