Browse Source

Add thread throttling profile for SGEMV on `NEOVERSEV1`

tags/v0.3.30
Marek Michalowski 8 months ago
parent
commit
4d5b13f765
2 changed files with 35 additions and 6 deletions
  1. +4
    -1
      CONTRIBUTORS.md
  2. +31
    -5
      interface/gemv.c

+ 4
- 1
CONTRIBUTORS.md View File

@@ -231,4 +231,7 @@ In chronological order:
* [2024-01-24] Optimize GEMV forwarding on ARM64 systems

* Aniket P. Garade <https://github.com/garadeaniket> Sushil Pratap Singh <https://github.com/SushilPratap04> Juliya James <https://github.com/Juliya32>
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE
* [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE

* Marek Michalowski <https://github.com/michalowski-arm>
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`

+ 31
- 5
interface/gemv.c View File

@@ -63,6 +63,36 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT
};
#endif

#ifdef DYNAMIC_ARCH
extern char* gotoblas_corename(void);
#endif

#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
return
MN < 25600L ? 1
: MN < 63001L ? MIN(ncpu, 4)
: MN < 459684L ? MIN(ncpu, 16)
: ncpu;
}
#endif

static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
int ncpu = num_cpu_avail(3);
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
}
#endif

if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )
return 1;
else
return num_cpu_avail(2);
}

#ifndef CBLAS

void NAME(char *TRANS, blasint *M, blasint *N,
@@ -225,11 +255,7 @@ void CNAME(enum CBLAS_ORDER order,
STACK_ALLOC(buffer_size, FLOAT, buffer);

#ifdef SMP

if ( 1L * m * n < 115200L * GEMM_MULTITHREAD_THRESHOLD )
nthreads = 1;
else
nthreads = num_cpu_avail(2);
nthreads = get_gemv_optimal_nthreads(1L * m * n);

if (nthreads == 1) {
#endif


Loading…
Cancel
Save