From 4d5b13f765ac4632c2330020ca2946b3de4a7aa0 Mon Sep 17 00:00:00 2001
From: Marek Michalowski
Date: Tue, 21 Jan 2025 12:29:58 +0000
Subject: [PATCH] Add thread throttling profile for SGEMV on `NEOVERSEV1`

---
 CONTRIBUTORS.md  |  5 ++++-
 interface/gemv.c | 62 ++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 61 insertions(+), 6 deletions(-)

diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
index 508dbcd0e..fcc80cc7e 100644
--- a/CONTRIBUTORS.md
+++ b/CONTRIBUTORS.md
@@ -231,4 +231,7 @@ In chronological order:
   * [2024-01-24] Optimize GEMV forwarding on ARM64 systems
 
 * Aniket P. Garade Sushil Pratap Singh Juliya James
-  * [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE
+  * [2024-12-13] Optimized swap and rot Level-1 BLAS routines with ARM SVE
+
+* Marek Michalowski
+  * [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`
diff --git a/interface/gemv.c b/interface/gemv.c
index 2c121f130..f91f364ee 100644
--- a/interface/gemv.c
+++ b/interface/gemv.c
@@ -63,6 +63,62 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT
 };
 #endif
 
+#ifdef DYNAMIC_ARCH
+  extern char* gotoblas_corename(void);
+#endif
+
+#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
+/*
+ * Thread-count profile for single-precision real GEMV on Neoverse V1.
+ *
+ *   MN   - problem size; the caller passes m * n.
+ *   ncpu - upper bound on the returned thread count.
+ *
+ * Returns 1 for MN < 25600, MIN(ncpu, 4) for MN < 63001,
+ * MIN(ncpu, 16) for MN < 459684, and ncpu otherwise.
+ */
+static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
+  return
+      MN < 25600L ? 1
+    : MN < 63001L ? MIN(ncpu, 4)
+    : MN < 459684L ? MIN(ncpu, 16)
+    : ncpu;
+}
+#endif
+
+/*
+ * Pick the number of threads for a GEMV call of size MN (the caller
+ * passes m * n).
+ *
+ * Single-precision real builds use the Neoverse V1 profile when NEOVERSEV1
+ * is the compile-time target, or, under DYNAMIC_ARCH, when
+ * gotoblas_corename() returns "neoversev1". Every other case keeps the
+ * generic rule: 1 thread when MN < 115200 * GEMM_MULTITHREAD_THRESHOLD,
+ * otherwise num_cpu_avail(2).
+ *
+ * num_cpu_avail() is called only on the branch that consumes its result, so
+ * the generic small-problem case returns without querying it.
+ *
+ * NOTE(review): the Neoverse V1 branches pass 3 to num_cpu_avail() while the
+ * generic branch passes 2 -- confirm the difference is intended.
+ */
+static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
+#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
+  return get_gemv_optimal_nthreads_neoversev1(MN, num_cpu_avail(3));
+#else
+#if defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
+  if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
+    return get_gemv_optimal_nthreads_neoversev1(MN, num_cpu_avail(3));
+  }
+#endif
+
+  if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )
+    return 1;
+  else
+    return num_cpu_avail(2);
+#endif
+}
+
 #ifndef CBLAS
 
 void NAME(char *TRANS, blasint *M, blasint *N,
@@ -225,11 +281,7 @@ void CNAME(enum CBLAS_ORDER order,
   STACK_ALLOC(buffer_size, FLOAT, buffer);
 
 #ifdef SMP
-
-  if ( 1L * m * n < 115200L * GEMM_MULTITHREAD_THRESHOLD )
-    nthreads = 1;
-  else
-    nthreads = num_cpu_avail(2);
+  nthreads = get_gemv_optimal_nthreads((BLASLONG)m * n);
 
   if (nthreads == 1) {
 #endif