Browse Source

Add thread throttling profile for SGEMM on `NEOVERSEV2`

tags/v0.3.30
Marek Michalowski 7 months ago
parent
commit
b723c1b7b7
2 changed files with 25 additions and 1 deletions
  1. +2
    -1
      CONTRIBUTORS.md
  2. +23
    -0
      interface/gemm.c

+ 2
- 1
CONTRIBUTORS.md View File

@@ -237,8 +237,9 @@ In chronological order:
* [2025-01-10] Add thread throttling profile for SGEMM on NEOVERSEV1
* [2025-01-21] Optimize gemv_t_sve_v1x3 kernel

* Marek Michalowski <https://github.com/michalowski-arm>
* Marek Michalowski <marek.michalowski@arm.com>
* [2025-01-21] Add thread throttling profile for SGEMV on `NEOVERSEV1`
* [2025-02-18] Add thread throttling profile for SGEMM on `NEOVERSEV2`

* Ye Tao <ye.tao@arm.com>
* [2025-02-03] Optimize SBGEMM kernel on NEOVERSEV1

+ 23
- 0
interface/gemm.c View File

@@ -198,14 +198,37 @@ static inline int get_gemm_optimal_nthreads_neoversev1(double MNK, int ncpu) {
}
#endif

#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV2)
/*
 * Thread-throttling profile used for GEMM on Neoverse V2.
 *
 * MNK  - problem-size metric supplied by the caller (presumably the
 *        product M * N * K, going by the name; confirm against caller).
 * ncpu - upper limit on the number of threads that may be returned.
 *
 * Returns 1 when MNK is below the first threshold.  For larger sizes the
 * result is the thread cap of the first tier whose bound exceeds MNK,
 * clamped to ncpu.  Sizes at or beyond the last bound return ncpu as is.
 */
static inline int get_gemm_optimal_nthreads_neoversev2(double MNK, int ncpu) {
  /* Exclusive upper bound on MNK paired with the thread cap for that tier.
   * Entries must stay sorted by ascending bound: the first match wins. */
  static const struct {
    double mnk_bound;
    int thread_cap;
  } tiers[] = {
    {    1092727.0,  6 },
    {    2628072.0,  8 },
    {    8000000.0, 12 },
    {   20346417.0, 16 },
    {   57066625.0, 24 },
    {   91125000.0, 28 },
    {  238328000.0, 40 },
    {  454756609.0, 48 },
    {  857375000.0, 56 },
    { 1073741824.0, 64 },
  };
  unsigned int idx;

  /* Smallest problems always run single-threaded, regardless of ncpu. */
  if (MNK < 125000.0) {
    return 1;
  }

  for (idx = 0; idx < sizeof(tiers) / sizeof(tiers[0]); idx++) {
    if (MNK < tiers[idx].mnk_bound) {
      const int cap = tiers[idx].thread_cap;
      return MIN(ncpu, cap);
    }
  }

  /* Beyond the last tier no throttling is applied. */
  return ncpu;
}
#endif

static inline int get_gemm_optimal_nthreads(double MNK) {
int ncpu = num_cpu_avail(3);
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemm_optimal_nthreads_neoversev2(MNK, ncpu);
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
return get_gemm_optimal_nthreads_neoversev1(MNK, ncpu);
}
if (strcmp(gotoblas_corename(), "neoversev2") == 0) {
return get_gemm_optimal_nthreads_neoversev2(MNK, ncpu);
}
#endif
if ( MNK <= (SMP_THRESHOLD_MIN * (double) GEMM_MULTITHREAD_THRESHOLD) ) {
return 1;


Loading…
Cancel
Save