Browse Source

Simplified thread throttling function in gemv

tags/v0.3.30
shubham.chaudhari 6 months ago
parent
commit
8e289ecddc
1 changed file with 18 additions and 33 deletions
  1. +18
    -33
      interface/gemv.c

+ 18
- 33
interface/gemv.c View File

@@ -70,11 +70,22 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT


#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
/*
 * Thread throttling for gemv on Neoverse V1.
 *
 * Maps the problem size MN onto a thread count through a fixed ladder of
 * thresholds, never returning more than ncpu. Two ladders exist: one used
 * when DOUBLE is defined and one used otherwise; the choice is made at
 * compile time.
 *
 * MN   - problem size the thresholds are compared against
 *        (presumably rows * columns of the matrix -- TODO confirm at caller).
 * ncpu - upper bound on the number of threads that may be returned.
 *
 * Returns 1 for the smallest problems, MIN(ncpu, k) for the intermediate
 * bands, and ncpu once MN reaches the last threshold.
 */
static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
#ifdef DOUBLE
  /* Ladder used when DOUBLE is defined. */
  return (MN < 8100L)    ? 1
       : (MN < 12100L)   ? MIN(ncpu, 2)
       : (MN < 36100L)   ? MIN(ncpu, 4)
       : (MN < 84100L)   ? MIN(ncpu, 8)
       : (MN < 348100L)  ? MIN(ncpu, 16)
       : (MN < 435600L)  ? MIN(ncpu, 24)
       : (MN < 810000L)  ? MIN(ncpu, 32)
       : (MN < 1050625L) ? MIN(ncpu, 40)
       : ncpu;
#else
  /* Ladder used when DOUBLE is not defined. */
  return (MN < 25600L)  ? 1
       : (MN < 63001L)  ? MIN(ncpu, 4)
       : (MN < 459684L) ? MIN(ncpu, 16)
       : ncpu;
#endif
}
#endif


@@ -89,24 +100,6 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) {
} }
#endif #endif


// Thread throttling for dgemv on Neoverse V1.
//
// Walks a fixed ladder of MN thresholds from smallest to largest and returns
// the thread count of the first band MN falls into, never exceeding ncpu.
//   MN   - problem size compared against the thresholds
//          (presumably rows * columns -- TODO confirm at caller).
//   ncpu - upper bound on the returned thread count.
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
static inline int get_dgemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
  // Smallest problems run on a single thread.
  if (MN < 8100L) {
    return 1;
  }
  if (MN < 12100L) {
    return MIN(ncpu, 2);
  }
  if (MN < 36100L) {
    return MIN(ncpu, 4);
  }
  if (MN < 84100L) {
    return MIN(ncpu, 8);
  }
  if (MN < 348100L) {
    return MIN(ncpu, 16);
  }
  if (MN < 435600L) {
    return MIN(ncpu, 24);
  }
  if (MN < 810000L) {
    return MIN(ncpu, 32);
  }
  if (MN < 1050625L) {
    return MIN(ncpu, 40);
  }
  // At or beyond the last threshold every available thread is used.
  return ncpu;
}
#endif

static inline int get_gemv_optimal_nthreads(BLASLONG MN) { static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
int ncpu = num_cpu_avail(3); int ncpu = num_cpu_avail(3);
#if defined(_WIN64) && defined(_M_ARM64) #if defined(_WIN64) && defined(_M_ARM64)
@@ -114,25 +107,17 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) {
return num_cpu_avail(4); return num_cpu_avail(4);
return 1; return 1;
#endif #endif
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16)
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
#elif defined(NEOVERSEV1) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16)
return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu);
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) #elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16)
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(BFLOAT16)
if (strcmp(gotoblas_corename(), "neoversev1") == 0) { if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); return get_gemv_optimal_nthreads_neoversev1(MN, ncpu);
} }
if (strcmp(gotoblas_corename(), "neoversev2") == 0) { if (strcmp(gotoblas_corename(), "neoversev2") == 0) {
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); return get_gemv_optimal_nthreads_neoversev2(MN, ncpu);
} }
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16)
if (strcmp(gotoblas_corename(), "neoversev1") == 0) {
return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu);
}


#endif #endif


if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD ) if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD )


Loading…
Cancel
Save