|
@@ -70,11 +70,22 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT |
|
|
|
|
|
|
|
|
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
/*
 * Thread-count heuristic for GEMV on Neoverse V1.
 *
 * MN   : problem-size measure supplied by the caller; it is only compared
 *        against the fixed thresholds below.
 *        NOTE(review): presumably m * n -- confirm against the caller.
 * ncpu : upper limit on the number of threads that may be used.
 *
 * Returns 1 below the smallest threshold, then a stepwise increasing cap
 * (never more than ncpu), and ncpu itself above the largest threshold.
 * Double-precision builds (DOUBLE defined) use a finer-grained ladder than
 * the other builds.
 */
static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
#ifdef DOUBLE
  return (MN < 8100L)    ? 1
       : (MN < 12100L)   ? MIN(ncpu, 2)
       : (MN < 36100L)   ? MIN(ncpu, 4)
       : (MN < 84100L)   ? MIN(ncpu, 8)
       : (MN < 348100L)  ? MIN(ncpu, 16)
       : (MN < 435600L)  ? MIN(ncpu, 24)
       : (MN < 810000L)  ? MIN(ncpu, 32)
       : (MN < 1050625L) ? MIN(ncpu, 40)
       : ncpu;
#else
  return (MN < 25600L)   ? 1
       : (MN < 63001L)   ? MIN(ncpu, 4)
       : (MN < 459684L)  ? MIN(ncpu, 16)
       : ncpu;
#endif
}
#endif
|
|
|
|
|
|
|
@@ -89,24 +100,6 @@ static inline int get_gemv_optimal_nthreads_neoversev2(BLASLONG MN, int ncpu) { |
|
|
} |
|
|
} |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
/*
 * Thread throttling for dgemv on Neoverse V1.
 *
 * MN   : problem-size measure supplied by the caller; it is only compared
 *        against the fixed thresholds below.
 *        NOTE(review): presumably m * n -- confirm against the caller.
 * ncpu : upper limit on the number of threads that may be used.
 *
 * Returns 1 below the smallest threshold, then a stepwise increasing cap
 * (never more than ncpu), and ncpu itself above the largest threshold.
 */
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1)
static inline int get_dgemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) {
  return
      MN < 8100L    ? 1
    : MN < 12100L   ? MIN(ncpu, 2)
    : MN < 36100L   ? MIN(ncpu, 4)
    : MN < 84100L   ? MIN(ncpu, 8)
    : MN < 348100L  ? MIN(ncpu, 16)
    : MN < 435600L  ? MIN(ncpu, 24)
    : MN < 810000L  ? MIN(ncpu, 32)
    : MN < 1050625L ? MIN(ncpu, 40)  /* L suffix added to match the other thresholds */
    : ncpu;
}
#endif
|
|
|
|
|
|
|
|
|
|
|
static inline int get_gemv_optimal_nthreads(BLASLONG MN) { |
|
|
static inline int get_gemv_optimal_nthreads(BLASLONG MN) { |
|
|
int ncpu = num_cpu_avail(3); |
|
|
int ncpu = num_cpu_avail(3); |
|
|
#if defined(_WIN64) && defined(_M_ARM64) |
|
|
#if defined(_WIN64) && defined(_M_ARM64) |
|
@@ -114,25 +107,17 @@ static inline int get_gemv_optimal_nthreads(BLASLONG MN) { |
|
|
return num_cpu_avail(4); |
|
|
return num_cpu_avail(4); |
|
|
return 1; |
|
|
return 1; |
|
|
#endif |
|
|
#endif |
|
|
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) |
|
|
|
|
|
|
|
|
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(BFLOAT16) |
|
|
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); |
|
|
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); |
|
|
#elif defined(NEOVERSEV1) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16) |
|
|
|
|
|
return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu); |
|
|
|
|
|
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) |
|
|
#elif defined(NEOVERSEV2) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) |
|
|
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); |
|
|
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); |
|
|
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) |
|
|
|
|
|
|
|
|
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(BFLOAT16) |
|
|
if (strcmp(gotoblas_corename(), "neoversev1") == 0) { |
|
|
if (strcmp(gotoblas_corename(), "neoversev1") == 0) { |
|
|
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); |
|
|
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); |
|
|
} |
|
|
} |
|
|
if (strcmp(gotoblas_corename(), "neoversev2") == 0) { |
|
|
if (strcmp(gotoblas_corename(), "neoversev2") == 0) { |
|
|
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); |
|
|
return get_gemv_optimal_nthreads_neoversev2(MN, ncpu); |
|
|
} |
|
|
} |
|
|
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && defined(DOUBLE) && !defined(BFLOAT16) |
|
|
|
|
|
if (strcmp(gotoblas_corename(), "neoversev1") == 0) { |
|
|
|
|
|
return get_dgemv_optimal_nthreads_neoversev1(MN, ncpu); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD ) |
|
|
if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD ) |
|
|