|
|
@@ -63,6 +63,36 @@ static int (*gemv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT |
|
|
|
}; |
|
|
|
#endif |
|
|
|
|
|
|
|
#ifdef DYNAMIC_ARCH |
|
|
|
extern char* gotoblas_corename(void); |
|
|
|
#endif |
|
|
|
|
|
|
|
#if defined(DYNAMIC_ARCH) || defined(NEOVERSEV1) |
|
|
|
static inline int get_gemv_optimal_nthreads_neoversev1(BLASLONG MN, int ncpu) { |
|
|
|
return |
|
|
|
MN < 25600L ? 1 |
|
|
|
: MN < 63001L ? MIN(ncpu, 4) |
|
|
|
: MN < 459684L ? MIN(ncpu, 16) |
|
|
|
: ncpu; |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
static inline int get_gemv_optimal_nthreads(BLASLONG MN) { |
|
|
|
int ncpu = num_cpu_avail(3); |
|
|
|
#if defined(NEOVERSEV1) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) |
|
|
|
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); |
|
|
|
#elif defined(DYNAMIC_ARCH) && !defined(COMPLEX) && !defined(DOUBLE) && !defined(BFLOAT16) |
|
|
|
if (strcmp(gotoblas_corename(), "neoversev1") == 0) { |
|
|
|
return get_gemv_optimal_nthreads_neoversev1(MN, ncpu); |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|
if ( MN < 115200L * GEMM_MULTITHREAD_THRESHOLD ) |
|
|
|
return 1; |
|
|
|
else |
|
|
|
return num_cpu_avail(2); |
|
|
|
} |
|
|
|
|
|
|
|
#ifndef CBLAS |
|
|
|
|
|
|
|
void NAME(char *TRANS, blasint *M, blasint *N, |
|
|
@@ -225,11 +255,7 @@ void CNAME(enum CBLAS_ORDER order, |
|
|
|
STACK_ALLOC(buffer_size, FLOAT, buffer); |
|
|
|
|
|
|
|
#ifdef SMP |
|
|
|
|
|
|
|
if ( 1L * m * n < 115200L * GEMM_MULTITHREAD_THRESHOLD ) |
|
|
|
nthreads = 1; |
|
|
|
else |
|
|
|
nthreads = num_cpu_avail(2); |
|
|
|
nthreads = get_gemv_optimal_nthreads(1L * m * n); |
|
|
|
|
|
|
|
if (nthreads == 1) { |
|
|
|
#endif |
|
|
|