| @@ -88,6 +88,11 @@ VERSION = 0.1alpha2.5 | |||||
| # If you need to synchronize FP CSR between threads (for x86/x86_64 only). | # If you need to synchronize FP CSR between threads (for x86/x86_64 only). | ||||
| # CONSISTENT_FPCSR = 1 | # CONSISTENT_FPCSR = 1 | ||||
| # If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed | |||||
| # with a single thread. You can use this flag to avoid the overhead of multi-threading | |||||
| # for small matrix sizes. The default value is 4. | |||||
| # GEMM_MULTITHREAD_THRESHOLD = 4 | |||||
| # If you need a sanity check by comparing against the reference BLAS. It'll be very | # If you need a sanity check by comparing against the reference BLAS. It'll be very | ||||
| # slow (Not implemented yet). | # slow (Not implemented yet). | ||||
| # SANITY_CHECK = 1 | # SANITY_CHECK = 1 | ||||
| @@ -40,6 +40,11 @@ ifdef INTERFACE64 | |||||
| GETARCH_FLAGS += -DUSE64BITINT | GETARCH_FLAGS += -DUSE64BITINT | ||||
| endif | endif | ||||
| # Fall back to a threshold of 4 when GEMM_MULTITHREAD_THRESHOLD has not been set, | |||||
| # then append it to GETARCH_FLAGS as a -D preprocessor definition. | |||||
| ifndef GEMM_MULTITHREAD_THRESHOLD | |||||
| GEMM_MULTITHREAD_THRESHOLD=4 | |||||
| endif | |||||
| GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD) | |||||
| # This operation is expensive, so execution should be once. | # This operation is expensive, so execution should be once. | ||||
| ifndef GOTOBLAS_MAKEFILE | ifndef GOTOBLAS_MAKEFILE | ||||
| export GOTOBLAS_MAKEFILE = 1 | export GOTOBLAS_MAKEFILE = 1 | ||||
| @@ -34,6 +34,7 @@ int main(int argc, char **argv) { | |||||
| #ifdef USE64BITINT | #ifdef USE64BITINT | ||||
| printf("#define USE64BITINT\n"); | printf("#define USE64BITINT\n"); | ||||
| #endif | #endif | ||||
| /* GEMM_MULTITHREAD_THRESHOLD is supplied via -D as a plain integer literal (type int); | |||||
| cast it so the argument type matches the %ld conversion and avoids undefined behaviour. */ | |||||
| printf("#define GEMM_MULTITHREAD_THRESHOLD\t%ld\n", (long int)GEMM_MULTITHREAD_THRESHOLD); | |||||
| } | } | ||||
| return 0; | return 0; | ||||
| @@ -397,8 +397,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS | |||||
| mode |= (transb << BLAS_TRANSB_SHIFT); | mode |= (transb << BLAS_TRANSB_SHIFT); | ||||
| args.common = NULL; | args.common = NULL; | ||||
| args.nthreads = num_cpu_avail(3); | |||||
| if(args.m <= GEMM_MULTITHREAD_THRESHOLD || args.n <= GEMM_MULTITHREAD_THRESHOLD | |||||
| || args.k <=GEMM_MULTITHREAD_THRESHOLD){ | |||||
| args.nthreads = 1; | |||||
| }else{ | |||||
| args.nthreads = num_cpu_avail(3); | |||||
| } | |||||
| if (args.nthreads == 1) { | if (args.nthreads == 1) { | ||||
| #endif | #endif | ||||