Browse Source

Ref #79 Added GEMM_MULTITHREAD_THRESHOLD flag to use single thread in gemm function with small matrices.

tags/v0.1.0^2
Xianyi Zhang 13 years ago
parent
commit
31c836ac25
4 changed files with 17 additions and 1 deletions
  1. +5
    -0
      Makefile.rule
  2. +5
    -0
      Makefile.system
  3. +1
    -0
      getarch_2nd.c
  4. +6
    -1
      interface/gemm.c

+ 5
- 0
Makefile.rule View File

@@ -88,6 +88,11 @@ VERSION = 0.1alpha2.5
# If you need to synchronize FP CSR between threads (for x86/x86_64 only). # If you need to synchronize FP CSR between threads (for x86/x86_64 only).
# CONSISTENT_FPCSR = 1 # CONSISTENT_FPCSR = 1


# If any gemm argument m, n or k is less than or equal to this threshold, gemm will be executed
# with a single thread. You can use this flag to avoid the overhead of multi-threading
# for small matrix sizes. The default value is 4.
# GEMM_MULTITHREAD_THRESHOLD = 4

# If you need sanity check by comparing reference BLAS. It'll be very # If you need sanity check by comparing reference BLAS. It'll be very
# slow (Not implemented yet). # slow (Not implemented yet).
# SANITY_CHECK = 1 # SANITY_CHECK = 1


+ 5
- 0
Makefile.system View File

@@ -40,6 +40,11 @@ ifdef INTERFACE64
GETARCH_FLAGS += -DUSE64BITINT GETARCH_FLAGS += -DUSE64BITINT
endif endif


ifndef GEMM_MULTITHREAD_THRESHOLD
GEMM_MULTITHREAD_THRESHOLD=4
endif
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD)

# This operation is expensive, so execution should be once. # This operation is expensive, so execution should be once.
ifndef GOTOBLAS_MAKEFILE ifndef GOTOBLAS_MAKEFILE
export GOTOBLAS_MAKEFILE = 1 export GOTOBLAS_MAKEFILE = 1


+ 1
- 0
getarch_2nd.c View File

@@ -34,6 +34,7 @@ int main(int argc, char **argv) {
#ifdef USE64BITINT #ifdef USE64BITINT
printf("#define USE64BITINT\n"); printf("#define USE64BITINT\n");
#endif #endif
printf("#define GEMM_MULTITHREAD_THRESHOLD\t%ld\n", GEMM_MULTITHREAD_THRESHOLD);
} }


return 0; return 0;


+ 6
- 1
interface/gemm.c View File

@@ -397,8 +397,13 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANS
mode |= (transb << BLAS_TRANSB_SHIFT); mode |= (transb << BLAS_TRANSB_SHIFT);


args.common = NULL; args.common = NULL;
args.nthreads = num_cpu_avail(3);


if(args.m <= GEMM_MULTITHREAD_THRESHOLD || args.n <= GEMM_MULTITHREAD_THRESHOLD
|| args.k <=GEMM_MULTITHREAD_THRESHOLD){
args.nthreads = 1;
}else{
args.nthreads = num_cpu_avail(3);
}
if (args.nthreads == 1) { if (args.nthreads == 1) {
#endif #endif


Loading…
Cancel
Save