For GEMM multi-threading, simply split the work along the M dimension.

E.g. layer 1: A is (1600k, 576) and B is (576, 64). B is very small, so we split M.
9 years ago · 92058a75e2
--- a/Makefile.rule
+++ b/Makefile.rule
@@ -80,7 +80,7 @@ VERSION = 0.2.16.dev
 # NO_LAPACKE = 1

 # If you want to use legacy threaded Level 3 implementation.
-# USE_SIMPLE_THREADED_LEVEL3 = 1
+USE_SIMPLE_THREADED_LEVEL3 = 1

 # If you want to drive whole 64bit region by BLAS. Not all Fortran
 # compiler supports this. It's safe to keep comment it out if you
--- a/common_param.h
+++ b/common_param.h
@@ -1194,6 +1194,8 @@ extern gotoblas_t *gotoblas;
 #define XGEMM_DEFAULT_UNROLL_N 2
 #endif

+#define GEMM_THREAD gemm_thread_m
+
 #ifndef GEMM_THREAD
 #define GEMM_THREAD gemm_thread_n
 #endif