@@ -919,14 +919,22 @@ extern gotoblas_t *gotoblas; | |||||
#define SGEMM_R SGEMM_DEFAULT_R | #define SGEMM_R SGEMM_DEFAULT_R | ||||
#define SGEMM_UNROLL_M SGEMM_DEFAULT_UNROLL_M | #define SGEMM_UNROLL_M SGEMM_DEFAULT_UNROLL_M | ||||
#define SGEMM_UNROLL_N SGEMM_DEFAULT_UNROLL_N | #define SGEMM_UNROLL_N SGEMM_DEFAULT_UNROLL_N | ||||
#ifdef SGEMM_DEFAULT_UNROLL_MN | |||||
#define SGEMM_UNROLL_MN SGEMM_DEFAULT_UNROLL_MN | |||||
#else | |||||
#define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N)) | #define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N)) | ||||
#endif | |||||
#define DGEMM_P DGEMM_DEFAULT_P | #define DGEMM_P DGEMM_DEFAULT_P | ||||
#define DGEMM_Q DGEMM_DEFAULT_Q | #define DGEMM_Q DGEMM_DEFAULT_Q | ||||
#define DGEMM_R DGEMM_DEFAULT_R | #define DGEMM_R DGEMM_DEFAULT_R | ||||
#define DGEMM_UNROLL_M DGEMM_DEFAULT_UNROLL_M | #define DGEMM_UNROLL_M DGEMM_DEFAULT_UNROLL_M | ||||
#define DGEMM_UNROLL_N DGEMM_DEFAULT_UNROLL_N | #define DGEMM_UNROLL_N DGEMM_DEFAULT_UNROLL_N | ||||
#ifdef DGEMM_DEFAULT_UNROLL_MN | |||||
#define DGEMM_UNROLL_MN DGEMM_DEFAULT_UNROLL_MN | |||||
#else | |||||
#define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N)) | #define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N)) | ||||
#endif | |||||
#define QGEMM_P QGEMM_DEFAULT_P | #define QGEMM_P QGEMM_DEFAULT_P | ||||
#define QGEMM_Q QGEMM_DEFAULT_Q | #define QGEMM_Q QGEMM_DEFAULT_Q | ||||
@@ -940,14 +948,22 @@ extern gotoblas_t *gotoblas; | |||||
#define CGEMM_R CGEMM_DEFAULT_R | #define CGEMM_R CGEMM_DEFAULT_R | ||||
#define CGEMM_UNROLL_M CGEMM_DEFAULT_UNROLL_M | #define CGEMM_UNROLL_M CGEMM_DEFAULT_UNROLL_M | ||||
#define CGEMM_UNROLL_N CGEMM_DEFAULT_UNROLL_N | #define CGEMM_UNROLL_N CGEMM_DEFAULT_UNROLL_N | ||||
#ifdef CGEMM_DEFAULT_UNROLL_MN | |||||
#define CGEMM_UNROLL_MN CGEMM_DEFAULT_UNROLL_MN | |||||
#else | |||||
#define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N)) | #define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N)) | ||||
#endif | |||||
#define ZGEMM_P ZGEMM_DEFAULT_P | #define ZGEMM_P ZGEMM_DEFAULT_P | ||||
#define ZGEMM_Q ZGEMM_DEFAULT_Q | #define ZGEMM_Q ZGEMM_DEFAULT_Q | ||||
#define ZGEMM_R ZGEMM_DEFAULT_R | #define ZGEMM_R ZGEMM_DEFAULT_R | ||||
#define ZGEMM_UNROLL_M ZGEMM_DEFAULT_UNROLL_M | #define ZGEMM_UNROLL_M ZGEMM_DEFAULT_UNROLL_M | ||||
#define ZGEMM_UNROLL_N ZGEMM_DEFAULT_UNROLL_N | #define ZGEMM_UNROLL_N ZGEMM_DEFAULT_UNROLL_N | ||||
#ifdef ZGEMM_DEFAULT_UNROLL_MN | |||||
#define ZGEMM_UNROLL_MN ZGEMM_DEFAULT_UNROLL_MN | |||||
#else | |||||
#define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N)) | #define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N)) | ||||
#endif | |||||
#define XGEMM_P XGEMM_DEFAULT_P | #define XGEMM_P XGEMM_DEFAULT_P | ||||
#define XGEMM_Q XGEMM_DEFAULT_Q | #define XGEMM_Q XGEMM_DEFAULT_Q | ||||
@@ -538,10 +538,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1; | mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1; | ||||
#elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
mode = BLAS_DOUBLE | BLAS_REAL; | mode = BLAS_DOUBLE | BLAS_REAL; | ||||
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; | |||||
mask = DGEMM_UNROLL_MN - 1; | |||||
#else | #else | ||||
mode = BLAS_SINGLE | BLAS_REAL; | mode = BLAS_SINGLE | BLAS_REAL; | ||||
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; | |||||
mask = SGEMM_UNROLL_MN - 1; | |||||
#endif | #endif | ||||
#else | #else | ||||
#ifdef XDOUBLE | #ifdef XDOUBLE | ||||
@@ -549,10 +549,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||||
mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1; | mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1; | ||||
#elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
mode = BLAS_DOUBLE | BLAS_COMPLEX; | mode = BLAS_DOUBLE | BLAS_COMPLEX; | ||||
mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1; | |||||
mask = ZGEMM_UNROLL_MN - 1; | |||||
#else | #else | ||||
mode = BLAS_SINGLE | BLAS_COMPLEX; | mode = BLAS_SINGLE | BLAS_COMPLEX; | ||||
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1; | |||||
mask = CGEMM_UNROLL_MN - 1; | |||||
#endif | #endif | ||||
#endif | #endif | ||||
@@ -57,10 +57,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||||
switch (mode & BLAS_PREC) { | switch (mode & BLAS_PREC) { | ||||
case BLAS_SINGLE: | case BLAS_SINGLE: | ||||
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; | |||||
mask = SGEMM_UNROLL_MN - 1; | |||||
break; | break; | ||||
case BLAS_DOUBLE: | case BLAS_DOUBLE: | ||||
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; | |||||
mask = DGEMM_UNROLL_MN - 1; | |||||
break; | break; | ||||
#ifdef EXPRECISION | #ifdef EXPRECISION | ||||
case BLAS_XDOUBLE: | case BLAS_XDOUBLE: | ||||
@@ -71,10 +71,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||||
} else { | } else { | ||||
switch (mode & BLAS_PREC) { | switch (mode & BLAS_PREC) { | ||||
case BLAS_SINGLE: | case BLAS_SINGLE: | ||||
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1; | |||||
mask = CGEMM_UNROLL_MN - 1; | |||||
break; | break; | ||||
case BLAS_DOUBLE: | case BLAS_DOUBLE: | ||||
mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1; | |||||
mask = ZGEMM_UNROLL_MN - 1; | |||||
break; | break; | ||||
#ifdef EXPRECISION | #ifdef EXPRECISION | ||||
case BLAS_XDOUBLE: | case BLAS_XDOUBLE: | ||||
@@ -54,7 +54,14 @@ gotoblas_t TABLE_NAME = { | |||||
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, | GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, | ||||
0, 0, 0, | 0, 0, 0, | ||||
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), | |||||
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, | |||||
#ifdef SGEMM_DEFAULT_UNROLL_MN | |||||
SGEMM_DEFAULT_UNROLL_MN, | |||||
#else | |||||
MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), | |||||
#endif | |||||
#ifdef HAVE_EXCLUSIVE_CACHE | #ifdef HAVE_EXCLUSIVE_CACHE | ||||
1, | 1, | ||||
#else | #else | ||||
@@ -110,7 +117,12 @@ gotoblas_t TABLE_NAME = { | |||||
#endif | #endif | ||||
0, 0, 0, | 0, 0, 0, | ||||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), | |||||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, | |||||
#ifdef DGEMM_DEFAULT_UNROLL_MN | |||||
DGEMM_DEFAULT_UNROLL_MN, | |||||
#else | |||||
MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), | |||||
#endif | |||||
damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, | damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, | ||||
idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, | idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, | ||||
@@ -214,7 +226,12 @@ gotoblas_t TABLE_NAME = { | |||||
#endif | #endif | ||||
0, 0, 0, | 0, 0, 0, | ||||
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), | |||||
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, | |||||
#ifdef CGEMM_DEFAULT_UNROLL_MN | |||||
CGEMM_DEFAULT_UNROLL_MN, | |||||
#else | |||||
MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), | |||||
#endif | |||||
camax_kTS, camin_kTS, icamax_kTS, icamin_kTS, | camax_kTS, camin_kTS, icamax_kTS, icamin_kTS, | ||||
cnrm2_kTS, casum_kTS, ccopy_kTS, | cnrm2_kTS, casum_kTS, ccopy_kTS, | ||||
@@ -307,7 +324,12 @@ gotoblas_t TABLE_NAME = { | |||||
#endif | #endif | ||||
0, 0, 0, | 0, 0, 0, | ||||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), | |||||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, | |||||
#ifdef ZGEMM_DEFAULT_UNROLL_MN | |||||
ZGEMM_DEFAULT_UNROLL_MN, | |||||
#else | |||||
MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), | |||||
#endif | |||||
zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS, | zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS, | ||||
znrm2_kTS, zasum_kTS, zcopy_kTS, | znrm2_kTS, zasum_kTS, zcopy_kTS, | ||||
@@ -1206,6 +1206,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define ZGEMM_DEFAULT_UNROLL_N 2 | #define ZGEMM_DEFAULT_UNROLL_N 2 | ||||
#define XGEMM_DEFAULT_UNROLL_N 1 | #define XGEMM_DEFAULT_UNROLL_N 1 | ||||
#define DGEMM_DEFAULT_UNROLL_MN 16 | |||||
#endif | #endif | ||||
#ifdef ARCH_X86 | #ifdef ARCH_X86 | ||||