@@ -919,14 +919,22 @@ extern gotoblas_t *gotoblas; | |||
/*
 * SGEMM parameters (used on the BLAS_SINGLE | BLAS_REAL path): each short
 * name aliases the corresponding SGEMM_DEFAULT_* macro.
 */
#define SGEMM_R SGEMM_DEFAULT_R | |||
#define SGEMM_UNROLL_M SGEMM_DEFAULT_UNROLL_M | |||
#define SGEMM_UNROLL_N SGEMM_DEFAULT_UNROLL_N | |||
/*
 * Combined unroll factor. An explicit SGEMM_DEFAULT_UNROLL_MN takes
 * precedence when it is defined; otherwise the value falls back to
 * MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N).
 * NOTE(review): the drivers compute `mask = SGEMM_UNROLL_MN - 1`, so an
 * override is presumably expected to be a power of two -- confirm.
 */
#ifdef SGEMM_DEFAULT_UNROLL_MN | |||
#define SGEMM_UNROLL_MN SGEMM_DEFAULT_UNROLL_MN | |||
#else | |||
#define SGEMM_UNROLL_MN MAX((SGEMM_UNROLL_M), (SGEMM_UNROLL_N)) | |||
#endif | |||
/*
 * DGEMM parameters (used on the BLAS_DOUBLE | BLAS_REAL path): each short
 * name aliases the corresponding DGEMM_DEFAULT_* macro.
 */
#define DGEMM_P DGEMM_DEFAULT_P | |||
#define DGEMM_Q DGEMM_DEFAULT_Q | |||
#define DGEMM_R DGEMM_DEFAULT_R | |||
#define DGEMM_UNROLL_M DGEMM_DEFAULT_UNROLL_M | |||
#define DGEMM_UNROLL_N DGEMM_DEFAULT_UNROLL_N | |||
/*
 * Combined unroll factor. An explicit DGEMM_DEFAULT_UNROLL_MN takes
 * precedence when it is defined; otherwise the value falls back to
 * MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N).
 * NOTE(review): the drivers compute `mask = DGEMM_UNROLL_MN - 1`, so an
 * override is presumably expected to be a power of two -- confirm.
 */
#ifdef DGEMM_DEFAULT_UNROLL_MN | |||
#define DGEMM_UNROLL_MN DGEMM_DEFAULT_UNROLL_MN | |||
#else | |||
#define DGEMM_UNROLL_MN MAX((DGEMM_UNROLL_M), (DGEMM_UNROLL_N)) | |||
#endif | |||
#define QGEMM_P QGEMM_DEFAULT_P | |||
#define QGEMM_Q QGEMM_DEFAULT_Q | |||
@@ -940,14 +948,22 @@ extern gotoblas_t *gotoblas; | |||
/*
 * CGEMM parameters (used on the BLAS_SINGLE | BLAS_COMPLEX path): each short
 * name aliases the corresponding CGEMM_DEFAULT_* macro.
 */
#define CGEMM_R CGEMM_DEFAULT_R | |||
#define CGEMM_UNROLL_M CGEMM_DEFAULT_UNROLL_M | |||
#define CGEMM_UNROLL_N CGEMM_DEFAULT_UNROLL_N | |||
/*
 * Combined unroll factor. An explicit CGEMM_DEFAULT_UNROLL_MN takes
 * precedence when it is defined; otherwise the value falls back to
 * MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N).
 * NOTE(review): the drivers compute `mask = CGEMM_UNROLL_MN - 1`, so an
 * override is presumably expected to be a power of two -- confirm.
 */
#ifdef CGEMM_DEFAULT_UNROLL_MN | |||
#define CGEMM_UNROLL_MN CGEMM_DEFAULT_UNROLL_MN | |||
#else | |||
#define CGEMM_UNROLL_MN MAX((CGEMM_UNROLL_M), (CGEMM_UNROLL_N)) | |||
#endif | |||
/*
 * ZGEMM parameters (used on the BLAS_DOUBLE | BLAS_COMPLEX path): each short
 * name aliases the corresponding ZGEMM_DEFAULT_* macro.
 */
#define ZGEMM_P ZGEMM_DEFAULT_P | |||
#define ZGEMM_Q ZGEMM_DEFAULT_Q | |||
#define ZGEMM_R ZGEMM_DEFAULT_R | |||
#define ZGEMM_UNROLL_M ZGEMM_DEFAULT_UNROLL_M | |||
#define ZGEMM_UNROLL_N ZGEMM_DEFAULT_UNROLL_N | |||
/*
 * Combined unroll factor. An explicit ZGEMM_DEFAULT_UNROLL_MN takes
 * precedence when it is defined; otherwise the value falls back to
 * MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N).
 * NOTE(review): the drivers compute `mask = ZGEMM_UNROLL_MN - 1`, so an
 * override is presumably expected to be a power of two -- confirm.
 */
#ifdef ZGEMM_DEFAULT_UNROLL_MN | |||
#define ZGEMM_UNROLL_MN ZGEMM_DEFAULT_UNROLL_MN | |||
#else | |||
#define ZGEMM_UNROLL_MN MAX((ZGEMM_UNROLL_M), (ZGEMM_UNROLL_N)) | |||
#endif | |||
#define XGEMM_P XGEMM_DEFAULT_P | |||
#define XGEMM_Q XGEMM_DEFAULT_Q | |||
@@ -538,10 +538,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
mask = MAX(QGEMM_UNROLL_M, QGEMM_UNROLL_N) - 1; | |||
#elif defined(DOUBLE) | |||
mode = BLAS_DOUBLE | BLAS_REAL; | |||
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; | |||
mask = DGEMM_UNROLL_MN - 1; | |||
#else | |||
mode = BLAS_SINGLE | BLAS_REAL; | |||
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; | |||
mask = SGEMM_UNROLL_MN - 1; | |||
#endif | |||
#else | |||
#ifdef XDOUBLE | |||
@@ -549,10 +549,10 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, FLO | |||
mask = MAX(XGEMM_UNROLL_M, XGEMM_UNROLL_N) - 1; | |||
#elif defined(DOUBLE) | |||
mode = BLAS_DOUBLE | BLAS_COMPLEX; | |||
mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1; | |||
mask = ZGEMM_UNROLL_MN - 1; | |||
#else | |||
mode = BLAS_SINGLE | BLAS_COMPLEX; | |||
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1; | |||
mask = CGEMM_UNROLL_MN - 1; | |||
#endif | |||
#endif | |||
@@ -57,10 +57,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||
switch (mode & BLAS_PREC) { | |||
case BLAS_SINGLE: | |||
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; | |||
mask = SGEMM_UNROLL_MN - 1; | |||
break; | |||
case BLAS_DOUBLE: | |||
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; | |||
mask = DGEMM_UNROLL_MN - 1; | |||
break; | |||
#ifdef EXPRECISION | |||
case BLAS_XDOUBLE: | |||
@@ -71,10 +71,10 @@ int CNAME(int mode, blas_arg_t *arg, BLASLONG *range_m, BLASLONG *range_n, int ( | |||
} else { | |||
switch (mode & BLAS_PREC) { | |||
case BLAS_SINGLE: | |||
mask = MAX(CGEMM_UNROLL_M, CGEMM_UNROLL_N) - 1; | |||
mask = CGEMM_UNROLL_MN - 1; | |||
break; | |||
case BLAS_DOUBLE: | |||
mask = MAX(ZGEMM_UNROLL_M, ZGEMM_UNROLL_N) - 1; | |||
mask = ZGEMM_UNROLL_MN - 1; | |||
break; | |||
#ifdef EXPRECISION | |||
case BLAS_XDOUBLE: | |||
@@ -54,7 +54,14 @@ gotoblas_t TABLE_NAME = { | |||
GEMM_DEFAULT_OFFSET_A, GEMM_DEFAULT_OFFSET_B, GEMM_DEFAULT_ALIGN, | |||
0, 0, 0, | |||
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), | |||
SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N, | |||
#ifdef SGEMM_DEFAULT_UNROLL_MN | |||
SGEMM_DEFAULT_UNROLL_MN, | |||
#else | |||
MAX(SGEMM_DEFAULT_UNROLL_M, SGEMM_DEFAULT_UNROLL_N), | |||
#endif | |||
#ifdef HAVE_EXCLUSIVE_CACHE | |||
1, | |||
#else | |||
@@ -110,7 +117,12 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
0, 0, 0, | |||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), | |||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, | |||
#ifdef DGEMM_DEFAULT_UNROLL_MN | |||
DGEMM_DEFAULT_UNROLL_MN, | |||
#else | |||
MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), | |||
#endif | |||
damax_kTS, damin_kTS, dmax_kTS, dmin_kTS, | |||
idamax_kTS, idamin_kTS, idmax_kTS, idmin_kTS, | |||
@@ -214,7 +226,12 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
0, 0, 0, | |||
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), | |||
CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N, | |||
#ifdef CGEMM_DEFAULT_UNROLL_MN | |||
CGEMM_DEFAULT_UNROLL_MN, | |||
#else | |||
MAX(CGEMM_DEFAULT_UNROLL_M, CGEMM_DEFAULT_UNROLL_N), | |||
#endif | |||
camax_kTS, camin_kTS, icamax_kTS, icamin_kTS, | |||
cnrm2_kTS, casum_kTS, ccopy_kTS, | |||
@@ -307,7 +324,12 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
0, 0, 0, | |||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), | |||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, | |||
#ifdef ZGEMM_DEFAULT_UNROLL_MN | |||
ZGEMM_DEFAULT_UNROLL_MN, | |||
#else | |||
MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), | |||
#endif | |||
zamax_kTS, zamin_kTS, izamax_kTS, izamin_kTS, | |||
znrm2_kTS, zasum_kTS, zcopy_kTS, | |||
@@ -1206,6 +1206,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ZGEMM_DEFAULT_UNROLL_N 2 | |||
#define XGEMM_DEFAULT_UNROLL_N 1 | |||
/*
 * Explicit combined unroll factor for DGEMM. When this macro is defined,
 * the DGEMM_UNROLL_MN selection and the dispatch-table initializer use it
 * instead of MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N).
 * NOTE(review): the conditional enclosing this define opens outside the
 * visible hunk, so which target it applies to cannot be confirmed here.
 */
#define DGEMM_DEFAULT_UNROLL_MN 16 | |||
#endif | |||
#ifdef ARCH_X86 | |||