|
@@ -101,7 +101,12 @@ static FLOAT dm1 = -1.; |
|
|
#endif |
|
|
#endif |
|
|
|
|
|
|
|
|
typedef struct { |
|
|
typedef struct { |
|
|
volatile BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE]; |
|
|
|
|
|
|
|
|
#ifdef HAVE_C11 |
|
|
|
|
|
_Atomic |
|
|
|
|
|
#else |
|
|
|
|
|
volatile |
|
|
|
|
|
#endif |
|
|
|
|
|
BLASLONG working[MAX_CPU_NUMBER][CACHE_LINE_SIZE * DIVIDE_RATE]; |
|
|
} job_t; |
|
|
} job_t; |
|
|
|
|
|
|
|
|
|
|
|
|
|
@@ -375,6 +380,9 @@ static int thread_driver(blas_arg_t *args, FLOAT *sa, FLOAT *sb){ |
|
|
#elif defined(DOUBLE) |
|
|
#elif defined(DOUBLE) |
|
|
mode = BLAS_DOUBLE | BLAS_REAL; |
|
|
mode = BLAS_DOUBLE | BLAS_REAL; |
|
|
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; |
|
|
mask = MAX(DGEMM_UNROLL_M, DGEMM_UNROLL_N) - 1; |
|
|
|
|
|
#elif defined(HALF) |
|
|
|
|
|
mode = BLAS_HALF | BLAS_REAL; |
|
|
|
|
|
mask = MAX(SHGEMM_UNROLL_M, SHGEMM_UNROLL_N) - 1; |
|
|
#else |
|
|
#else |
|
|
mode = BLAS_SINGLE | BLAS_REAL; |
|
|
mode = BLAS_SINGLE | BLAS_REAL; |
|
|
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; |
|
|
mask = MAX(SGEMM_UNROLL_M, SGEMM_UNROLL_N) - 1; |
|
|