|
|
@@ -170,6 +170,22 @@ |
|
|
|
#define STOP_RPCC(COUNTER) |
|
|
|
#endif |
|
|
|
|
|
|
|
/*
 * BFLOAT16_ALIGN_K -- alignment value for the K dimension in bfloat16 builds.
 *
 * Only defined when BUILD_BFLOAT16 is set.  The expansion is chosen from two
 * independent build switches:
 *
 *   DYNAMIC_ARCH  BGEMM   expansion
 *   ------------  -----   --------------------------
 *   defined       yes     gotoblas->bgemm_align_k
 *   defined       no      gotoblas->sbgemm_align_k
 *   undefined     yes     BGEMM_ALIGN_K
 *   undefined     no      SBGEMM_ALIGN_K
 *
 * With DYNAMIC_ARCH the value is read through the `gotoblas` pointer each
 * time the macro is expanded; otherwise it is whatever the *_ALIGN_K macro
 * expands to.
 * NOTE(review): callers that round with `(x + A - 1) & ~(A - 1)` presume the
 * value is a power of two -- confirm against the kernel parameter tables.
 */
#if defined(BUILD_BFLOAT16)
#  if defined(DYNAMIC_ARCH) && defined(BGEMM)
#    define BFLOAT16_ALIGN_K gotoblas->bgemm_align_k
#  elif defined(DYNAMIC_ARCH)
#    define BFLOAT16_ALIGN_K gotoblas->sbgemm_align_k
#  elif defined(BGEMM)
#    define BFLOAT16_ALIGN_K BGEMM_ALIGN_K
#  else
#    define BFLOAT16_ALIGN_K SBGEMM_ALIGN_K
#  endif
#endif
|
|
|
|
|
|
|
int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, |
|
|
|
XFLOAT *sa, XFLOAT *sb, BLASLONG dummy){ |
|
|
|
BLASLONG k, lda, ldb, ldc; |
|
|
@@ -307,11 +323,7 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, |
|
|
|
|
|
|
|
BLASLONG pad_min_l = min_l; |
|
|
|
#if defined(BFLOAT16) |
|
|
|
#if defined(DYNAMIC_ARCH) |
|
|
|
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1); |
|
|
|
#else |
|
|
|
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);; |
|
|
|
#endif |
|
|
|
pad_min_l = (min_l + BFLOAT16_ALIGN_K - 1) & ~(BFLOAT16_ALIGN_K - 1); |
|
|
|
#endif |
|
|
|
|
|
|
|
/* First, we have to move data A to L2 cache */ |
|
|
|