Browse Source

Define SBGEMM_ALIGN_K for DYNAMIC_ARCH build

tags/v0.3.22^2
Honglin Zhu 2 years ago
parent
commit
4989e039a5
5 changed files with 18 additions and 18 deletions
  1. +1
    -1
      common_param.h
  2. +5
    -6
      driver/level3/level3.c
  3. +5
    -5
      driver/level3/level3_thread.c
  4. +2
    -6
      kernel/setparam-ref.c
  5. +5
    -0
      param.h

+ 1
- 1
common_param.h View File

@@ -50,6 +50,7 @@ typedef struct {
#ifdef BUILD_BFLOAT16 #ifdef BUILD_BFLOAT16
int sbgemm_p, sbgemm_q, sbgemm_r; int sbgemm_p, sbgemm_q, sbgemm_r;
int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn; int sbgemm_unroll_m, sbgemm_unroll_n, sbgemm_unroll_mn;
int sbgemm_align_k;


void (*sbstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG); void (*sbstobf16_k) (BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG);
void (*sbdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG); void (*sbdtobf16_k) (BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG);
@@ -1193,7 +1194,6 @@ BLASLONG (*ixamin_k)(BLASLONG, xdouble *, BLASLONG);
#ifdef BUILD_COMPLEX16 #ifdef BUILD_COMPLEX16
int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG); int (*zgeadd_k) (BLASLONG, BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
#endif #endif
int align_k; // must be 2^n
} gotoblas_t; } gotoblas_t;


extern gotoblas_t *gotoblas; extern gotoblas_t *gotoblas;


+ 5
- 6
driver/level3/level3.c View File

@@ -305,13 +305,12 @@ int CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
} }


BLASLONG pad_min_l = min_l; BLASLONG pad_min_l = min_l;
#if defined(HALF) && defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->align_k - 1) & ~(gotoblas->align_k-1);
#if defined(HALF)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
#endif #endif

#if defined(HALF) && !defined(DYNAMIC_ARCH) && defined(NEOVERSEN2)
pad_min_l = (min_l + 3) & ~3;
#endif #endif


/* First, we have to move data A to L2 cache */ /* First, we have to move data A to L2 cache */


+ 5
- 5
driver/level3/level3_thread.c View File

@@ -327,12 +327,12 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
BLASLONG pad_min_l = min_l; BLASLONG pad_min_l = min_l;


#if defined(HALF) && defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->align_k - 1) & ~(gotoblas->align_k-1);
#if defined(HALF)
#if defined(DYNAMIC_ARCH)
pad_min_l = (min_l + gotoblas->sbgemm_align_k - 1) & ~(gotoblas->sbgemm_align_k-1);
#else
pad_min_l = (min_l + SBGEMM_ALIGN_K - 1) & ~(SBGEMM_ALIGN_K - 1);;
#endif #endif

#if defined(HALF) && !defined(DYNAMIC_ARCH) && defined(NEOVERSEN2)
pad_min_l = (min_l + 3) & ~3;
#endif #endif


/* Determine step size in m /* Determine step size in m


+ 2
- 6
kernel/setparam-ref.c View File

@@ -62,6 +62,8 @@ gotoblas_t TABLE_NAME = {
MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N), MAX(SBGEMM_DEFAULT_UNROLL_M, SBGEMM_DEFAULT_UNROLL_N),
#endif #endif


SBGEMM_ALIGN_K,

sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS, sbstobf16_kTS, sbdtobf16_kTS, sbf16tos_kTS, dbf16tod_kTS,


samax_kTS, samin_kTS, smax_kTS, smin_kTS, samax_kTS, samin_kTS, smax_kTS, smin_kTS,
@@ -973,12 +975,6 @@ static void init_parameter(void) {
TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r; TABLE_NAME.xgemm3m_r = TABLE_NAME.qgemm_r;
#endif #endif
#endif #endif
#if defined(NEOVERSEN2) && BUILD_BFLOAT16 == 1
TABLE_NAME.align_k = 4;
#else
TABLE_NAME.align_k = 1;
#endif


} }
#else // (ARCH_ARM64) #else // (ARCH_ARM64)


+ 5
- 0
param.h View File

@@ -79,6 +79,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define SBGEMM_DEFAULT_P 256 #define SBGEMM_DEFAULT_P 256
#define SBGEMM_DEFAULT_R 256 #define SBGEMM_DEFAULT_R 256
#define SBGEMM_DEFAULT_Q 256 #define SBGEMM_DEFAULT_Q 256
#define SBGEMM_ALIGN_K 1 // must be 2^x

#ifdef OPTERON #ifdef OPTERON


#define SNUMOPT 4 #define SNUMOPT 4
@@ -3394,6 +3396,9 @@ is a big desktop or server with abundant cache rather than a phone or embedded d


#elif defined(NEOVERSEN2) #elif defined(NEOVERSEN2)


#undef SBGEMM_ALIGN_K
#define SBGEMM_ALIGN_K 4

#undef SBGEMM_DEFAULT_UNROLL_M #undef SBGEMM_DEFAULT_UNROLL_M
#undef SBGEMM_DEFAULT_UNROLL_N #undef SBGEMM_DEFAULT_UNROLL_N
#define SBGEMM_DEFAULT_UNROLL_M 8 #define SBGEMM_DEFAULT_UNROLL_M 8


Loading…
Cancel
Save