|
|
@@ -41,15 +41,110 @@ |
|
|
|
|
|
|
|
#ifndef ASSEMBLER |
|
|
|
|
|
|
|
#ifdef DYNAMIC_ARCH |
|
|
|
|
|
|
|
/* Fallback declaration of the bfloat16 type name: when the BFLOAT16 macro is
 * not defined, bfloat16 is declared as an alias for unsigned short.
 * The name is used later in this file as the pointee type of the
 * shgemm_kernel, shgemm_beta and shgemm_{in,it,on,ot}copy function-pointer
 * parameters.
 * NOTE(review): presumably the value holds the raw bfloat16 bit pattern and
 * BFLOAT16 is supplied by the build configuration -- confirm. */
#ifndef BFLOAT16 |
|
|
|
typedef unsigned short bfloat16; |
|
|
|
#endif |
|
|
|
#ifdef DYNAMIC_ARCH |
|
|
|
|
|
|
|
typedef struct { |
|
|
|
int dtb_entries; |
|
|
|
int offsetA, offsetB, align; |
|
|
|
|
|
|
|
#if 1 |
|
|
|
int shgemm_p, shgemm_q, shgemm_r; |
|
|
|
int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn; |
|
|
|
|
|
|
|
float (*shamax_k) (BLASLONG, float *, BLASLONG); |
|
|
|
float (*shamin_k) (BLASLONG, float *, BLASLONG); |
|
|
|
float (*shmax_k) (BLASLONG, float *, BLASLONG); |
|
|
|
float (*shmin_k) (BLASLONG, float *, BLASLONG); |
|
|
|
BLASLONG (*ishamax_k)(BLASLONG, float *, BLASLONG); |
|
|
|
BLASLONG (*ishamin_k)(BLASLONG, float *, BLASLONG); |
|
|
|
BLASLONG (*ishmax_k) (BLASLONG, float *, BLASLONG); |
|
|
|
BLASLONG (*ishmin_k) (BLASLONG, float *, BLASLONG); |
|
|
|
|
|
|
|
float (*shnrm2_k) (BLASLONG, float *, BLASLONG); |
|
|
|
float (*shasum_k) (BLASLONG, float *, BLASLONG); |
|
|
|
float (*shsum_k) (BLASLONG, float *, BLASLONG); |
|
|
|
int (*shcopy_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); |
|
|
|
float (*shdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); |
|
|
|
double (*dshdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); |
|
|
|
|
|
|
|
int (*shrot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); |
|
|
|
|
|
|
|
int (*shaxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); |
|
|
|
int (*shscal_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); |
|
|
|
int (*shswap_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); |
|
|
|
|
|
|
|
int (*shgemv_n) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); |
|
|
|
int (*shgemv_t) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); |
|
|
|
int (*shger_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); |
|
|
|
|
|
|
|
int (*shsymv_L) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); |
|
|
|
int (*shsymv_U) (BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); |
|
|
|
|
|
|
|
int (*shgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG); |
|
|
|
int (*shgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG); |
|
|
|
|
|
|
|
int (*shgemm_incopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *); |
|
|
|
int (*shgemm_itcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *); |
|
|
|
int (*shgemm_oncopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *); |
|
|
|
int (*shgemm_otcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *); |
|
|
|
|
|
|
|
int (*shtrsm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); |
|
|
|
int (*shtrsm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); |
|
|
|
int (*shtrsm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); |
|
|
|
int (*shtrsm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); |
|
|
|
|
|
|
|
int (*shtrsm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_iutncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_ilnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_ilnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_iltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_iltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_ounucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_ounncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_outucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_outncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_olnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrsm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, float *); |
|
|
|
|
|
|
|
int (*shtrmm_kernel_RN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); |
|
|
|
int (*shtrmm_kernel_RT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); |
|
|
|
int (*shtrmm_kernel_LN)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); |
|
|
|
int (*shtrmm_kernel_LT)(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG, BLASLONG); |
|
|
|
|
|
|
|
int (*shtrmm_iunucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_iunncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_iutucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_iutncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_ilnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_ilnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_iltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_iltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_ounucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_ounncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_outucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_outncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_olnucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_olnncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_oltucopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shtrmm_oltncopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
|
|
|
|
int (*shsymm_iutcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shsymm_iltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shsymm_outcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
int (*shsymm_oltcopy)(BLASLONG, BLASLONG, float *, BLASLONG, BLASLONG, BLASLONG, float *); |
|
|
|
|
|
|
|
int (*shneg_tcopy) (BLASLONG, BLASLONG, float *, BLASLONG, float *); |
|
|
|
int (*shlaswp_ncopy) (BLASLONG, BLASLONG, BLASLONG, float *, BLASLONG, blasint *, float *); |
|
|
|
|
|
|
|
#endif |
|
|
|
int sgemm_p, sgemm_q, sgemm_r; |
|
|
|
int sgemm_unroll_m, sgemm_unroll_n, sgemm_unroll_mn; |
|
|
|
|
|
|
@@ -87,15 +182,6 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); |
|
|
|
int (*sgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, float *, float *, float *, BLASLONG); |
|
|
|
int (*sgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); |
|
|
|
|
|
|
|
int shgemm_p, shgemm_q, shgemm_r; |
|
|
|
int shgemm_unroll_m, shgemm_unroll_n, shgemm_unroll_mn; |
|
|
|
int (*shgemm_kernel )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, bfloat16 *, float *, BLASLONG); |
|
|
|
int (*shgemm_beta )(BLASLONG, BLASLONG, BLASLONG, float, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG); |
|
|
|
|
|
|
|
int (*shgemm_incopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *); |
|
|
|
int (*shgemm_itcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *); |
|
|
|
int (*shgemm_oncopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *); |
|
|
|
int (*shgemm_otcopy )(BLASLONG, BLASLONG, bfloat16 *, BLASLONG, bfloat16 *); |
|
|
|
|
|
|
|
int (*sgemm_incopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); |
|
|
|
int (*sgemm_itcopy )(BLASLONG, BLASLONG, float *, BLASLONG, float *); |
|
|
|