/* AUTOGENERATED KERNEL Settings: LMUL=1 M=16 M_tail_scalar_from=2 N=8 __riscv_='__riscv_' complex=False conjugate=False cpu='zvl256b' force_acc_double=False index_type='BLASLONG' op='gemm' param_precision='float' reg_width_bits=256 tail_policy='' trace=False Derived: ELEN_ACC=32 ELEN_PARAM=32 LMUL_ACC=1 VFMACC='__riscv_vfmacc_vf_f32m1' VFMUL='__riscv_vfmul_vf_f32m1' VLEV='__riscv_vle32_v_f32m1' VLSEV='__riscv_vlse32_v_f32m1' VMACC_TO_ACC='__riscv_vfmacc_vf_f32m1' VMUL_TO_ACC='__riscv_vfmul_vf_f32m1' VSETVL='__riscv_vsetvl_e32m1' VSEV='__riscv_vse32_v_f32m1' VSSEV='__riscv_vsse32_v_f32m1' acc_vector_t='vfloat32m1_t' output='sgemm_kernel_16x8_zvl256b.c' param_scalar_t='float' param_vector_t='vfloat32m1_t' */ #include "common.h" int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alpha, FLOAT* A, FLOAT* B, FLOAT* C, BLASLONG ldc) { BLASLONG gvl = 0; BLASLONG m_top = 0; BLASLONG n_top = 0; // -- MAIN PASS for (BLASLONG j=0; j