/* AUTOGENERATED KERNEL Settings: LMUL=1 M=8 M_tail_scalar_from=1 N=4 __riscv_='__riscv_' complex=True conjugate=False cpu='zvl256b' force_acc_double=False index_type='BLASLONG' op='trmm' param_precision='double' reg_width_bits=256 tail_policy='' trace=False Derived: ELEN_ACC=64 ELEN_PARAM=64 LMUL_ACC=1 VFMACC='__riscv_vfmacc_vf_f64m1' VFMUL='__riscv_vfmul_vf_f64m1' VLEV='__riscv_vle64_v_f64m1' VLSEV='__riscv_vlse64_v_f64m1' VMACC_TO_ACC='__riscv_vfmacc_vf_f64m1' VMUL_TO_ACC='__riscv_vfmul_vf_f64m1' VSETVL='__riscv_vsetvl_e64m1' VSEV='__riscv_vse64_v_f64m1' VSSEV='__riscv_vsse64_v_f64m1' acc_vector_t='vfloat64m1_t' output='ztrmm_kernel_8x4_zvl256b.c' param_scalar_t='double' param_vector_t='vfloat64m1_t' */ #include "common.h" #if defined(NN) || defined(NT) || defined(TN) || defined(TT) #define S0 1 #define S1 -1 #define S2 1 #define S3 1 #define VFMACC_RR __riscv_vfmsac #define VFMACC_RI __riscv_vfmacc #endif #if defined(NR) || defined(NC) || defined(TR) || defined(TC) #define S0 1 #define S1 1 #define S2 1 #define S3 -1 #define VFMACC_RR __riscv_vfmacc #define VFMACC_RI __riscv_vfmsac #endif #if defined(RN) || defined(RT) || defined(CN) || defined(CT) #define S0 1 #define S1 1 #define S2 -1 #define S3 1 #define VFMACC_RR __riscv_vfmacc #define VFMACC_RI __riscv_vfnmsac #endif #if defined(RR) || defined(RC) || defined(CR) || defined(CC) #define S0 1 #define S1 -1 #define S2 -1 #define S3 -1 #define VFMACC_RR __riscv_vfmsac #define VFMACC_RI __riscv_vfnmacc #endif #if defined(LEFT) != defined(TRANSA) #define BACKWARDS #endif int CNAME(BLASLONG M, BLASLONG N, BLASLONG K, FLOAT alphar, FLOAT alphai, FLOAT* A, FLOAT* B, FLOAT* C, BLASLONG ldc, BLASLONG offset) { BLASLONG gvl = 0; BLASLONG m_top = 0; BLASLONG n_top = 0; // -- MAIN PASS for (BLASLONG j=0; j