|
- /*********************************************************************/
- /* Copyright 2009, 2010 The University of Texas at Austin. */
- /* All rights reserved. */
- /* */
- /* Redistribution and use in source and binary forms, with or */
- /* without modification, are permitted provided that the following */
- /* conditions are met: */
- /* */
- /* 1. Redistributions of source code must retain the above */
- /* copyright notice, this list of conditions and the following */
- /* disclaimer. */
- /* */
- /* 2. Redistributions in binary form must reproduce the above */
- /* copyright notice, this list of conditions and the following */
- /* disclaimer in the documentation and/or other materials */
- /* provided with the distribution. */
- /* */
- /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
- /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
- /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
- /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
- /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
- /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
- /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
- /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
- /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
- /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
- /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
- /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
- /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
- /* POSSIBILITY OF SUCH DAMAGE. */
- /* */
- /* The views and conclusions contained in the software and */
- /* documentation are those of the authors and should not be */
- /* interpreted as representing official policies, either expressed */
- /* or implied, of The University of Texas at Austin. */
- /*********************************************************************/
-
- #include "common.h"
-
- #if !defined(DOUBLE) /* DOUBLE not defined: aliases for 32-bit float elements, register group m4 */
- #define VSETVL(n) __riscv_vsetvl_e32m4(n) /* vector length to use for n remaining elements */
- #define FLOAT_V_T vfloat32m4_t /* one vector of FLOAT elements */
- #define FLOAT_VX2_T vfloat32m4x2_t /* tuple of two FLOAT_V_T, as used by the 2-field segment load/store */
- #define VGET_VX2 __riscv_vget_v_f32m4x2_f32m4 /* read field 0 or 1 out of a tuple */
- #define VSET_VX2 __riscv_vset_v_f32m4_f32m4x2 /* return a tuple with field 0 or 1 replaced */
- #define VLSEG_FLOAT __riscv_vlseg2e32_v_f32m4x2 /* 2-field segment load: de-interleaves memory pairs into a tuple */
- #define VSSEG_FLOAT __riscv_vsseg2e32_v_f32m4x2 /* 2-field segment store: interleaves a tuple back to memory */
- #define VFMVVF_FLOAT __riscv_vfmv_v_f_f32m4 /* broadcast a scalar into a vector */
- #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m4 /* vector * scalar */
- #define VFADDVV_FLOAT __riscv_vfadd_vv_f32m4 /* vector + vector */
- #define VFSUBVV_FLOAT __riscv_vfsub_vv_f32m4 /* vector - vector */
- #else /* DOUBLE defined: same aliases for 64-bit float elements, register group m4 */
- #define VSETVL(n) __riscv_vsetvl_e64m4(n) /* vector length to use for n remaining elements */
- #define FLOAT_V_T vfloat64m4_t /* one vector of FLOAT elements */
- #define FLOAT_VX2_T vfloat64m4x2_t /* tuple of two FLOAT_V_T, as used by the 2-field segment load/store */
- #define VGET_VX2 __riscv_vget_v_f64m4x2_f64m4 /* read field 0 or 1 out of a tuple */
- #define VSET_VX2 __riscv_vset_v_f64m4_f64m4x2 /* return a tuple with field 0 or 1 replaced */
- #define VLSEG_FLOAT __riscv_vlseg2e64_v_f64m4x2 /* 2-field segment load: de-interleaves memory pairs into a tuple */
- #define VSSEG_FLOAT __riscv_vsseg2e64_v_f64m4x2 /* 2-field segment store: interleaves a tuple back to memory */
- #define VFMVVF_FLOAT __riscv_vfmv_v_f_f64m4 /* broadcast a scalar into a vector */
- #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m4 /* vector * scalar */
- #define VFADDVV_FLOAT __riscv_vfadd_vv_f64m4 /* vector + vector */
- #define VFSUBVV_FLOAT __riscv_vfsub_vv_f64m4 /* vector - vector */
- #endif /* !defined(DOUBLE) */
-
- /*
-  * Scale a block of interleaved (re, im) pairs in place by the complex
-  * scalar beta = beta_r + i*beta_i.
-  *
-  * The block is walked as n runs of m pairs each; consecutive runs start
-  * ldc pairs (2*ldc FLOATs) apart, beginning at c.
-  *
-  *   - beta_r == 0 and beta_i == 0: every pair is overwritten with (0, 0);
-  *     the old contents of c are never loaded.
-  *   - otherwise: each pair (re, im) becomes
-  *       (re*beta_r - im*beta_i, im*beta_r + re*beta_i).
-  *
-  * dummy1..dummy5 are not referenced. The function always returns 0.
-  */
- int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1,
-           FLOAT beta_r, FLOAT beta_i,
-           FLOAT *dummy2, BLASLONG dummy3,
-           FLOAT *dummy4, BLASLONG dummy5,
-           FLOAT *c, BLASLONG ldc)
- {
-     /* ldc counts pairs; the pointer below steps in FLOATs. */
-     const BLASLONG run_stride = ldc * 2;
-     FLOAT *run_start = c;
-     BLASLONG runs_left = n;
-     size_t vl;
-
-     if (beta_r == 0.0 && beta_i == 0.0) {
-         FLOAT_VX2_T zero_pair;
-         FLOAT_V_T zero_re, zero_im;
-
-         /* Build the all-zero tuple once, sized for the first pass over m. */
-         vl = VSETVL(m);
-         zero_re = VFMVVF_FLOAT(0.0, vl);
-         zero_im = VFMVVF_FLOAT(0.0, vl);
-         zero_pair = VSET_VX2(zero_pair, 0, zero_re);
-         zero_pair = VSET_VX2(zero_pair, 1, zero_im);
-
-         while (runs_left > 0) {
-             FLOAT *dst = run_start;
-             BLASLONG pairs_left = m;
-
-             while (pairs_left > 0) {
-                 vl = VSETVL(pairs_left);
-
-                 VSSEG_FLOAT(dst, zero_pair, vl);
-
-                 pairs_left -= vl;
-                 dst += vl * 2;
-             }
-
-             runs_left--;
-             run_start += run_stride;
-         }
-
-         return 0;
-     }
-
-     /* General case: load, complex-multiply by beta, store back. */
-     while (runs_left > 0) {
-         FLOAT *cur = run_start;
-         BLASLONG pairs_left = m;
-
-         while (pairs_left > 0) {
-             FLOAT_VX2_T pair;
-             FLOAT_V_T re, im;
-             FLOAT_V_T re_br, im_bi, im_br, re_bi;
-
-             vl = VSETVL(pairs_left);
-
-             /* De-interleave vl pairs into separate re / im vectors. */
-             pair = VLSEG_FLOAT(cur, vl);
-             re = VGET_VX2(pair, 0);
-             im = VGET_VX2(pair, 1);
-
-             re_br = VFMULVF_FLOAT(re, beta_r, vl);
-             im_bi = VFMULVF_FLOAT(im, beta_i, vl);
-
-             im_br = VFMULVF_FLOAT(im, beta_r, vl);
-             re_bi = VFMULVF_FLOAT(re, beta_i, vl);
-
-             re = VFSUBVV_FLOAT(re_br, im_bi, vl);
-             im = VFADDVV_FLOAT(im_br, re_bi, vl);
-
-             /* Re-interleave and write the scaled pairs over the originals. */
-             pair = VSET_VX2(pair, 0, re);
-             pair = VSET_VX2(pair, 1, im);
-             VSSEG_FLOAT(cur, pair, vl);
-
-             pairs_left -= vl;
-             cur += vl * 2;
-         }
-
-         runs_left--;
-         run_start += run_stride;
-     }
-
-     return 0;
- }
|