You cannot select more than 25 topics. Topics must start with a Chinese character, a letter, or a number, can include dashes ('-'), and can be up to 35 characters long.

zgemm_beta_rvv.c 5.5 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130
  1. /*********************************************************************/
  2. /* Copyright 2009, 2010 The University of Texas at Austin. */
  3. /* All rights reserved. */
  4. /* */
  5. /* Redistribution and use in source and binary forms, with or */
  6. /* without modification, are permitted provided that the following */
  7. /* conditions are met: */
  8. /* */
  9. /* 1. Redistributions of source code must retain the above */
  10. /* copyright notice, this list of conditions and the following */
  11. /* disclaimer. */
  12. /* */
  13. /* 2. Redistributions in binary form must reproduce the above */
  14. /* copyright notice, this list of conditions and the following */
  15. /* disclaimer in the documentation and/or other materials */
  16. /* provided with the distribution. */
  17. /* */
  18. /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
  19. /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
  20. /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
  21. /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
  22. /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
  23. /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
  24. /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
  25. /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
  26. /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
  27. /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
  28. /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
  29. /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
  30. /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
  31. /* POSSIBILITY OF SUCH DAMAGE. */
  32. /* */
  33. /* The views and conclusions contained in the software and */
  34. /* documentation are those of the authors and should not be */
  35. /* interpreted as representing official policies, either expressed */
  36. /* or implied, of The University of Texas at Austin. */
  37. /*********************************************************************/
  38. #include "common.h"
  /*
   * Precision-dependent aliases for the RVV intrinsics used by this kernel.
   * Both variants use LMUL=4 register groups and two-field tuple types; only
   * the element width (e64 when DOUBLE is defined, e32 otherwise) differs.
   */
  #if defined(DOUBLE)
  /* DOUBLE build: 64-bit floating-point elements. */
  #define VSETVL(n)       __riscv_vsetvl_e64m4(n)
  #define FLOAT_V_T       vfloat64m4_t
  #define FLOAT_VX2_T     vfloat64m4x2_t
  #define VGET_VX2        __riscv_vget_v_f64m4x2_f64m4
  #define VSET_VX2        __riscv_vset_v_f64m4_f64m4x2
  #define VLSEG_FLOAT     __riscv_vlseg2e64_v_f64m4x2
  #define VSSEG_FLOAT     __riscv_vsseg2e64_v_f64m4x2
  #define VFMVVF_FLOAT    __riscv_vfmv_v_f_f64m4
  #define VFMULVF_FLOAT   __riscv_vfmul_vf_f64m4
  #define VFADDVV_FLOAT   __riscv_vfadd_vv_f64m4
  #define VFSUBVV_FLOAT   __riscv_vfsub_vv_f64m4
  #else
  /* Non-DOUBLE build: 32-bit floating-point elements. */
  #define VSETVL(n)       __riscv_vsetvl_e32m4(n)
  #define FLOAT_V_T       vfloat32m4_t
  #define FLOAT_VX2_T     vfloat32m4x2_t
  #define VGET_VX2        __riscv_vget_v_f32m4x2_f32m4
  #define VSET_VX2        __riscv_vset_v_f32m4_f32m4x2
  #define VLSEG_FLOAT     __riscv_vlseg2e32_v_f32m4x2
  #define VSSEG_FLOAT     __riscv_vsseg2e32_v_f32m4x2
  #define VFMVVF_FLOAT    __riscv_vfmv_v_f_f32m4
  #define VFMULVF_FLOAT   __riscv_vfmul_vf_f32m4
  #define VFADDVV_FLOAT   __riscv_vfadd_vv_f32m4
  #define VFSUBVV_FLOAT   __riscv_vfsub_vv_f32m4
  #endif
  64. int CNAME(BLASLONG m, BLASLONG n, BLASLONG dummy1,
  65. FLOAT beta_r, FLOAT beta_i,
  66. FLOAT *dummy2, BLASLONG dummy3,
  67. FLOAT *dummy4, BLASLONG dummy5,
  68. FLOAT *c, BLASLONG ldc)
  69. {
  70. BLASLONG chunk;
  71. FLOAT *c_offset;
  72. size_t vl;
  73. FLOAT_V_T vr, vi, v1, v2, v3, v4;
  74. FLOAT_VX2_T vx2;
  75. ldc *= 2;
  76. c_offset = c;
  77. if (beta_r == 0.0 && beta_i == 0.0) {
  78. vl = VSETVL(m);
  79. vr = VFMVVF_FLOAT(0.0, vl);
  80. vi = VFMVVF_FLOAT(0.0, vl);
  81. vx2 = VSET_VX2(vx2, 0, vr);
  82. vx2 = VSET_VX2(vx2, 1, vi);
  83. for( ; n > 0; n--, c += ldc) {
  84. c_offset = c;
  85. for(chunk=m; chunk > 0; chunk -= vl, c_offset += vl*2) {
  86. vl = VSETVL(chunk);
  87. VSSEG_FLOAT(c_offset, vx2, vl);
  88. }
  89. }
  90. } else {
  91. for( ; n > 0; n--, c += ldc) {
  92. c_offset = c;
  93. for(chunk=m; chunk > 0; chunk -= vl, c_offset += vl*2) {
  94. vl = VSETVL(chunk);
  95. vx2 = VLSEG_FLOAT(c_offset, vl);
  96. vr = VGET_VX2(vx2, 0);
  97. vi = VGET_VX2(vx2, 1);
  98. v1 = VFMULVF_FLOAT(vr, beta_r, vl);
  99. v2 = VFMULVF_FLOAT(vi, beta_i, vl);
  100. v3 = VFMULVF_FLOAT(vi, beta_r, vl);
  101. v4 = VFMULVF_FLOAT(vr, beta_i, vl);
  102. vr = VFSUBVV_FLOAT(v1, v2, vl);
  103. vi = VFADDVV_FLOAT(v3, v4, vl);
  104. vx2 = VSET_VX2(vx2, 0, vr);
  105. vx2 = VSET_VX2(vx2, 1, vi);
  106. VSSEG_FLOAT(c_offset, vx2, vl);
  107. }
  108. }
  109. }
  110. return 0;
  111. }