You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

vector-common.h 2.6 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364
  1. /*
  2. * Copyright (c) IBM Corporation 2020.
  3. * All rights reserved.
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are
  7. * met:
  8. *
  9. * 1. Redistributions of source code must retain the above copyright
  10. * notice, this list of conditions and the following disclaimer.
  11. *
  12. * 2. Redistributions in binary form must reproduce the above copyright
  13. * notice, this list of conditions and the following disclaimer in
  14. * the documentation and/or other materials provided with the
  15. * distribution.
  16. * 3. Neither the name of the OpenBLAS project nor the names of
  17. * its contributors may be used to endorse or promote products
  18. * derived from this software without specific prior written
  19. * permission.
  20. *
  21. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22. * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23. * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  25. * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  26. * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  27. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  28. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  29. * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  30. * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. */
  32. #include <vecintrin.h>
  33. #define VLEN_BYTES 16
  34. #define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))
  35. typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES)));
  36. /**
  37. * Load a vector into register, and hint on 8-byte alignment to improve
  38. * performance. gcc-9 and newer will create these hints by itself. For older
  39. * compiler versions, use inline assembly to explicitly express the hint.
  40. * Provide explicit hex encoding to cater for binutils versions that do not know
  41. * about vector-load with alignment hints yet.
  42. *
  43. * Note that, for block sizes where we apply vectorization, vectors in A will
  44. * always be 8-byte aligned.
  45. */
  46. static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
  47. vector_float const *restrict addr = (vector_float const *restrict)a;
  48. vector_float y;
  49. #if __GNUC__ < 9 && !defined(__clang__)
  50. // hex-encode vl %[out],%[addr],3
  51. asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
  52. : [ out ] "=v"(y)
  53. : [ addr ] "R"(*addr));
  54. #else
  55. y = *addr;
  56. #endif
  57. return y;
  58. }