|
- /*
- * Copyright (c) IBM Corporation 2020.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * 3. Neither the name of the OpenBLAS project nor the names of
- * its contributors may be used to endorse or promote products
- * derived from this software without specific prior written
- * permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
- * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
- #include <vecintrin.h>
-
- #define VLEN_BYTES 16
- #define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT))
-
- typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES)));
-
- /**
- * Load a vector into register, and hint on 8-byte alignment to improve
- * performance. gcc-9 and newer will create these hints by itself. For older
- * compiler versions, use inline assembly to explicitly express the hint.
- * Provide explicit hex encoding to cater for binutils versions that do not know
- * about vector-load with alignment hints yet.
- *
- * Note that, for block sizes where we apply vectorization, vectors in A will
- * always be 8-byte aligned.
- */
- static inline vector_float vec_load_hinted(FLOAT const *restrict a) {
- vector_float const *restrict addr = (vector_float const *restrict)a;
- vector_float y;
-
- #if __GNUC__ < 9 && !defined(__clang__)
- // hex-encode vl %[out],%[addr],3
- asm(".insn vrx,0xe70000003006,%[out],%[addr],3"
- : [ out ] "=v"(y)
- : [ addr ] "R"(*addr));
- #else
- y = *addr;
- #endif
-
- return y;
- }
|