|
|
@@ -0,0 +1,64 @@ |
|
|
|
/* |
|
|
|
* Copyright (c) IBM Corporation 2020. |
|
|
|
* All rights reserved. |
|
|
|
* |
|
|
|
* Redistribution and use in source and binary forms, with or without |
|
|
|
* modification, are permitted provided that the following conditions are |
|
|
|
* met: |
|
|
|
* |
|
|
|
* 1. Redistributions of source code must retain the above copyright |
|
|
|
* notice, this list of conditions and the following disclaimer. |
|
|
|
* |
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright |
|
|
|
* notice, this list of conditions and the following disclaimer in |
|
|
|
* the documentation and/or other materials provided with the |
|
|
|
* distribution. |
|
|
|
* 3. Neither the name of the OpenBLAS project nor the names of |
|
|
|
* its contributors may be used to endorse or promote products |
|
|
|
* derived from this software without specific prior written |
|
|
|
* permission. |
|
|
|
* |
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
|
|
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
|
|
|
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
|
|
|
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
|
|
|
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
|
|
|
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE |
|
|
|
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
|
|
|
*/ |
|
|
|
|
|
|
|
#include <vecintrin.h> |
|
|
|
|
|
|
|
#define VLEN_BYTES 16 |
|
|
|
#define VLEN_FLOATS (VLEN_BYTES / sizeof(FLOAT)) |
|
|
|
|
|
|
|
typedef FLOAT vector_float __attribute__ ((vector_size (VLEN_BYTES))); |
|
|
|
|
|
|
|
/** |
|
|
|
* Load a vector into register, and hint on 8-byte alignment to improve |
|
|
|
* performance. gcc-9 and newer will create these hints by itself. For older |
|
|
|
* compiler versions, use inline assembly to explicitly express the hint. |
|
|
|
* Provide explicit hex encoding to cater for binutils versions that do not know |
|
|
|
* about vector-load with alignment hints yet. |
|
|
|
* |
|
|
|
* Note that, for block sizes where we apply vectorization, vectors in A will |
|
|
|
* always be 8-byte aligned. |
|
|
|
*/ |
|
|
|
static inline vector_float vec_load_hinted(FLOAT const *restrict a) { |
|
|
|
vector_float const *restrict addr = (vector_float const *restrict)a; |
|
|
|
vector_float y; |
|
|
|
|
|
|
|
#if __GNUC__ < 9 && !defined(__clang__) |
|
|
|
// hex-encode vl %[out],%[addr],3 |
|
|
|
asm(".insn vrx,0xe70000003006,%[out],%[addr],3" |
|
|
|
: [ out ] "=v"(y) |
|
|
|
: [ addr ] "R"(*addr)); |
|
|
|
#else |
|
|
|
y = *addr; |
|
|
|
#endif |
|
|
|
|
|
|
|
return y; |
|
|
|
} |