@@ -97,7 +97,7 @@ CNRM2KERNEL = znrm2.S | |||||
ZNRM2KERNEL = znrm2.S | ZNRM2KERNEL = znrm2.S | ||||
DDOTKERNEL = dot.S | DDOTKERNEL = dot.S | ||||
SDOTKERNEL = dot.S | |||||
SDOTKERNEL = ../generic/dot.c | |||||
CDOTKERNEL = zdot.S | CDOTKERNEL = zdot.S | ||||
ZDOTKERNEL = zdot.S | ZDOTKERNEL = zdot.S | ||||
DSDOTKERNEL = dot.S | DSDOTKERNEL = dot.S | ||||
@@ -97,7 +97,7 @@ CNRM2KERNEL = znrm2.S | |||||
ZNRM2KERNEL = znrm2.S | ZNRM2KERNEL = znrm2.S | ||||
DDOTKERNEL = dot.S | DDOTKERNEL = dot.S | ||||
SDOTKERNEL = dot.S | |||||
SDOTKERNEL = ../generic/dot.c | |||||
CDOTKERNEL = zdot.S | CDOTKERNEL = zdot.S | ||||
ZDOTKERNEL = zdot.S | ZDOTKERNEL = zdot.S | ||||
DSDOTKERNEL = dot.S | DSDOTKERNEL = dot.S | ||||
@@ -70,7 +70,7 @@ DCOPYKERNEL = copy.S | |||||
CCOPYKERNEL = copy.S | CCOPYKERNEL = copy.S | ||||
ZCOPYKERNEL = copy.S | ZCOPYKERNEL = copy.S | ||||
SDOTKERNEL = dot.S | |||||
SDOTKERNEL = ../generic/dot.c | |||||
DDOTKERNEL = dot.S | DDOTKERNEL = dot.S | ||||
CDOTKERNEL = zdot.S | CDOTKERNEL = zdot.S | ||||
ZDOTKERNEL = zdot.S | ZDOTKERNEL = zdot.S | ||||
@@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#include "../simd/intrin.h" | |||||
#if defined(DSDOT) | #if defined(DSDOT) | ||||
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | ||||
#else | #else | ||||
@@ -47,9 +47,9 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
if ( (inc_x == 1) && (inc_y == 1) ) | if ( (inc_x == 1) && (inc_y == 1) ) | ||||
{ | { | ||||
int n1 = n & -4; | |||||
int n1 = n & -4; | |||||
#if V_SIMD && !defined(DSDOT) | #if V_SIMD && !defined(DSDOT) | ||||
const int vstep = v_nlanes_f32; | |||||
const int vstep = v_nlanes_f32; | |||||
const int unrollx4 = n & (-vstep * 4); | const int unrollx4 = n & (-vstep * 4); | ||||
const int unrollx = n & -vstep; | const int unrollx = n & -vstep; | ||||
v_f32 vsum0 = v_zero_f32(); | v_f32 vsum0 = v_zero_f32(); | ||||