Further rearranged the rotm kernel for the different architectures. (tags/v0.3.30)
@@ -79,6 +79,9 @@ macro(SetDefaultL1) | |||||
SetFallback(CROTKERNEL zrot.S) | SetFallback(CROTKERNEL zrot.S) | ||||
SetFallback(ZROTKERNEL zrot.S) | SetFallback(ZROTKERNEL zrot.S) | ||||
SetFallback(XROTKERNEL zrot.S) | SetFallback(XROTKERNEL zrot.S) | ||||
SetFallback(SROTMKERNEL rotm.S) | |||||
SetFallback(DROTMKERNEL rotm.S) | |||||
SetFallback(QROTMKERNEL rotm.S) | |||||
SetFallback(SSCALKERNEL scal.S) | SetFallback(SSCALKERNEL scal.S) | ||||
SetFallback(DSCALKERNEL scal.S) | SetFallback(DSCALKERNEL scal.S) | ||||
SetFallback(CSCALKERNEL zscal.S) | SetFallback(CSCALKERNEL zscal.S) | ||||
@@ -22,6 +22,7 @@ | |||||
#define DSUM_K dsum_k | #define DSUM_K dsum_k | ||||
#define DSWAP_K dswap_k | #define DSWAP_K dswap_k | ||||
#define DROT_K drot_k | #define DROT_K drot_k | ||||
#define DROTM_K drotm_k | |||||
#define DGEMV_N dgemv_n | #define DGEMV_N dgemv_n | ||||
#define DGEMV_T dgemv_t | #define DGEMV_T dgemv_t | ||||
@@ -180,6 +181,7 @@ | |||||
#define DSUM_K gotoblas -> dsum_k | #define DSUM_K gotoblas -> dsum_k | ||||
#define DSWAP_K gotoblas -> dswap_k | #define DSWAP_K gotoblas -> dswap_k | ||||
#define DROT_K gotoblas -> drot_k | #define DROT_K gotoblas -> drot_k | ||||
#define DROTM_K gotoblas -> drotm_k | |||||
#define DGEMV_N gotoblas -> dgemv_n | #define DGEMV_N gotoblas -> dgemv_n | ||||
#define DGEMV_T gotoblas -> dgemv_t | #define DGEMV_T gotoblas -> dgemv_t | ||||
@@ -213,9 +213,9 @@ int srotmg_k(float *, float *, float *, float *, float *); | |||||
int drotmg_k(double *, double *, double *, double *, double *); | int drotmg_k(double *, double *, double *, double *, double *); | ||||
int qrotmg_k(xdouble *, xdouble *, xdouble *, xdouble *, xdouble *); | int qrotmg_k(xdouble *, xdouble *, xdouble *, xdouble *, xdouble *); | ||||
int srotm_k (BLASLONG, float, BLASLONG, float, BLASLONG, float); | |||||
int drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double); | |||||
int qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble); | |||||
int srotm_k (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||||
int drotm_k (BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||||
int qrotm_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *); | |||||
int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); | int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); | ||||
@@ -70,6 +70,7 @@ | |||||
#define SUM_K QSUM_K | #define SUM_K QSUM_K | ||||
#define SWAP_K QSWAP_K | #define SWAP_K QSWAP_K | ||||
#define ROT_K QROT_K | #define ROT_K QROT_K | ||||
#define ROTM_K QROTM_K | |||||
#define GEMV_N QGEMV_N | #define GEMV_N QGEMV_N | ||||
#define GEMV_T QGEMV_T | #define GEMV_T QGEMV_T | ||||
@@ -361,6 +362,7 @@ | |||||
#define SUM_K DSUM_K | #define SUM_K DSUM_K | ||||
#define SWAP_K DSWAP_K | #define SWAP_K DSWAP_K | ||||
#define ROT_K DROT_K | #define ROT_K DROT_K | ||||
#define ROTM_K DROTM_K | |||||
#define GEMV_N DGEMV_N | #define GEMV_N DGEMV_N | ||||
#define GEMV_T DGEMV_T | #define GEMV_T DGEMV_T | ||||
@@ -977,6 +979,7 @@ | |||||
#define SUM_K SSUM_K | #define SUM_K SSUM_K | ||||
#define SWAP_K SSWAP_K | #define SWAP_K SSWAP_K | ||||
#define ROT_K SROT_K | #define ROT_K SROT_K | ||||
#define ROTM_K SROTM_K | |||||
#define GEMV_N SGEMV_N | #define GEMV_N SGEMV_N | ||||
#define GEMV_T SGEMV_T | #define GEMV_T SGEMV_T | ||||
@@ -197,6 +197,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); | |||||
//double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); | //double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); | ||||
int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); | int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); | ||||
int (*srotm_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||||
#endif | #endif | ||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | ||||
int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | ||||
@@ -330,6 +331,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); | |||||
#endif | #endif | ||||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1) | #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1) | ||||
int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); | int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); | ||||
int (*drotm_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||||
int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | ||||
int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | ||||
int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | ||||
@@ -439,6 +441,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG); | |||||
int (*qcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | int (*qcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
xdouble (*qdot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | xdouble (*qdot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
int (*qrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble); | int (*qrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble); | ||||
int (*qrotm_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *); | |||||
int (*qaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | int (*qaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
int (*qscal_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | int (*qscal_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
@@ -22,6 +22,7 @@ | |||||
#define QSUM_K qsum_k | #define QSUM_K qsum_k | ||||
#define QSWAP_K qswap_k | #define QSWAP_K qswap_k | ||||
#define QROT_K qrot_k | #define QROT_K qrot_k | ||||
#define QROTM_K qrotm_k | |||||
#define QGEMV_N qgemv_n | #define QGEMV_N qgemv_n | ||||
#define QGEMV_T qgemv_t | #define QGEMV_T qgemv_t | ||||
@@ -165,6 +166,7 @@ | |||||
#define QSUM_K gotoblas -> qsum_k | #define QSUM_K gotoblas -> qsum_k | ||||
#define QSWAP_K gotoblas -> qswap_k | #define QSWAP_K gotoblas -> qswap_k | ||||
#define QROT_K gotoblas -> qrot_k | #define QROT_K gotoblas -> qrot_k | ||||
#define QROTM_K gotoblas -> qrotm_k | |||||
#define QGEMV_N gotoblas -> qgemv_n | #define QGEMV_N gotoblas -> qgemv_n | ||||
#define QGEMV_T gotoblas -> qgemv_t | #define QGEMV_T gotoblas -> qgemv_t | ||||
@@ -24,6 +24,7 @@ | |||||
#define SSCAL_K sscal_k | #define SSCAL_K sscal_k | ||||
#define SSWAP_K sswap_k | #define SSWAP_K sswap_k | ||||
#define SROT_K srot_k | #define SROT_K srot_k | ||||
#define SROTM_K srotm_k | |||||
#define SGEMV_N sgemv_n | #define SGEMV_N sgemv_n | ||||
#define SGEMV_T sgemv_t | #define SGEMV_T sgemv_t | ||||
@@ -189,6 +190,7 @@ | |||||
#define SSCAL_K gotoblas -> sscal_k | #define SSCAL_K gotoblas -> sscal_k | ||||
#define SSWAP_K gotoblas -> sswap_k | #define SSWAP_K gotoblas -> sswap_k | ||||
#define SROT_K gotoblas -> srot_k | #define SROT_K gotoblas -> srot_k | ||||
#define SROTM_K gotoblas -> srotm_k | |||||
#define SGEMV_N gotoblas -> sgemv_n | #define SGEMV_N gotoblas -> sgemv_n | ||||
#define SGEMV_T gotoblas -> sgemv_t | #define SGEMV_T gotoblas -> sgemv_t | ||||
@@ -7,149 +7,21 @@ | |||||
void NAME(blasint *N, FLOAT *dx, blasint *INCX, FLOAT *dy, blasint *INCY, FLOAT *dparam){ | void NAME(blasint *N, FLOAT *dx, blasint *INCX, FLOAT *dy, blasint *INCY, FLOAT *dparam){ | ||||
blasint n = *N; | |||||
blasint incx = *INCX; | |||||
blasint incy = *INCY; | |||||
blasint n = *N; | |||||
blasint incx = *INCX; | |||||
blasint incy = *INCY; | |||||
PRINT_DEBUG_NAME | |||||
#else | #else | ||||
void CNAME(blasint n, FLOAT *dx, blasint incx, FLOAT *dy, blasint incy, FLOAT *dparam){ | void CNAME(blasint n, FLOAT *dx, blasint incx, FLOAT *dy, blasint incy, FLOAT *dparam){ | ||||
#endif | |||||
blasint i__1, i__2; | |||||
PRINT_DEBUG_CNAME; | |||||
blasint i__; | |||||
FLOAT w, z__; | |||||
blasint kx, ky; | |||||
FLOAT dh11, dh12, dh22, dh21, dflag; | |||||
blasint nsteps; | |||||
#ifndef CBLAS | |||||
PRINT_DEBUG_CNAME; | |||||
#else | |||||
PRINT_DEBUG_CNAME; | |||||
#endif | #endif | ||||
--dparam; | |||||
--dy; | |||||
--dx; | |||||
dflag = dparam[1]; | |||||
if (n <= 0 || dflag == - 2.0) goto L140; | |||||
if (! (incx == incy && incx > 0)) goto L70; | |||||
nsteps = n * incx; | |||||
if (dflag < 0.) { | |||||
goto L50; | |||||
} else if (dflag == 0) { | |||||
goto L10; | |||||
} else { | |||||
goto L30; | |||||
} | |||||
L10: | |||||
dh12 = dparam[4]; | |||||
dh21 = dparam[3]; | |||||
i__1 = nsteps; | |||||
i__2 = incx; | |||||
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { | |||||
w = dx[i__]; | |||||
z__ = dy[i__]; | |||||
dx[i__] = w + z__ * dh12; | |||||
dy[i__] = w * dh21 + z__; | |||||
/* L20: */ | |||||
} | |||||
goto L140; | |||||
L30: | |||||
dh11 = dparam[2]; | |||||
dh22 = dparam[5]; | |||||
i__2 = nsteps; | |||||
i__1 = incx; | |||||
for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { | |||||
w = dx[i__]; | |||||
z__ = dy[i__]; | |||||
dx[i__] = w * dh11 + z__; | |||||
dy[i__] = -w + dh22 * z__; | |||||
/* L40: */ | |||||
} | |||||
goto L140; | |||||
L50: | |||||
dh11 = dparam[2]; | |||||
dh12 = dparam[4]; | |||||
dh21 = dparam[3]; | |||||
dh22 = dparam[5]; | |||||
i__1 = nsteps; | |||||
i__2 = incx; | |||||
for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { | |||||
w = dx[i__]; | |||||
z__ = dy[i__]; | |||||
dx[i__] = w * dh11 + z__ * dh12; | |||||
dy[i__] = w * dh21 + z__ * dh22; | |||||
/* L60: */ | |||||
} | |||||
goto L140; | |||||
L70: | |||||
kx = 1; | |||||
ky = 1; | |||||
if (incx < 0) { | |||||
kx = (1 - n) * incx + 1; | |||||
} | |||||
if (incy < 0) { | |||||
ky = (1 - n) * incy + 1; | |||||
} | |||||
ROTM_K(n, dx, incx, dy, incy, dparam); | |||||
if (dflag < 0.) { | |||||
goto L120; | |||||
} else if (dflag == 0) { | |||||
goto L80; | |||||
} else { | |||||
goto L100; | |||||
} | |||||
L80: | |||||
dh12 = dparam[4]; | |||||
dh21 = dparam[3]; | |||||
i__2 = n; | |||||
for (i__ = 1; i__ <= i__2; ++i__) { | |||||
w = dx[kx]; | |||||
z__ = dy[ky]; | |||||
dx[kx] = w + z__ * dh12; | |||||
dy[ky] = w * dh21 + z__; | |||||
kx += incx; | |||||
ky += incy; | |||||
/* L90: */ | |||||
} | |||||
goto L140; | |||||
L100: | |||||
dh11 = dparam[2]; | |||||
dh22 = dparam[5]; | |||||
i__2 = n; | |||||
for (i__ = 1; i__ <= i__2; ++i__) { | |||||
w = dx[kx]; | |||||
z__ = dy[ky]; | |||||
dx[kx] = w * dh11 + z__; | |||||
dy[ky] = -w + dh22 * z__; | |||||
kx += incx; | |||||
ky += incy; | |||||
/* L110: */ | |||||
} | |||||
goto L140; | |||||
L120: | |||||
dh11 = dparam[2]; | |||||
dh12 = dparam[4]; | |||||
dh21 = dparam[3]; | |||||
dh22 = dparam[5]; | |||||
i__2 = n; | |||||
for (i__ = 1; i__ <= i__2; ++i__) { | |||||
w = dx[kx]; | |||||
z__ = dy[ky]; | |||||
dx[kx] = w * dh11 + z__ * dh12; | |||||
dy[ky] = w * dh21 + z__ * dh22; | |||||
kx += incx; | |||||
ky += incy; | |||||
/* L130: */ | |||||
} | |||||
L140: | |||||
return; | return; | ||||
} | } | ||||
@@ -65,6 +65,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) | ||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) | ||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) | ||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTMKERNEL}" "" "rotm_k" false "" "" false ${float_type}) | |||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) | ||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) | ||||
GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) | ||||
@@ -125,6 +126,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") | GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") | GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") | GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${SROTMKERNEL}" "" "rotm_k" false "" "" false "SINGLE") | |||||
endif () | endif () | ||||
if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | ||||
GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") | ||||
@@ -148,6 +150,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") | |||||
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | ||||
@@ -1105,6 +1108,7 @@ endif () | |||||
GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") | |||||
GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | ||||
GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | ||||
@@ -336,6 +336,18 @@ ifndef XROTKERNEL | |||||
XROTKERNEL = zrot.S | XROTKERNEL = zrot.S | ||||
endif | endif | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = rotm.S | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = rotm.S | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = rotm.S | |||||
endif | |||||
### SCAL ### | ### SCAL ### | ||||
ifndef SSCALKERNEL | ifndef SSCALKERNEL | ||||
@@ -504,21 +516,21 @@ SBLASOBJS += \ | |||||
sasum_k$(TSUFFIX).$(SUFFIX) ssum_k$(TSUFFIX).$(SUFFIX) saxpy_k$(TSUFFIX).$(SUFFIX) scopy_k$(TSUFFIX).$(SUFFIX) \ | sasum_k$(TSUFFIX).$(SUFFIX) ssum_k$(TSUFFIX).$(SUFFIX) saxpy_k$(TSUFFIX).$(SUFFIX) scopy_k$(TSUFFIX).$(SUFFIX) \ | ||||
sdot_k$(TSUFFIX).$(SUFFIX) sdsdot_k$(TSUFFIX).$(SUFFIX) dsdot_k$(TSUFFIX).$(SUFFIX) \ | sdot_k$(TSUFFIX).$(SUFFIX) sdsdot_k$(TSUFFIX).$(SUFFIX) dsdot_k$(TSUFFIX).$(SUFFIX) \ | ||||
snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) \ | snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) \ | ||||
saxpby_k$(TSUFFIX).$(SUFFIX) | |||||
saxpby_k$(TSUFFIX).$(SUFFIX) srotm_k$(TSUFFIX).$(SUFFIX) | |||||
DBLASOBJS += \ | DBLASOBJS += \ | ||||
damax_k$(TSUFFIX).$(SUFFIX) damin_k$(TSUFFIX).$(SUFFIX) dmax_k$(TSUFFIX).$(SUFFIX) dmin_k$(TSUFFIX).$(SUFFIX) \ | damax_k$(TSUFFIX).$(SUFFIX) damin_k$(TSUFFIX).$(SUFFIX) dmax_k$(TSUFFIX).$(SUFFIX) dmin_k$(TSUFFIX).$(SUFFIX) \ | ||||
idamax_k$(TSUFFIX).$(SUFFIX) idamin_k$(TSUFFIX).$(SUFFIX) idmax_k$(TSUFFIX).$(SUFFIX) idmin_k$(TSUFFIX).$(SUFFIX) \ | idamax_k$(TSUFFIX).$(SUFFIX) idamin_k$(TSUFFIX).$(SUFFIX) idmax_k$(TSUFFIX).$(SUFFIX) idmin_k$(TSUFFIX).$(SUFFIX) \ | ||||
dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \ | dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \ | ||||
dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \ | dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \ | ||||
daxpby_k$(TSUFFIX).$(SUFFIX) dsum_k$(TSUFFIX).$(SUFFIX) | |||||
daxpby_k$(TSUFFIX).$(SUFFIX) dsum_k$(TSUFFIX).$(SUFFIX) drotm_k$(TSUFFIX).$(SUFFIX) | |||||
QBLASOBJS += \ | QBLASOBJS += \ | ||||
qamax_k$(TSUFFIX).$(SUFFIX) qamin_k$(TSUFFIX).$(SUFFIX) qmax_k$(TSUFFIX).$(SUFFIX) qmin_k$(TSUFFIX).$(SUFFIX) \ | qamax_k$(TSUFFIX).$(SUFFIX) qamin_k$(TSUFFIX).$(SUFFIX) qmax_k$(TSUFFIX).$(SUFFIX) qmin_k$(TSUFFIX).$(SUFFIX) \ | ||||
iqamax_k$(TSUFFIX).$(SUFFIX) iqamin_k$(TSUFFIX).$(SUFFIX) iqmax_k$(TSUFFIX).$(SUFFIX) iqmin_k$(TSUFFIX).$(SUFFIX) \ | iqamax_k$(TSUFFIX).$(SUFFIX) iqamin_k$(TSUFFIX).$(SUFFIX) iqmax_k$(TSUFFIX).$(SUFFIX) iqmin_k$(TSUFFIX).$(SUFFIX) \ | ||||
qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \ | qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \ | ||||
qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX) \ | qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX) \ | ||||
qsum_k$(TSUFFIX).$(SUFFIX) | |||||
qsum_k$(TSUFFIX).$(SUFFIX) qrotm_k$(TSUFFIX).$(SUFFIX) | |||||
CBLASOBJS += \ | CBLASOBJS += \ | ||||
camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \ | camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \ | ||||
@@ -842,7 +854,16 @@ $(KDIR)drot_k$(TSUFFIX).$(SUFFIX) $(KDIR)drot_k$(TPSUFFIX).$(PSUFFIX) : $(KERN | |||||
$(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ | $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ | ||||
$(KDIR)qrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QROTKERNEL) | $(KDIR)qrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QROTKERNEL) | ||||
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | |||||
$(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | |||||
$(KDIR)srotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)srotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SROTMKERNEL) | |||||
$(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -UDOUBLE $< -o $@ | |||||
$(KDIR)drotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)drotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DROTMKERNEL) | |||||
$(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ | |||||
$(KDIR)qrotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)qrotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QROTMKERNEL) | |||||
$(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | |||||
$(KDIR)csrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)csrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CROTKERNEL) | $(KDIR)csrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)csrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CROTKERNEL) | ||||
$(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UDOUBLE $< -o $@ | $(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UDOUBLE $< -o $@ | ||||
@@ -122,3 +122,15 @@ ZTRSMKERNEL_LN = ztrsm_kernel_2x2_LN.S | |||||
ZTRSMKERNEL_LT = ztrsm_kernel_2x2_LT.S | ZTRSMKERNEL_LT = ztrsm_kernel_2x2_LT.S | ||||
ZTRSMKERNEL_RN = ztrsm_kernel_2x2_LT.S | ZTRSMKERNEL_RN = ztrsm_kernel_2x2_LT.S | ||||
ZTRSMKERNEL_RT = ztrsm_kernel_2x2_RT.S | ZTRSMKERNEL_RT = ztrsm_kernel_2x2_RT.S | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -43,4 +43,14 @@ ifndef ZGEMM_BETA | |||||
ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
endif | endif | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -45,4 +45,14 @@ ifndef ZGEMM_BETA | |||||
ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
endif | endif | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -171,3 +171,15 @@ QCABS_KERNEL = ../generic/cabs.c | |||||
#Dump kernel | #Dump kernel | ||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -146,4 +146,14 @@ DGEMM_BETA = ../generic/gemm_beta.c | |||||
CGEMM_BETA = ../generic/zgemm_beta.c | CGEMM_BETA = ../generic/zgemm_beta.c | ||||
ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -146,4 +146,14 @@ DGEMM_BETA = ../generic/gemm_beta.c | |||||
CGEMM_BETA = ../generic/zgemm_beta.c | CGEMM_BETA = ../generic/zgemm_beta.c | ||||
ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -0,0 +1,159 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2013, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include "common.h" | |||||
/*
 * Generic ROTM kernel: applies the 2x2 transformation H encoded in dparam to
 * each of the n element pairs (x, y) taken from dx and dy, in place.
 *
 * dparam[0] is a flag selecting the form of H; dparam[1], dparam[2],
 * dparam[3] and dparam[4] are read as h11, h21, h12 and h22 respectively.
 *
 *   flag == -2 : nothing is done
 *   flag <   0 : x' = x*h11 + y*h12,   y' = x*h21 + y*h22
 *   flag ==  0 : x' = x     + y*h12,   y' = x*h21 + y
 *   otherwise  : x' = x*h11 + y,       y' = -x    + h22*y
 *
 * n     number of element pairs; n <= 0 is a no-op
 * dx    first vector, stride incx
 * incx  stride of dx; when negative the walk starts at (1 - n) * incx
 * dy    second vector, stride incy
 * incy  stride of dy; when negative the walk starts at (1 - n) * incy
 *
 * Always returns 0.
 *
 * All indexing is 0-based on the pointers as passed in. The previous
 * f2c-style "--dx" adjustment formed a pointer before the start of the
 * caller's array, which is undefined behaviour even when never dereferenced.
 */
int CNAME(BLASLONG n, FLOAT *dx, BLASLONG incx, FLOAT *dy, BLASLONG incy, FLOAT *dparam)
{
    const FLOAT dflag = dparam[0];
    FLOAT h11, h12, h21, h22;
    FLOAT w, z;
    BLASLONG i, ix, iy;

    if (n <= 0 || dflag == - 2.0) return(0);

    if (incx == incy && incx > 0) {
        /* Equal positive strides: a single index walks both vectors. */
        const BLASLONG end = n * incx;

        if (dflag < 0.) {
            h11 = dparam[1];
            h12 = dparam[3];
            h21 = dparam[2];
            h22 = dparam[4];
            for (i = 0; i < end; i += incx) {
                w = dx[i];
                z = dy[i];
                dx[i] = w * h11 + z * h12;
                dy[i] = w * h21 + z * h22;
            }
        } else if (dflag == 0) {
            h12 = dparam[3];
            h21 = dparam[2];
            for (i = 0; i < end; i += incx) {
                w = dx[i];
                z = dy[i];
                dx[i] = w + z * h12;
                dy[i] = w * h21 + z;
            }
        } else {
            /* Also reached when the flag is NaN, as in the original branch order. */
            h11 = dparam[1];
            h22 = dparam[4];
            for (i = 0; i < end; i += incx) {
                w = dx[i];
                z = dy[i];
                dx[i] = w * h11 + z;
                dy[i] = -w + h22 * z;
            }
        }
        return(0);
    }

    /* Unequal or non-positive strides: independent indices; a negative
       stride starts at the far end so the walk stays inside the vector. */
    ix = (incx < 0) ? (1 - n) * incx : 0;
    iy = (incy < 0) ? (1 - n) * incy : 0;

    if (dflag < 0.) {
        h11 = dparam[1];
        h12 = dparam[3];
        h21 = dparam[2];
        h22 = dparam[4];
        for (i = 0; i < n; ++i) {
            w = dx[ix];
            z = dy[iy];
            dx[ix] = w * h11 + z * h12;
            dy[iy] = w * h21 + z * h22;
            ix += incx;
            iy += incy;
        }
    } else if (dflag == 0) {
        h12 = dparam[3];
        h21 = dparam[2];
        for (i = 0; i < n; ++i) {
            w = dx[ix];
            z = dy[iy];
            dx[ix] = w + z * h12;
            dy[iy] = w * h21 + z;
            ix += incx;
            iy += incy;
        }
    } else {
        h11 = dparam[1];
        h22 = dparam[4];
        for (i = 0; i < n; ++i) {
            w = dx[ix];
            z = dy[iy];
            dx[ix] = w * h11 + z;
            dy[iy] = -w + h22 * z;
            ix += incx;
            iy += incy;
        }
    }

    return(0);
}
@@ -142,3 +142,15 @@ ZTRSMKERNEL_RT = ztrsm_kernel_RT.S | |||||
CGEMM3MKERNEL = zgemm3m_kernel.S | CGEMM3MKERNEL = zgemm3m_kernel.S | ||||
ZGEMM3MKERNEL = zgemm3m_kernel.S | ZGEMM3MKERNEL = zgemm3m_kernel.S | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -236,3 +236,15 @@ ZGEMM3MKERNEL = zgemm3m_kernel.S | |||||
endif | endif | ||||
DSDOTKERNEL = dot.S | DSDOTKERNEL = dot.S | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -169,3 +169,15 @@ QCABS_KERNEL = ../generic/cabs.c | |||||
#Dump kernel | #Dump kernel | ||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -43,4 +43,14 @@ ifndef ZGEMM_BETA | |||||
ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
endif | endif | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -158,3 +158,15 @@ ZHEMV_L_KERNEL = ../generic/zhemv_k.c | |||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -199,3 +199,15 @@ endif | |||||
ifndef IQMAXKERNEL | ifndef IQMAXKERNEL | ||||
IQMAXKERNEL = imax.S | IQMAXKERNEL = imax.S | ||||
endif | endif | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -158,3 +158,15 @@ ZHEMV_L_KERNEL = ../generic/zhemv_k.c | |||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -73,3 +73,15 @@ endif | |||||
ifndef IQMAXKERNEL | ifndef IQMAXKERNEL | ||||
IQMAXKERNEL = imax.S | IQMAXKERNEL = imax.S | ||||
endif | endif | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -27,4 +27,14 @@ ifndef ZGEMM_BETA | |||||
ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
endif | endif | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -71,6 +71,10 @@ DROTKERNEL = rot_vector.c | |||||
CROTKERNEL = zrot_vector.c | CROTKERNEL = zrot_vector.c | ||||
ZROTKERNEL = zrot_vector.c | ZROTKERNEL = zrot_vector.c | ||||
SROTMKERNEL = ../generic/rotm.c | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
SSCALKERNEL = scal_vector.c | SSCALKERNEL = scal_vector.c | ||||
DSCALKERNEL = scal_vector.c | DSCALKERNEL = scal_vector.c | ||||
CSCALKERNEL = zscal_vector.c | CSCALKERNEL = zscal_vector.c | ||||
@@ -71,6 +71,10 @@ DROTKERNEL = ../riscv64/rot.c | |||||
CROTKERNEL = ../riscv64/zrot.c | CROTKERNEL = ../riscv64/zrot.c | ||||
ZROTKERNEL = ../riscv64/zrot.c | ZROTKERNEL = ../riscv64/zrot.c | ||||
SROTMKERNEL = ../generic/rotm.c | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
SSCALKERNEL = ../riscv64/scal.c | SSCALKERNEL = ../riscv64/scal.c | ||||
DSCALKERNEL = ../riscv64/scal.c | DSCALKERNEL = ../riscv64/scal.c | ||||
CSCALKERNEL = ../riscv64/zscal.c | CSCALKERNEL = ../riscv64/zscal.c | ||||
@@ -71,6 +71,10 @@ DROTKERNEL = rot_rvv.c | |||||
CROTKERNEL = zrot_rvv.c | CROTKERNEL = zrot_rvv.c | ||||
ZROTKERNEL = zrot_rvv.c | ZROTKERNEL = zrot_rvv.c | ||||
SROTMKERNEL = ../generic/rotm.c | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
SSCALKERNEL = scal_rvv.c | SSCALKERNEL = scal_rvv.c | ||||
DSCALKERNEL = scal_rvv.c | DSCALKERNEL = scal_rvv.c | ||||
CSCALKERNEL = zscal_rvv.c | CSCALKERNEL = zscal_rvv.c | ||||
@@ -66,6 +66,10 @@ DROTKERNEL = rot_vector.c | |||||
CROTKERNEL = zrot_vector.c | CROTKERNEL = zrot_vector.c | ||||
ZROTKERNEL = zrot_vector.c | ZROTKERNEL = zrot_vector.c | ||||
SROTMKERNEL = ../generic/rotm.c | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
SSCALKERNEL = scal_vector.c | SSCALKERNEL = scal_vector.c | ||||
DSCALKERNEL = scal_vector.c | DSCALKERNEL = scal_vector.c | ||||
CSCALKERNEL = zscal_vector.c | CSCALKERNEL = zscal_vector.c | ||||
@@ -98,6 +98,10 @@ DROTKERNEL = rot_rvv.c | |||||
CROTKERNEL = zrot_rvv.c | CROTKERNEL = zrot_rvv.c | ||||
ZROTKERNEL = zrot_rvv.c | ZROTKERNEL = zrot_rvv.c | ||||
SROTMKERNEL = rotm_rvv.c | |||||
DROTMKERNEL = rotm_rvv.c | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
SSCALKERNEL = scal_rvv.c | SSCALKERNEL = scal_rvv.c | ||||
DSCALKERNEL = scal_rvv.c | DSCALKERNEL = scal_rvv.c | ||||
CSCALKERNEL = zscal_rvv.c | CSCALKERNEL = zscal_rvv.c | ||||
@@ -0,0 +1,260 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2013, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include "common.h" | |||||
/*
 * Precision-dependent aliases for the RISC-V Vector intrinsics used by this
 * kernel.  When DOUBLE is defined the 64-bit element variants are selected,
 * otherwise the 32-bit element variants; both use the m8 register grouping.
 *
 *   VSETVL          - request a vector length for the remaining element count
 *   FLOAT_V_T       - vector register type holding FLOAT elements
 *   VLSEV_FLOAT     - strided vector load
 *   VSSEV_FLOAT     - strided vector store
 *   VFMACCVF_FLOAT  - vd = vd + scalar * vs
 *   VFMULVF_FLOAT   - vd = vs * scalar
 *   VFMSACVF_FLOAT  - vd = scalar * vs - vd
 */
#if defined(DOUBLE)
#define VSETVL(n)       __riscv_vsetvl_e64m8(n)
#define FLOAT_V_T       vfloat64m8_t
#define VLSEV_FLOAT     __riscv_vlse64_v_f64m8
#define VSSEV_FLOAT     __riscv_vsse64_v_f64m8
#define VFMACCVF_FLOAT  __riscv_vfmacc_vf_f64m8
#define VFMULVF_FLOAT   __riscv_vfmul_vf_f64m8
#define VFMSACVF_FLOAT  __riscv_vfmsac_vf_f64m8
#else
#define VSETVL(n)       __riscv_vsetvl_e32m8(n)
#define FLOAT_V_T       vfloat32m8_t
#define VLSEV_FLOAT     __riscv_vlse32_v_f32m8
#define VSSEV_FLOAT     __riscv_vsse32_v_f32m8
#define VFMACCVF_FLOAT  __riscv_vfmacc_vf_f32m8
#define VFMULVF_FLOAT   __riscv_vfmul_vf_f32m8
#define VFMSACVF_FLOAT  __riscv_vfmsac_vf_f32m8
#endif
/*
 * Apply a modified Givens rotation H to n element pairs of the strided
 * vectors dx and dy, in place:
 *
 *     [ x_i ]       [ x_i ]
 *     [ y_i ]  = H  [ y_i ]      for i = 0 .. n-1
 *
 * Parameters
 *   n       number of element pairs; nothing is done when n <= 0
 *   dx      first vector, updated in place
 *   incx    element increment of dx (may be negative)
 *   dy      second vector, updated in place
 *   incy    element increment of dy (may be negative)
 *   dparam  five values: dparam[0] is the flag selecting the form of H,
 *           dparam[1..4] hold h11, h21, h12, h22 in that order
 *
 * Form of H selected by dparam[0]:
 *   == -2   H is the identity; the vectors are left untouched
 *   <   0   H = [ h11 h12 ; h21 h22 ]
 *   ==  0   H = [  1  h12 ; h21  1  ]
 *   >   0   H = [ h11  1  ; -1  h22 ]
 *
 * For a negative increment the logical first element of the vector is the
 * one stored at the highest address, i.e. element i lives at offset
 * (i - (n - 1)) * inc from the pointer passed in.  The start pointer is moved
 * there once, and the signed byte stride is then handed directly to the
 * strided load/store intrinsics, so x and y are always paired in logical
 * order and every access stays inside the n-element vectors.
 *
 * Returns 0 always.
 */
int CNAME(BLASLONG n, FLOAT *dx, BLASLONG incx, FLOAT *dy, BLASLONG incy, FLOAT *dparam)
{
    const FLOAT dflag = dparam[0];
    FLOAT dh11, dh12, dh21, dh22;
    FLOAT_V_T v_w, v_z, v_dx, v_dy;
    BLASLONG stride_x, stride_y, done;
    FLOAT *px, *py;
    size_t vl;

    if (n <= 0 || dflag == -2.0) return 0;

    /* Move to the logical first element when walking backwards. */
    if (incx < 0) dx += (1 - n) * incx;
    if (incy < 0) dy += (1 - n) * incy;

    /* Signed byte strides; the cast keeps the product in signed arithmetic. */
    stride_x = incx * (BLASLONG)sizeof(FLOAT);
    stride_y = incy * (BLASLONG)sizeof(FLOAT);

    if (dflag < 0.) {
        /* Full matrix: x' = h11*x + h12*y, y' = h21*x + h22*y. */
        dh11 = dparam[1];
        dh21 = dparam[2];
        dh12 = dparam[3];
        dh22 = dparam[4];
        for (done = 0; done < n; done += (BLASLONG)vl) {
            vl = VSETVL(n - done);
            px = dx + done * incx;
            py = dy + done * incy;
            v_w = VLSEV_FLOAT(px, stride_x, vl);
            v_z = VLSEV_FLOAT(py, stride_y, vl);
            v_dx = VFMULVF_FLOAT(v_w, dh11, vl);
            v_dx = VFMACCVF_FLOAT(v_dx, dh12, v_z, vl);
            VSSEV_FLOAT(px, stride_x, v_dx, vl);
            v_dy = VFMULVF_FLOAT(v_w, dh21, vl);
            v_dy = VFMACCVF_FLOAT(v_dy, dh22, v_z, vl);
            VSSEV_FLOAT(py, stride_y, v_dy, vl);
        }
    } else if (dflag == 0) {
        /* Unit diagonal: x' = x + h12*y, y' = h21*x + y. */
        dh12 = dparam[3];
        dh21 = dparam[2];
        for (done = 0; done < n; done += (BLASLONG)vl) {
            vl = VSETVL(n - done);
            px = dx + done * incx;
            py = dy + done * incy;
            v_w = VLSEV_FLOAT(px, stride_x, vl);
            v_z = VLSEV_FLOAT(py, stride_y, vl);
            v_dx = VFMACCVF_FLOAT(v_w, dh12, v_z, vl);
            v_dy = VFMACCVF_FLOAT(v_z, dh21, v_w, vl);
            VSSEV_FLOAT(px, stride_x, v_dx, vl);
            VSSEV_FLOAT(py, stride_y, v_dy, vl);
        }
    } else {
        /* Unit anti-diagonal: x' = h11*x + y, y' = -x + h22*y. */
        dh11 = dparam[1];
        dh22 = dparam[4];
        for (done = 0; done < n; done += (BLASLONG)vl) {
            vl = VSETVL(n - done);
            px = dx + done * incx;
            py = dy + done * incy;
            v_w = VLSEV_FLOAT(px, stride_x, vl);
            v_z = VLSEV_FLOAT(py, stride_y, vl);
            v_dx = VFMACCVF_FLOAT(v_z, dh11, v_w, vl);
            v_dy = VFMSACVF_FLOAT(v_w, dh22, v_z, vl);
            VSSEV_FLOAT(px, stride_x, v_dx, vl);
            VSSEV_FLOAT(py, stride_y, v_dy, vl);
        }
    }

    return 0;
}
@@ -72,9 +72,9 @@ gotoblas_t TABLE_NAME = { | |||||
samax_kTS, samin_kTS, smax_kTS, smin_kTS, | samax_kTS, samin_kTS, smax_kTS, smin_kTS, | ||||
isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, | isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, | ||||
snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS, | |||||
snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS, | |||||
dsdot_kTS, | dsdot_kTS, | ||||
srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, | |||||
srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, | |||||
sbgemv_nTS, sbgemv_tTS, sger_kTS, | sbgemv_nTS, sbgemv_tTS, sger_kTS, | ||||
ssymv_LTS, ssymv_UTS, | ssymv_LTS, ssymv_UTS, | ||||
@@ -158,7 +158,7 @@ gotoblas_t TABLE_NAME = { | |||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | ||||
scopy_kTS, sdot_kTS, | scopy_kTS, sdot_kTS, | ||||
// dsdot_kTS, | // dsdot_kTS, | ||||
srot_kTS, saxpy_kTS, | |||||
srot_kTS, srotm_kTS, saxpy_kTS, | |||||
#endif | #endif | ||||
#if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | ||||
sscal_kTS, | sscal_kTS, | ||||
@@ -260,6 +260,7 @@ gotoblas_t TABLE_NAME = { | |||||
#endif | #endif | ||||
#if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1) | #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1) | ||||
drot_kTS, | drot_kTS, | ||||
drotm_kTS, | |||||
daxpy_kTS, | daxpy_kTS, | ||||
dscal_kTS, | dscal_kTS, | ||||
dswap_kTS, | dswap_kTS, | ||||
@@ -331,10 +332,9 @@ gotoblas_t TABLE_NAME = { | |||||
qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS, | qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS, | ||||
iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS, | iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS, | ||||
qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS, | qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS, | ||||
qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS, | |||||
qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS, | |||||
qgemv_nTS, qgemv_tTS, qger_kTS, | qgemv_nTS, qgemv_tTS, qger_kTS, | ||||
qsymv_LTS, qsymv_UTS, | qsymv_LTS, qsymv_UTS, | ||||
qgemm_kernelTS, qgemm_betaTS, | qgemm_kernelTS, qgemm_betaTS, | ||||
#if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N | #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N | ||||
qgemm_incopyTS, qgemm_itcopyTS, | qgemm_incopyTS, qgemm_itcopyTS, | ||||
@@ -75,3 +75,14 @@ DGEMM_BETA = ../generic/gemm_beta.c | |||||
CGEMM_BETA = ../generic/zgemm_beta.c | CGEMM_BETA = ../generic/zgemm_beta.c | ||||
ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -189,3 +189,14 @@ ZGEMM_BETA = ../generic/zgemm_beta.c | |||||
QGEMM_BETA = ../generic/gemm_beta.c | QGEMM_BETA = ../generic/gemm_beta.c | ||||
XGEMM_BETA = ../generic/zgemm_beta.c | XGEMM_BETA = ../generic/zgemm_beta.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -162,3 +162,15 @@ ZHEMV_L_KERNEL = ../generic/zhemv_k.c | |||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -290,6 +290,18 @@ ifndef QROTKERNEL | |||||
QROTKERNEL = rot.S | QROTKERNEL = rot.S | ||||
endif | endif | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef CROTKERNEL | ifndef CROTKERNEL | ||||
CROTKERNEL = zrot_sse.S | CROTKERNEL = zrot_sse.S | ||||
endif | endif | ||||
@@ -168,3 +168,15 @@ QCABS_KERNEL = ../generic/cabs.c | |||||
#Dump kernel | #Dump kernel | ||||
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -27,4 +27,14 @@ ifndef ZGEMM_BETA | |||||
ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
endif | endif | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -135,5 +135,14 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | ||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | ||||
ifndef SROTMKERNEL | |||||
SROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef DROTMKERNEL | |||||
DROTMKERNEL = ../generic/rotm.c | |||||
endif | |||||
ifndef QROTMKERNEL | |||||
QROTMKERNEL = ../generic/rotm.c | |||||
endif |
@@ -70,6 +70,24 @@ CTEST(rot,drot_inc_1) | |||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); | ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); | ||||
} | } | ||||
} | } | ||||
CTEST(rot,drotm_inc_1) | |||||
{ | |||||
blasint i = 0; | |||||
blasint N = 12, incX = 1, incY = 1; | |||||
double param[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; | |||||
double x_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; | |||||
double y_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; | |||||
double x_referece[] = {3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0, 27.0, 30.0, 33.0, 36.0}; | |||||
double y_referece[] = {4.0, 8.0, 12.0, 16.0, 20.0, 24.0, 28.0, 32.0, 36.0, 40.0, 44.0, 48.0}; | |||||
//OpenBLAS | |||||
BLASFUNC(drotm)(&N, x_actual, &incX, y_actual, &incY, param); | |||||
for(i = 0; i < N; i++){ | |||||
ASSERT_DBL_NEAR_TOL(x_referece[i], x_actual[i], DOUBLE_EPS); | |||||
ASSERT_DBL_NEAR_TOL(y_referece[i], y_actual[i], DOUBLE_EPS); | |||||
} | |||||
} | |||||
#endif | #endif | ||||
#ifdef BUILD_COMPLEX16 | #ifdef BUILD_COMPLEX16 | ||||
@@ -130,6 +148,24 @@ CTEST(rot,srot_inc_1) | |||||
ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS); | ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS); | ||||
} | } | ||||
} | } | ||||
CTEST(rot,srotm_inc_1) | |||||
{ | |||||
blasint i = 0; | |||||
blasint N = 12, incX = 1, incY = 1; | |||||
float param[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; | |||||
float x_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; | |||||
float y_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; | |||||
float x_referece[] = {3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0, 27.0, 30.0, 33.0, 36.0}; | |||||
float y_referece[] = {4.0, 8.0, 12.0, 16.0, 20.0, 24.0, 28.0, 32.0, 36.0, 40.0, 44.0, 48.0}; | |||||
//OpenBLAS | |||||
BLASFUNC(srotm)(&N, x_actual, &incX, y_actual, &incY, param); | |||||
for(i = 0; i < N; i++){ | |||||
ASSERT_DBL_NEAR_TOL(x_referece[i], x_actual[i], SINGLE_EPS); | |||||
ASSERT_DBL_NEAR_TOL(y_referece[i], y_actual[i], SINGLE_EPS); | |||||
} | |||||
} | |||||
#endif | #endif | ||||
#ifdef BUILD_COMPLEX | #ifdef BUILD_COMPLEX | ||||