Further rearranged the rotm kernel for the different architectures. (tags/v0.3.30)
| @@ -79,6 +79,9 @@ macro(SetDefaultL1) | |||||
| SetFallback(CROTKERNEL zrot.S) | SetFallback(CROTKERNEL zrot.S) | ||||
| SetFallback(ZROTKERNEL zrot.S) | SetFallback(ZROTKERNEL zrot.S) | ||||
| SetFallback(XROTKERNEL zrot.S) | SetFallback(XROTKERNEL zrot.S) | ||||
| SetFallback(SROTMKERNEL rotm.S) | |||||
| SetFallback(DROTMKERNEL rotm.S) | |||||
| SetFallback(QROTMKERNEL rotm.S) | |||||
| SetFallback(SSCALKERNEL scal.S) | SetFallback(SSCALKERNEL scal.S) | ||||
| SetFallback(DSCALKERNEL scal.S) | SetFallback(DSCALKERNEL scal.S) | ||||
| SetFallback(CSCALKERNEL zscal.S) | SetFallback(CSCALKERNEL zscal.S) | ||||
| @@ -22,6 +22,7 @@ | |||||
| #define DSUM_K dsum_k | #define DSUM_K dsum_k | ||||
| #define DSWAP_K dswap_k | #define DSWAP_K dswap_k | ||||
| #define DROT_K drot_k | #define DROT_K drot_k | ||||
| #define DROTM_K drotm_k | |||||
| #define DGEMV_N dgemv_n | #define DGEMV_N dgemv_n | ||||
| #define DGEMV_T dgemv_t | #define DGEMV_T dgemv_t | ||||
| @@ -180,6 +181,7 @@ | |||||
| #define DSUM_K gotoblas -> dsum_k | #define DSUM_K gotoblas -> dsum_k | ||||
| #define DSWAP_K gotoblas -> dswap_k | #define DSWAP_K gotoblas -> dswap_k | ||||
| #define DROT_K gotoblas -> drot_k | #define DROT_K gotoblas -> drot_k | ||||
| #define DROTM_K gotoblas -> drotm_k | |||||
| #define DGEMV_N gotoblas -> dgemv_n | #define DGEMV_N gotoblas -> dgemv_n | ||||
| #define DGEMV_T gotoblas -> dgemv_t | #define DGEMV_T gotoblas -> dgemv_t | ||||
| @@ -213,9 +213,9 @@ int srotmg_k(float *, float *, float *, float *, float *); | |||||
| int drotmg_k(double *, double *, double *, double *, double *); | int drotmg_k(double *, double *, double *, double *, double *); | ||||
| int qrotmg_k(xdouble *, xdouble *, xdouble *, xdouble *, xdouble *); | int qrotmg_k(xdouble *, xdouble *, xdouble *, xdouble *, xdouble *); | ||||
| int srotm_k (BLASLONG, float, BLASLONG, float, BLASLONG, float); | |||||
| int drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double); | |||||
| int qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble); | |||||
| int srotm_k (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||||
| int drotm_k (BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||||
| int qrotm_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *); | |||||
| int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); | int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG); | ||||
| @@ -70,6 +70,7 @@ | |||||
| #define SUM_K QSUM_K | #define SUM_K QSUM_K | ||||
| #define SWAP_K QSWAP_K | #define SWAP_K QSWAP_K | ||||
| #define ROT_K QROT_K | #define ROT_K QROT_K | ||||
| #define ROTM_K QROTM_K | |||||
| #define GEMV_N QGEMV_N | #define GEMV_N QGEMV_N | ||||
| #define GEMV_T QGEMV_T | #define GEMV_T QGEMV_T | ||||
| @@ -361,6 +362,7 @@ | |||||
| #define SUM_K DSUM_K | #define SUM_K DSUM_K | ||||
| #define SWAP_K DSWAP_K | #define SWAP_K DSWAP_K | ||||
| #define ROT_K DROT_K | #define ROT_K DROT_K | ||||
| #define ROTM_K DROTM_K | |||||
| #define GEMV_N DGEMV_N | #define GEMV_N DGEMV_N | ||||
| #define GEMV_T DGEMV_T | #define GEMV_T DGEMV_T | ||||
| @@ -977,6 +979,7 @@ | |||||
| #define SUM_K SSUM_K | #define SUM_K SSUM_K | ||||
| #define SWAP_K SSWAP_K | #define SWAP_K SSWAP_K | ||||
| #define ROT_K SROT_K | #define ROT_K SROT_K | ||||
| #define ROTM_K SROTM_K | |||||
| #define GEMV_N SGEMV_N | #define GEMV_N SGEMV_N | ||||
| #define GEMV_T SGEMV_T | #define GEMV_T SGEMV_T | ||||
| @@ -197,6 +197,7 @@ BLASLONG (*ismin_k) (BLASLONG, float *, BLASLONG); | |||||
| //double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); | //double (*dsdot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG); | ||||
| int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); | int (*srot_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float, float); | ||||
| int (*srotm_k) (BLASLONG, float *, BLASLONG, float *, BLASLONG, float *); | |||||
| #endif | #endif | ||||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | ||||
| int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | int (*saxpy_k) (BLASLONG, BLASLONG, BLASLONG, float, float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | ||||
| @@ -330,6 +331,7 @@ BLASLONG (*idmin_k) (BLASLONG, double *, BLASLONG); | |||||
| #endif | #endif | ||||
| #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1) | #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1) | ||||
| int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); | int (*drot_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double); | ||||
| int (*drotm_k) (BLASLONG, double *, BLASLONG, double *, BLASLONG, double *); | |||||
| int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | int (*daxpy_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | ||||
| int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | int (*dscal_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | ||||
| int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | int (*dswap_k) (BLASLONG, BLASLONG, BLASLONG, double, double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | ||||
| @@ -439,6 +441,7 @@ BLASLONG (*iqmin_k) (BLASLONG, xdouble *, BLASLONG); | |||||
| int (*qcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | int (*qcopy_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
| xdouble (*qdot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | xdouble (*qdot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
| int (*qrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble); | int (*qrot_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble); | ||||
| int (*qrotm_k) (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *); | |||||
| int (*qaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | int (*qaxpy_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
| int (*qscal_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | int (*qscal_k) (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | ||||
| @@ -22,6 +22,7 @@ | |||||
| #define QSUM_K qsum_k | #define QSUM_K qsum_k | ||||
| #define QSWAP_K qswap_k | #define QSWAP_K qswap_k | ||||
| #define QROT_K qrot_k | #define QROT_K qrot_k | ||||
| #define QROTM_K qrotm_k | |||||
| #define QGEMV_N qgemv_n | #define QGEMV_N qgemv_n | ||||
| #define QGEMV_T qgemv_t | #define QGEMV_T qgemv_t | ||||
| @@ -165,6 +166,7 @@ | |||||
| #define QSUM_K gotoblas -> qsum_k | #define QSUM_K gotoblas -> qsum_k | ||||
| #define QSWAP_K gotoblas -> qswap_k | #define QSWAP_K gotoblas -> qswap_k | ||||
| #define QROT_K gotoblas -> qrot_k | #define QROT_K gotoblas -> qrot_k | ||||
| #define QROTM_K gotoblas -> qrotm_k | |||||
| #define QGEMV_N gotoblas -> qgemv_n | #define QGEMV_N gotoblas -> qgemv_n | ||||
| #define QGEMV_T gotoblas -> qgemv_t | #define QGEMV_T gotoblas -> qgemv_t | ||||
| @@ -24,6 +24,7 @@ | |||||
| #define SSCAL_K sscal_k | #define SSCAL_K sscal_k | ||||
| #define SSWAP_K sswap_k | #define SSWAP_K sswap_k | ||||
| #define SROT_K srot_k | #define SROT_K srot_k | ||||
| #define SROTM_K srotm_k | |||||
| #define SGEMV_N sgemv_n | #define SGEMV_N sgemv_n | ||||
| #define SGEMV_T sgemv_t | #define SGEMV_T sgemv_t | ||||
| @@ -189,6 +190,7 @@ | |||||
| #define SSCAL_K gotoblas -> sscal_k | #define SSCAL_K gotoblas -> sscal_k | ||||
| #define SSWAP_K gotoblas -> sswap_k | #define SSWAP_K gotoblas -> sswap_k | ||||
| #define SROT_K gotoblas -> srot_k | #define SROT_K gotoblas -> srot_k | ||||
| #define SROTM_K gotoblas -> srotm_k | |||||
| #define SGEMV_N gotoblas -> sgemv_n | #define SGEMV_N gotoblas -> sgemv_n | ||||
| #define SGEMV_T gotoblas -> sgemv_t | #define SGEMV_T gotoblas -> sgemv_t | ||||
| @@ -7,149 +7,21 @@ | |||||
| void NAME(blasint *N, FLOAT *dx, blasint *INCX, FLOAT *dy, blasint *INCY, FLOAT *dparam){ | void NAME(blasint *N, FLOAT *dx, blasint *INCX, FLOAT *dy, blasint *INCY, FLOAT *dparam){ | ||||
| blasint n = *N; | |||||
| blasint incx = *INCX; | |||||
| blasint incy = *INCY; | |||||
| blasint n = *N; | |||||
| blasint incx = *INCX; | |||||
| blasint incy = *INCY; | |||||
| PRINT_DEBUG_NAME | |||||
| #else | #else | ||||
| void CNAME(blasint n, FLOAT *dx, blasint incx, FLOAT *dy, blasint incy, FLOAT *dparam){ | void CNAME(blasint n, FLOAT *dx, blasint incx, FLOAT *dy, blasint incy, FLOAT *dparam){ | ||||
| #endif | |||||
| blasint i__1, i__2; | |||||
| PRINT_DEBUG_CNAME; | |||||
| blasint i__; | |||||
| FLOAT w, z__; | |||||
| blasint kx, ky; | |||||
| FLOAT dh11, dh12, dh22, dh21, dflag; | |||||
| blasint nsteps; | |||||
| #ifndef CBLAS | |||||
| PRINT_DEBUG_CNAME; | |||||
| #else | |||||
| PRINT_DEBUG_CNAME; | |||||
| #endif | #endif | ||||
| --dparam; | |||||
| --dy; | |||||
| --dx; | |||||
| dflag = dparam[1]; | |||||
| if (n <= 0 || dflag == - 2.0) goto L140; | |||||
| if (! (incx == incy && incx > 0)) goto L70; | |||||
| nsteps = n * incx; | |||||
| if (dflag < 0.) { | |||||
| goto L50; | |||||
| } else if (dflag == 0) { | |||||
| goto L10; | |||||
| } else { | |||||
| goto L30; | |||||
| } | |||||
| L10: | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| i__1 = nsteps; | |||||
| i__2 = incx; | |||||
| for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { | |||||
| w = dx[i__]; | |||||
| z__ = dy[i__]; | |||||
| dx[i__] = w + z__ * dh12; | |||||
| dy[i__] = w * dh21 + z__; | |||||
| /* L20: */ | |||||
| } | |||||
| goto L140; | |||||
| L30: | |||||
| dh11 = dparam[2]; | |||||
| dh22 = dparam[5]; | |||||
| i__2 = nsteps; | |||||
| i__1 = incx; | |||||
| for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { | |||||
| w = dx[i__]; | |||||
| z__ = dy[i__]; | |||||
| dx[i__] = w * dh11 + z__; | |||||
| dy[i__] = -w + dh22 * z__; | |||||
| /* L40: */ | |||||
| } | |||||
| goto L140; | |||||
| L50: | |||||
| dh11 = dparam[2]; | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| dh22 = dparam[5]; | |||||
| i__1 = nsteps; | |||||
| i__2 = incx; | |||||
| for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { | |||||
| w = dx[i__]; | |||||
| z__ = dy[i__]; | |||||
| dx[i__] = w * dh11 + z__ * dh12; | |||||
| dy[i__] = w * dh21 + z__ * dh22; | |||||
| /* L60: */ | |||||
| } | |||||
| goto L140; | |||||
| L70: | |||||
| kx = 1; | |||||
| ky = 1; | |||||
| if (incx < 0) { | |||||
| kx = (1 - n) * incx + 1; | |||||
| } | |||||
| if (incy < 0) { | |||||
| ky = (1 - n) * incy + 1; | |||||
| } | |||||
| ROTM_K(n, dx, incx, dy, incy, dparam); | |||||
| if (dflag < 0.) { | |||||
| goto L120; | |||||
| } else if (dflag == 0) { | |||||
| goto L80; | |||||
| } else { | |||||
| goto L100; | |||||
| } | |||||
| L80: | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| i__2 = n; | |||||
| for (i__ = 1; i__ <= i__2; ++i__) { | |||||
| w = dx[kx]; | |||||
| z__ = dy[ky]; | |||||
| dx[kx] = w + z__ * dh12; | |||||
| dy[ky] = w * dh21 + z__; | |||||
| kx += incx; | |||||
| ky += incy; | |||||
| /* L90: */ | |||||
| } | |||||
| goto L140; | |||||
| L100: | |||||
| dh11 = dparam[2]; | |||||
| dh22 = dparam[5]; | |||||
| i__2 = n; | |||||
| for (i__ = 1; i__ <= i__2; ++i__) { | |||||
| w = dx[kx]; | |||||
| z__ = dy[ky]; | |||||
| dx[kx] = w * dh11 + z__; | |||||
| dy[ky] = -w + dh22 * z__; | |||||
| kx += incx; | |||||
| ky += incy; | |||||
| /* L110: */ | |||||
| } | |||||
| goto L140; | |||||
| L120: | |||||
| dh11 = dparam[2]; | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| dh22 = dparam[5]; | |||||
| i__2 = n; | |||||
| for (i__ = 1; i__ <= i__2; ++i__) { | |||||
| w = dx[kx]; | |||||
| z__ = dy[ky]; | |||||
| dx[kx] = w * dh11 + z__ * dh12; | |||||
| dy[ky] = w * dh21 + z__ * dh22; | |||||
| kx += incx; | |||||
| ky += incy; | |||||
| /* L130: */ | |||||
| } | |||||
| L140: | |||||
| return; | return; | ||||
| } | } | ||||
| @@ -65,6 +65,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}COPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}NRM2KERNEL}" "" "nrm2_k" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTKERNEL}" "" "rot_k" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}ROTMKERNEL}" "" "rotm_k" false "" "" false ${float_type}) | |||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}SCALKERNEL}" "" "scal_k" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}SWAPKERNEL}" "" "swap_k" false "" "" false ${float_type}) | ||||
| GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) | GenerateNamedObjects("${KERNELDIR}/${${float_char}AXPBYKERNEL}" "" "axpby_k" false "" "" false ${float_type}) | ||||
| @@ -125,6 +126,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
| GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") | GenerateNamedObjects("${KERNELDIR}/${SNRM2KERNEL}" "" "nrm2_k" false "" "" false "SINGLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") | GenerateNamedObjects("${KERNELDIR}/${SDOTKERNEL}" "" "dot_k" false "" "" false "SINGLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") | GenerateNamedObjects("${KERNELDIR}/${SROTKERNEL}" "" "rot_k" false "" "" false "SINGLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${SROTMKERNEL}" "" "rotm_k" false "" "" false "SINGLE") | |||||
| endif () | endif () | ||||
| if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | if (BUILD_COMPLEX16 AND NOT BUILD_DOUBLE) | ||||
| GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DAMAXKERNEL}" "USE_ABS" "amax_k" false "" "" false "DOUBLE") | ||||
| @@ -148,6 +150,7 @@ function (build_core TARGET_CORE KDIR TSUFFIX KERNEL_DEFINITIONS) | |||||
| GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") | |||||
| GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | ||||
| @@ -1105,6 +1108,7 @@ endif () | |||||
| GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DCOPYKERNEL}" "C_INTERFACE" "copy_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DNRM2KERNEL}" "" "nrm2_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DROTKERNEL}" "" "rot_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DROTMKERNEL}" "" "rotm_k" false "" "" false "DOUBLE") | |||||
| GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DDOTKERNEL}" "" "dot_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DSWAPKERNEL}" "" "swap_k" false "" "" false "DOUBLE") | ||||
| GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | GenerateNamedObjects("${KERNELDIR}/${DAXPYKERNEL}" "" "axpy_k" false "" "" false "DOUBLE") | ||||
| @@ -336,6 +336,18 @@ ifndef XROTKERNEL | |||||
| XROTKERNEL = zrot.S | XROTKERNEL = zrot.S | ||||
| endif | endif | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = rotm.S | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = rotm.S | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = rotm.S | |||||
| endif | |||||
| ### SCAL ### | ### SCAL ### | ||||
| ifndef SSCALKERNEL | ifndef SSCALKERNEL | ||||
| @@ -504,21 +516,21 @@ SBLASOBJS += \ | |||||
| sasum_k$(TSUFFIX).$(SUFFIX) ssum_k$(TSUFFIX).$(SUFFIX) saxpy_k$(TSUFFIX).$(SUFFIX) scopy_k$(TSUFFIX).$(SUFFIX) \ | sasum_k$(TSUFFIX).$(SUFFIX) ssum_k$(TSUFFIX).$(SUFFIX) saxpy_k$(TSUFFIX).$(SUFFIX) scopy_k$(TSUFFIX).$(SUFFIX) \ | ||||
| sdot_k$(TSUFFIX).$(SUFFIX) sdsdot_k$(TSUFFIX).$(SUFFIX) dsdot_k$(TSUFFIX).$(SUFFIX) \ | sdot_k$(TSUFFIX).$(SUFFIX) sdsdot_k$(TSUFFIX).$(SUFFIX) dsdot_k$(TSUFFIX).$(SUFFIX) \ | ||||
| snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) \ | snrm2_k$(TSUFFIX).$(SUFFIX) srot_k$(TSUFFIX).$(SUFFIX) sscal_k$(TSUFFIX).$(SUFFIX) sswap_k$(TSUFFIX).$(SUFFIX) \ | ||||
| saxpby_k$(TSUFFIX).$(SUFFIX) | |||||
| saxpby_k$(TSUFFIX).$(SUFFIX) srotm_k$(TSUFFIX).$(SUFFIX) | |||||
| DBLASOBJS += \ | DBLASOBJS += \ | ||||
| damax_k$(TSUFFIX).$(SUFFIX) damin_k$(TSUFFIX).$(SUFFIX) dmax_k$(TSUFFIX).$(SUFFIX) dmin_k$(TSUFFIX).$(SUFFIX) \ | damax_k$(TSUFFIX).$(SUFFIX) damin_k$(TSUFFIX).$(SUFFIX) dmax_k$(TSUFFIX).$(SUFFIX) dmin_k$(TSUFFIX).$(SUFFIX) \ | ||||
| idamax_k$(TSUFFIX).$(SUFFIX) idamin_k$(TSUFFIX).$(SUFFIX) idmax_k$(TSUFFIX).$(SUFFIX) idmin_k$(TSUFFIX).$(SUFFIX) \ | idamax_k$(TSUFFIX).$(SUFFIX) idamin_k$(TSUFFIX).$(SUFFIX) idmax_k$(TSUFFIX).$(SUFFIX) idmin_k$(TSUFFIX).$(SUFFIX) \ | ||||
| dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \ | dasum_k$(TSUFFIX).$(SUFFIX) daxpy_k$(TSUFFIX).$(SUFFIX) dcopy_k$(TSUFFIX).$(SUFFIX) ddot_k$(TSUFFIX).$(SUFFIX) \ | ||||
| dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \ | dnrm2_k$(TSUFFIX).$(SUFFIX) drot_k$(TSUFFIX).$(SUFFIX) dscal_k$(TSUFFIX).$(SUFFIX) dswap_k$(TSUFFIX).$(SUFFIX) \ | ||||
| daxpby_k$(TSUFFIX).$(SUFFIX) dsum_k$(TSUFFIX).$(SUFFIX) | |||||
| daxpby_k$(TSUFFIX).$(SUFFIX) dsum_k$(TSUFFIX).$(SUFFIX) drotm_k$(TSUFFIX).$(SUFFIX) | |||||
| QBLASOBJS += \ | QBLASOBJS += \ | ||||
| qamax_k$(TSUFFIX).$(SUFFIX) qamin_k$(TSUFFIX).$(SUFFIX) qmax_k$(TSUFFIX).$(SUFFIX) qmin_k$(TSUFFIX).$(SUFFIX) \ | qamax_k$(TSUFFIX).$(SUFFIX) qamin_k$(TSUFFIX).$(SUFFIX) qmax_k$(TSUFFIX).$(SUFFIX) qmin_k$(TSUFFIX).$(SUFFIX) \ | ||||
| iqamax_k$(TSUFFIX).$(SUFFIX) iqamin_k$(TSUFFIX).$(SUFFIX) iqmax_k$(TSUFFIX).$(SUFFIX) iqmin_k$(TSUFFIX).$(SUFFIX) \ | iqamax_k$(TSUFFIX).$(SUFFIX) iqamin_k$(TSUFFIX).$(SUFFIX) iqmax_k$(TSUFFIX).$(SUFFIX) iqmin_k$(TSUFFIX).$(SUFFIX) \ | ||||
| qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \ | qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \ | ||||
| qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX) \ | qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX) \ | ||||
| qsum_k$(TSUFFIX).$(SUFFIX) | |||||
| qsum_k$(TSUFFIX).$(SUFFIX) qrotm_k$(TSUFFIX).$(SUFFIX) | |||||
| CBLASOBJS += \ | CBLASOBJS += \ | ||||
| camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \ | camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \ | ||||
| @@ -842,7 +854,16 @@ $(KDIR)drot_k$(TSUFFIX).$(SUFFIX) $(KDIR)drot_k$(TPSUFFIX).$(PSUFFIX) : $(KERN | |||||
| $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ | $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ | ||||
| $(KDIR)qrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QROTKERNEL) | $(KDIR)qrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QROTKERNEL) | ||||
| $(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | |||||
| $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | |||||
| $(KDIR)srotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)srotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SROTMKERNEL) | |||||
| $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -UDOUBLE $< -o $@ | |||||
| $(KDIR)drotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)drotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(DROTMKERNEL) | |||||
| $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DDOUBLE $< -o $@ | |||||
| $(KDIR)qrotm_k$(TSUFFIX).$(SUFFIX) $(KDIR)qrotm_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QROTMKERNEL) | |||||
| $(CC) -c $(CFLAGS) $(FMAFLAG) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | |||||
| $(KDIR)csrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)csrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CROTKERNEL) | $(KDIR)csrot_k$(TSUFFIX).$(SUFFIX) $(KDIR)csrot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CROTKERNEL) | ||||
| $(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UDOUBLE $< -o $@ | $(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UDOUBLE $< -o $@ | ||||
| @@ -122,3 +122,15 @@ ZTRSMKERNEL_LN = ztrsm_kernel_2x2_LN.S | |||||
| ZTRSMKERNEL_LT = ztrsm_kernel_2x2_LT.S | ZTRSMKERNEL_LT = ztrsm_kernel_2x2_LT.S | ||||
| ZTRSMKERNEL_RN = ztrsm_kernel_2x2_LT.S | ZTRSMKERNEL_RN = ztrsm_kernel_2x2_LT.S | ||||
| ZTRSMKERNEL_RT = ztrsm_kernel_2x2_RT.S | ZTRSMKERNEL_RT = ztrsm_kernel_2x2_RT.S | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -43,4 +43,14 @@ ifndef ZGEMM_BETA | |||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| endif | endif | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -45,4 +45,14 @@ ifndef ZGEMM_BETA | |||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| endif | endif | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -171,3 +171,15 @@ QCABS_KERNEL = ../generic/cabs.c | |||||
| #Dump kernel | #Dump kernel | ||||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -146,4 +146,14 @@ DGEMM_BETA = ../generic/gemm_beta.c | |||||
| CGEMM_BETA = ../generic/zgemm_beta.c | CGEMM_BETA = ../generic/zgemm_beta.c | ||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -146,4 +146,14 @@ DGEMM_BETA = ../generic/gemm_beta.c | |||||
| CGEMM_BETA = ../generic/zgemm_beta.c | CGEMM_BETA = ../generic/zgemm_beta.c | ||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -0,0 +1,159 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2013, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include "common.h" | |||||
| int CNAME(BLASLONG n, FLOAT *dx, BLASLONG incx, FLOAT *dy, BLASLONG incy, FLOAT *dparam) | |||||
| { | |||||
| BLASLONG i__1, i__2; | |||||
| BLASLONG i__; | |||||
| FLOAT w, z__; | |||||
| BLASLONG kx, ky; | |||||
| FLOAT dh11, dh12, dh22, dh21, dflag; | |||||
| BLASLONG nsteps; | |||||
| --dparam; | |||||
| --dy; | |||||
| --dx; | |||||
| dflag = dparam[1]; | |||||
| if (n <= 0 || dflag == - 2.0) goto L140; | |||||
| if (! (incx == incy && incx > 0)) goto L70; | |||||
| nsteps = n * incx; | |||||
| if (dflag < 0.) { | |||||
| goto L50; | |||||
| } else if (dflag == 0) { | |||||
| goto L10; | |||||
| } else { | |||||
| goto L30; | |||||
| } | |||||
| L10: | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| i__1 = nsteps; | |||||
| i__2 = incx; | |||||
| for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { | |||||
| w = dx[i__]; | |||||
| z__ = dy[i__]; | |||||
| dx[i__] = w + z__ * dh12; | |||||
| dy[i__] = w * dh21 + z__; | |||||
| /* L20: */ | |||||
| } | |||||
| goto L140; | |||||
| L30: | |||||
| dh11 = dparam[2]; | |||||
| dh22 = dparam[5]; | |||||
| i__2 = nsteps; | |||||
| i__1 = incx; | |||||
| for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) { | |||||
| w = dx[i__]; | |||||
| z__ = dy[i__]; | |||||
| dx[i__] = w * dh11 + z__; | |||||
| dy[i__] = -w + dh22 * z__; | |||||
| /* L40: */ | |||||
| } | |||||
| goto L140; | |||||
| L50: | |||||
| dh11 = dparam[2]; | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| dh22 = dparam[5]; | |||||
| i__1 = nsteps; | |||||
| i__2 = incx; | |||||
| for (i__ = 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ += i__2) { | |||||
| w = dx[i__]; | |||||
| z__ = dy[i__]; | |||||
| dx[i__] = w * dh11 + z__ * dh12; | |||||
| dy[i__] = w * dh21 + z__ * dh22; | |||||
| /* L60: */ | |||||
| } | |||||
| goto L140; | |||||
| L70: | |||||
| kx = 1; | |||||
| ky = 1; | |||||
| if (incx < 0) { | |||||
| kx = (1 - n) * incx + 1; | |||||
| } | |||||
| if (incy < 0) { | |||||
| ky = (1 - n) * incy + 1; | |||||
| } | |||||
| if (dflag < 0.) { | |||||
| goto L120; | |||||
| } else if (dflag == 0) { | |||||
| goto L80; | |||||
| } else { | |||||
| goto L100; | |||||
| } | |||||
| L80: | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| i__2 = n; | |||||
| for (i__ = 1; i__ <= i__2; ++i__) { | |||||
| w = dx[kx]; | |||||
| z__ = dy[ky]; | |||||
| dx[kx] = w + z__ * dh12; | |||||
| dy[ky] = w * dh21 + z__; | |||||
| kx += incx; | |||||
| ky += incy; | |||||
| /* L90: */ | |||||
| } | |||||
| goto L140; | |||||
| L100: | |||||
| dh11 = dparam[2]; | |||||
| dh22 = dparam[5]; | |||||
| i__2 = n; | |||||
| for (i__ = 1; i__ <= i__2; ++i__) { | |||||
| w = dx[kx]; | |||||
| z__ = dy[ky]; | |||||
| dx[kx] = w * dh11 + z__; | |||||
| dy[ky] = -w + dh22 * z__; | |||||
| kx += incx; | |||||
| ky += incy; | |||||
| /* L110: */ | |||||
| } | |||||
| goto L140; | |||||
| L120: | |||||
| dh11 = dparam[2]; | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| dh22 = dparam[5]; | |||||
| i__2 = n; | |||||
| for (i__ = 1; i__ <= i__2; ++i__) { | |||||
| w = dx[kx]; | |||||
| z__ = dy[ky]; | |||||
| dx[kx] = w * dh11 + z__ * dh12; | |||||
| dy[ky] = w * dh21 + z__ * dh22; | |||||
| kx += incx; | |||||
| ky += incy; | |||||
| /* L130: */ | |||||
| } | |||||
| L140: | |||||
| return(0); | |||||
| } | |||||
| @@ -142,3 +142,15 @@ ZTRSMKERNEL_RT = ztrsm_kernel_RT.S | |||||
| CGEMM3MKERNEL = zgemm3m_kernel.S | CGEMM3MKERNEL = zgemm3m_kernel.S | ||||
| ZGEMM3MKERNEL = zgemm3m_kernel.S | ZGEMM3MKERNEL = zgemm3m_kernel.S | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -236,3 +236,15 @@ ZGEMM3MKERNEL = zgemm3m_kernel.S | |||||
| endif | endif | ||||
| DSDOTKERNEL = dot.S | DSDOTKERNEL = dot.S | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -169,3 +169,15 @@ QCABS_KERNEL = ../generic/cabs.c | |||||
| #Dump kernel | #Dump kernel | ||||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -43,4 +43,14 @@ ifndef ZGEMM_BETA | |||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| endif | endif | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -158,3 +158,15 @@ ZHEMV_L_KERNEL = ../generic/zhemv_k.c | |||||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -199,3 +199,15 @@ endif | |||||
| ifndef IQMAXKERNEL | ifndef IQMAXKERNEL | ||||
| IQMAXKERNEL = imax.S | IQMAXKERNEL = imax.S | ||||
| endif | endif | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -158,3 +158,15 @@ ZHEMV_L_KERNEL = ../generic/zhemv_k.c | |||||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -73,3 +73,15 @@ endif | |||||
| ifndef IQMAXKERNEL | ifndef IQMAXKERNEL | ||||
| IQMAXKERNEL = imax.S | IQMAXKERNEL = imax.S | ||||
| endif | endif | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -27,4 +27,14 @@ ifndef ZGEMM_BETA | |||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| endif | endif | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -71,6 +71,10 @@ DROTKERNEL = rot_vector.c | |||||
| CROTKERNEL = zrot_vector.c | CROTKERNEL = zrot_vector.c | ||||
| ZROTKERNEL = zrot_vector.c | ZROTKERNEL = zrot_vector.c | ||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| SSCALKERNEL = scal_vector.c | SSCALKERNEL = scal_vector.c | ||||
| DSCALKERNEL = scal_vector.c | DSCALKERNEL = scal_vector.c | ||||
| CSCALKERNEL = zscal_vector.c | CSCALKERNEL = zscal_vector.c | ||||
| @@ -71,6 +71,10 @@ DROTKERNEL = ../riscv64/rot.c | |||||
| CROTKERNEL = ../riscv64/zrot.c | CROTKERNEL = ../riscv64/zrot.c | ||||
| ZROTKERNEL = ../riscv64/zrot.c | ZROTKERNEL = ../riscv64/zrot.c | ||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| SSCALKERNEL = ../riscv64/scal.c | SSCALKERNEL = ../riscv64/scal.c | ||||
| DSCALKERNEL = ../riscv64/scal.c | DSCALKERNEL = ../riscv64/scal.c | ||||
| CSCALKERNEL = ../riscv64/zscal.c | CSCALKERNEL = ../riscv64/zscal.c | ||||
| @@ -71,6 +71,10 @@ DROTKERNEL = rot_rvv.c | |||||
| CROTKERNEL = zrot_rvv.c | CROTKERNEL = zrot_rvv.c | ||||
| ZROTKERNEL = zrot_rvv.c | ZROTKERNEL = zrot_rvv.c | ||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| SSCALKERNEL = scal_rvv.c | SSCALKERNEL = scal_rvv.c | ||||
| DSCALKERNEL = scal_rvv.c | DSCALKERNEL = scal_rvv.c | ||||
| CSCALKERNEL = zscal_rvv.c | CSCALKERNEL = zscal_rvv.c | ||||
| @@ -66,6 +66,10 @@ DROTKERNEL = rot_vector.c | |||||
| CROTKERNEL = zrot_vector.c | CROTKERNEL = zrot_vector.c | ||||
| ZROTKERNEL = zrot_vector.c | ZROTKERNEL = zrot_vector.c | ||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| SSCALKERNEL = scal_vector.c | SSCALKERNEL = scal_vector.c | ||||
| DSCALKERNEL = scal_vector.c | DSCALKERNEL = scal_vector.c | ||||
| CSCALKERNEL = zscal_vector.c | CSCALKERNEL = zscal_vector.c | ||||
| @@ -98,6 +98,10 @@ DROTKERNEL = rot_rvv.c | |||||
| CROTKERNEL = zrot_rvv.c | CROTKERNEL = zrot_rvv.c | ||||
| ZROTKERNEL = zrot_rvv.c | ZROTKERNEL = zrot_rvv.c | ||||
| SROTMKERNEL = rotm_rvv.c | |||||
| DROTMKERNEL = rotm_rvv.c | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| SSCALKERNEL = scal_rvv.c | SSCALKERNEL = scal_rvv.c | ||||
| DSCALKERNEL = scal_rvv.c | DSCALKERNEL = scal_rvv.c | ||||
| CSCALKERNEL = zscal_rvv.c | CSCALKERNEL = zscal_rvv.c | ||||
| @@ -0,0 +1,260 @@ | |||||
| /*************************************************************************** | |||||
| Copyright (c) 2013, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include "common.h" | |||||
| #if !defined(DOUBLE) | |||||
| #define VSETVL(n) __riscv_vsetvl_e32m8(n) | |||||
| #define FLOAT_V_T vfloat32m8_t | |||||
| #define VLSEV_FLOAT __riscv_vlse32_v_f32m8 | |||||
| #define VSSEV_FLOAT __riscv_vsse32_v_f32m8 | |||||
| #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f32m8 | |||||
| #define VFMULVF_FLOAT __riscv_vfmul_vf_f32m8 | |||||
| #define VFMSACVF_FLOAT __riscv_vfmsac_vf_f32m8 | |||||
| #else | |||||
| #define VSETVL(n) __riscv_vsetvl_e64m8(n) | |||||
| #define FLOAT_V_T vfloat64m8_t | |||||
| #define VLSEV_FLOAT __riscv_vlse64_v_f64m8 | |||||
| #define VSSEV_FLOAT __riscv_vsse64_v_f64m8 | |||||
| #define VFMACCVF_FLOAT __riscv_vfmacc_vf_f64m8 | |||||
| #define VFMULVF_FLOAT __riscv_vfmul_vf_f64m8 | |||||
| #define VFMSACVF_FLOAT __riscv_vfmsac_vf_f64m8 | |||||
| #endif | |||||
| int CNAME(BLASLONG n, FLOAT *dx, BLASLONG incx, FLOAT *dy, BLASLONG incy, FLOAT *dparam) | |||||
| { | |||||
| BLASLONG i__1, i__2; | |||||
| BLASLONG kx, ky; | |||||
| FLOAT dh11, dh12, dh22, dh21, dflag; | |||||
| BLASLONG nsteps; | |||||
| --dparam; | |||||
| --dy; | |||||
| --dx; | |||||
| FLOAT_V_T v_w, v_z__, v_dx, v_dy; | |||||
| BLASLONG stride, stride_x, stride_y, offset; | |||||
| dflag = dparam[1]; | |||||
| if (n <= 0 || dflag == - 2.0) goto L140; | |||||
| if (!(incx == incy && incx > 0)) goto L70; | |||||
| nsteps = n * incx; | |||||
| if (dflag < 0.) { | |||||
| goto L50; | |||||
| } else if (dflag == 0) { | |||||
| goto L10; | |||||
| } else { | |||||
| goto L30; | |||||
| } | |||||
| L10: | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| i__1 = nsteps; | |||||
| i__2 = incx; | |||||
| if(i__2 < 0){ | |||||
| offset = i__1 - 2; | |||||
| dx += offset; | |||||
| dy += offset; | |||||
| i__1 = -i__1; | |||||
| i__2 = -i__2; | |||||
| } | |||||
| stride = i__2 * sizeof(FLOAT); | |||||
| n = i__1 / i__2; | |||||
| for (size_t vl; n > 0; n -= vl, dx += vl*i__2, dy += vl*i__2) { | |||||
| vl = VSETVL(n); | |||||
| v_w = VLSEV_FLOAT(&dx[1], stride, vl); | |||||
| v_z__ = VLSEV_FLOAT(&dy[1], stride, vl); | |||||
| v_dx = VFMACCVF_FLOAT(v_w, dh12, v_z__, vl); | |||||
| v_dy = VFMACCVF_FLOAT(v_z__, dh21, v_w, vl); | |||||
| VSSEV_FLOAT(&dx[1], stride, v_dx, vl); | |||||
| VSSEV_FLOAT(&dy[1], stride, v_dy, vl); | |||||
| } | |||||
| goto L140; | |||||
| L30: | |||||
| dh11 = dparam[2]; | |||||
| dh22 = dparam[5]; | |||||
| i__2 = nsteps; | |||||
| i__1 = incx; | |||||
| if(i__1 < 0){ | |||||
| offset = i__2 - 2; | |||||
| dx += offset; | |||||
| dy += offset; | |||||
| i__1 = -i__1; | |||||
| i__2 = -i__2; | |||||
| } | |||||
| stride = i__1 * sizeof(FLOAT); | |||||
| n = i__2 / i__1; | |||||
| for (size_t vl; n > 0; n -= vl, dx += vl*i__1, dy += vl*i__1) { | |||||
| vl = VSETVL(n); | |||||
| v_w = VLSEV_FLOAT(&dx[1], stride, vl); | |||||
| v_z__ = VLSEV_FLOAT(&dy[1], stride, vl); | |||||
| v_dx = VFMACCVF_FLOAT(v_z__, dh11, v_w, vl); | |||||
| v_dy = VFMSACVF_FLOAT(v_w, dh22, v_z__, vl); | |||||
| VSSEV_FLOAT(&dx[1], stride, v_dx, vl); | |||||
| VSSEV_FLOAT(&dy[1], stride, v_dy, vl); | |||||
| } | |||||
| goto L140; | |||||
| L50: | |||||
| dh11 = dparam[2]; | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| dh22 = dparam[5]; | |||||
| i__1 = nsteps; | |||||
| i__2 = incx; | |||||
| if(i__2 < 0){ | |||||
| offset = i__1 - 2; | |||||
| dx += offset; | |||||
| dy += offset; | |||||
| i__1 = -i__1; | |||||
| i__2 = -i__2; | |||||
| } | |||||
| stride = i__2 * sizeof(FLOAT); | |||||
| n = i__1 / i__2; | |||||
| for (size_t vl; n > 0; n -= vl, dx += vl*i__2, dy += vl*i__2) { | |||||
| vl = VSETVL(n); | |||||
| v_w = VLSEV_FLOAT(&dx[1], stride, vl); | |||||
| v_z__ = VLSEV_FLOAT(&dy[1], stride, vl); | |||||
| v_dx = VFMULVF_FLOAT(v_w, dh11, vl); | |||||
| v_dx = VFMACCVF_FLOAT(v_dx, dh12, v_z__, vl); | |||||
| VSSEV_FLOAT(&dx[1], stride, v_dx, vl); | |||||
| v_dy = VFMULVF_FLOAT(v_w, dh21, vl); | |||||
| v_dy = VFMACCVF_FLOAT(v_dy, dh22, v_z__, vl); | |||||
| VSSEV_FLOAT(&dy[1], stride, v_dy, vl); | |||||
| } | |||||
| goto L140; | |||||
| L70: | |||||
| kx = 1; | |||||
| ky = 1; | |||||
| if (incx < 0) { | |||||
| kx = (1 - n) * incx + 1; | |||||
| } | |||||
| if (incy < 0) { | |||||
| ky = (1 - n) * incy + 1; | |||||
| } | |||||
| if (dflag < 0.) { | |||||
| goto L120; | |||||
| } else if (dflag == 0) { | |||||
| goto L80; | |||||
| } else { | |||||
| goto L100; | |||||
| } | |||||
| L80: | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| if(incx < 0){ | |||||
| incx = -incx; | |||||
| dx -= n*incx; | |||||
| } | |||||
| if(incy < 0){ | |||||
| incy = -incy; | |||||
| dy -= n*incy; | |||||
| } | |||||
| stride_x = incx * sizeof(FLOAT); | |||||
| stride_y = incy * sizeof(FLOAT); | |||||
| for (size_t vl; n > 0; n -= vl, dx += vl*incx, dy += vl*incy) { | |||||
| vl = VSETVL(n); | |||||
| v_w = VLSEV_FLOAT(&dx[kx], stride_x, vl); | |||||
| v_z__ = VLSEV_FLOAT(&dy[ky], stride_y, vl); | |||||
| v_dx = VFMACCVF_FLOAT(v_w, dh12, v_z__, vl); | |||||
| v_dy = VFMACCVF_FLOAT(v_z__, dh21, v_w, vl); | |||||
| VSSEV_FLOAT(&dx[kx], stride_x, v_dx, vl); | |||||
| VSSEV_FLOAT(&dy[ky], stride_y, v_dy, vl); | |||||
| } | |||||
| goto L140; | |||||
| L100: | |||||
| dh11 = dparam[2]; | |||||
| dh22 = dparam[5]; | |||||
| if(incx < 0){ | |||||
| incx = -incx; | |||||
| dx -= n*incx; | |||||
| } | |||||
| if(incy < 0){ | |||||
| incy = -incy; | |||||
| dy -= n*incy; | |||||
| } | |||||
| stride_x = incx * sizeof(FLOAT); | |||||
| stride_y = incy * sizeof(FLOAT); | |||||
| for (size_t vl; n > 0; n -= vl, dx += vl*incx, dy += vl*incy) { | |||||
| vl = VSETVL(n); | |||||
| v_w = VLSEV_FLOAT(&dx[kx], stride_x, vl); | |||||
| v_z__ = VLSEV_FLOAT(&dy[ky], stride_y, vl); | |||||
| v_dx = VFMACCVF_FLOAT(v_z__, dh11, v_w, vl); | |||||
| v_dy = VFMSACVF_FLOAT(v_w, dh22, v_z__, vl); | |||||
| VSSEV_FLOAT(&dx[kx], stride_x, v_dx, vl); | |||||
| VSSEV_FLOAT(&dy[ky], stride_y, v_dy, vl); | |||||
| } | |||||
| goto L140; | |||||
| L120: | |||||
| dh11 = dparam[2]; | |||||
| dh12 = dparam[4]; | |||||
| dh21 = dparam[3]; | |||||
| dh22 = dparam[5]; | |||||
| if(incx < 0){ | |||||
| incx = -incx; | |||||
| dx -= n*incx; | |||||
| } | |||||
| if(incy < 0){ | |||||
| incy = -incy; | |||||
| dy -= n*incy; | |||||
| } | |||||
| stride_x = incx * sizeof(FLOAT); | |||||
| stride_y = incy * sizeof(FLOAT); | |||||
| for (size_t vl; n > 0; n -= vl, dx += vl*incx, dy += vl*incy) { | |||||
| vl = VSETVL(n); | |||||
| v_w = VLSEV_FLOAT(&dx[kx], stride_x, vl); | |||||
| v_z__ = VLSEV_FLOAT(&dy[ky], stride_y, vl); | |||||
| v_dx = VFMULVF_FLOAT(v_w, dh11, vl); | |||||
| v_dx = VFMACCVF_FLOAT(v_dx, dh12, v_z__, vl); | |||||
| VSSEV_FLOAT(&dx[kx], stride_x, v_dx, vl); | |||||
| v_dy = VFMULVF_FLOAT(v_w, dh21, vl); | |||||
| v_dy = VFMACCVF_FLOAT(v_dy, dh22, v_z__, vl); | |||||
| VSSEV_FLOAT(&dy[ky], stride_y, v_dy, vl); | |||||
| } | |||||
| L140: | |||||
| return(0); | |||||
| } | |||||
| @@ -72,9 +72,9 @@ gotoblas_t TABLE_NAME = { | |||||
| samax_kTS, samin_kTS, smax_kTS, smin_kTS, | samax_kTS, samin_kTS, smax_kTS, smin_kTS, | ||||
| isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, | isamax_kTS, isamin_kTS, ismax_kTS, ismin_kTS, | ||||
| snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS, | |||||
| snrm2_kTS, sasum_kTS, ssum_kTS, scopy_kTS, sbdot_kTS, | |||||
| dsdot_kTS, | dsdot_kTS, | ||||
| srot_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, | |||||
| srot_kTS, srotm_kTS, saxpy_kTS, sscal_kTS, sswap_kTS, | |||||
| sbgemv_nTS, sbgemv_tTS, sger_kTS, | sbgemv_nTS, sbgemv_tTS, sger_kTS, | ||||
| ssymv_LTS, ssymv_UTS, | ssymv_LTS, ssymv_UTS, | ||||
| @@ -158,7 +158,7 @@ gotoblas_t TABLE_NAME = { | |||||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) | ||||
| scopy_kTS, sdot_kTS, | scopy_kTS, sdot_kTS, | ||||
| // dsdot_kTS, | // dsdot_kTS, | ||||
| srot_kTS, saxpy_kTS, | |||||
| srot_kTS, srotm_kTS, saxpy_kTS, | |||||
| #endif | #endif | ||||
| #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | #if (BUILD_SINGLE==1) || (BUILD_DOUBLE==1) || (BUILD_COMPLEX==1) || (BUILD_COMPLEX16==1) | ||||
| sscal_kTS, | sscal_kTS, | ||||
| @@ -260,6 +260,7 @@ gotoblas_t TABLE_NAME = { | |||||
| #endif | #endif | ||||
| #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1) | #if (BUILD_DOUBLE==1) || (BUILD_COMPLEX16==1) | ||||
| drot_kTS, | drot_kTS, | ||||
| drotm_kTS, | |||||
| daxpy_kTS, | daxpy_kTS, | ||||
| dscal_kTS, | dscal_kTS, | ||||
| dswap_kTS, | dswap_kTS, | ||||
| @@ -331,10 +332,9 @@ gotoblas_t TABLE_NAME = { | |||||
| qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS, | qamax_kTS, qamin_kTS, qmax_kTS, qmin_kTS, | ||||
| iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS, | iqamax_kTS, iqamin_kTS, iqmax_kTS, iqmin_kTS, | ||||
| qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS, | qnrm2_kTS, qasum_kTS, qsum_kTS, qcopy_kTS, qdot_kTS, | ||||
| qrot_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS, | |||||
| qrot_kTS, qrotm_kTS, qaxpy_kTS, qscal_kTS, qswap_kTS, | |||||
| qgemv_nTS, qgemv_tTS, qger_kTS, | qgemv_nTS, qgemv_tTS, qger_kTS, | ||||
| qsymv_LTS, qsymv_UTS, | qsymv_LTS, qsymv_UTS, | ||||
| qgemm_kernelTS, qgemm_betaTS, | qgemm_kernelTS, qgemm_betaTS, | ||||
| #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N | #if QGEMM_DEFAULT_UNROLL_M != QGEMM_DEFAULT_UNROLL_N | ||||
| qgemm_incopyTS, qgemm_itcopyTS, | qgemm_incopyTS, qgemm_itcopyTS, | ||||
| @@ -75,3 +75,14 @@ DGEMM_BETA = ../generic/gemm_beta.c | |||||
| CGEMM_BETA = ../generic/zgemm_beta.c | CGEMM_BETA = ../generic/zgemm_beta.c | ||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -189,3 +189,14 @@ ZGEMM_BETA = ../generic/zgemm_beta.c | |||||
| QGEMM_BETA = ../generic/gemm_beta.c | QGEMM_BETA = ../generic/gemm_beta.c | ||||
| XGEMM_BETA = ../generic/zgemm_beta.c | XGEMM_BETA = ../generic/zgemm_beta.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -162,3 +162,15 @@ ZHEMV_L_KERNEL = ../generic/zhemv_k.c | |||||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -290,6 +290,18 @@ ifndef QROTKERNEL | |||||
| QROTKERNEL = rot.S | QROTKERNEL = rot.S | ||||
| endif | endif | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef CROTKERNEL | ifndef CROTKERNEL | ||||
| CROTKERNEL = zrot_sse.S | CROTKERNEL = zrot_sse.S | ||||
| endif | endif | ||||
| @@ -168,3 +168,15 @@ QCABS_KERNEL = ../generic/cabs.c | |||||
| #Dump kernel | #Dump kernel | ||||
| CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -27,4 +27,14 @@ ifndef ZGEMM_BETA | |||||
| ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
| endif | endif | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -135,5 +135,14 @@ ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | ||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | ||||
| ifndef SROTMKERNEL | |||||
| SROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef DROTMKERNEL | |||||
| DROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| ifndef QROTMKERNEL | |||||
| QROTMKERNEL = ../generic/rotm.c | |||||
| endif | |||||
| @@ -70,6 +70,24 @@ CTEST(rot,drot_inc_1) | |||||
| ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); | ASSERT_DBL_NEAR_TOL(y2[i], y1[i], DOUBLE_EPS); | ||||
| } | } | ||||
| } | } | ||||
| CTEST(rot,drotm_inc_1) | |||||
| { | |||||
| blasint i = 0; | |||||
| blasint N = 12, incX = 1, incY = 1; | |||||
| double param[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; | |||||
| double x_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; | |||||
| double y_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; | |||||
| double x_referece[] = {3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0, 27.0, 30.0, 33.0, 36.0}; | |||||
| double y_referece[] = {4.0, 8.0, 12.0, 16.0, 20.0, 24.0, 28.0, 32.0, 36.0, 40.0, 44.0, 48.0}; | |||||
| //OpenBLAS | |||||
| BLASFUNC(drotm)(&N, x_actual, &incX, y_actual, &incY, param); | |||||
| for(i = 0; i < N; i++){ | |||||
| ASSERT_DBL_NEAR_TOL(x_referece[i], x_actual[i], DOUBLE_EPS); | |||||
| ASSERT_DBL_NEAR_TOL(y_referece[i], y_actual[i], DOUBLE_EPS); | |||||
| } | |||||
| } | |||||
| #endif | #endif | ||||
| #ifdef BUILD_COMPLEX16 | #ifdef BUILD_COMPLEX16 | ||||
| @@ -130,6 +148,24 @@ CTEST(rot,srot_inc_1) | |||||
| ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS); | ASSERT_DBL_NEAR_TOL(y2[i], y1[i], SINGLE_EPS); | ||||
| } | } | ||||
| } | } | ||||
| CTEST(rot,srotm_inc_1) | |||||
| { | |||||
| blasint i = 0; | |||||
| blasint N = 12, incX = 1, incY = 1; | |||||
| float param[5] = {1.0, 2.0, 3.0, 4.0, 5.0}; | |||||
| float x_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; | |||||
| float y_actual[] = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0}; | |||||
| float x_referece[] = {3.0, 6.0, 9.0, 12.0, 15.0, 18.0, 21.0, 24.0, 27.0, 30.0, 33.0, 36.0}; | |||||
| float y_referece[] = {4.0, 8.0, 12.0, 16.0, 20.0, 24.0, 28.0, 32.0, 36.0, 40.0, 44.0, 48.0}; | |||||
| //OpenBLAS | |||||
| BLASFUNC(srotm)(&N, x_actual, &incX, y_actual, &incY, param); | |||||
| for(i = 0; i < N; i++){ | |||||
| ASSERT_DBL_NEAR_TOL(x_referece[i], x_actual[i], SINGLE_EPS); | |||||
| ASSERT_DBL_NEAR_TOL(y_referece[i], y_actual[i], SINGLE_EPS); | |||||
| } | |||||
| } | |||||
| #endif | #endif | ||||
| #ifdef BUILD_COMPLEX | #ifdef BUILD_COMPLEX | ||||