|
- /*********************************************************************/
- /* Copyright 2009, 2010 The University of Texas at Austin. */
- /* All rights reserved. */
- /* */
- /* Redistribution and use in source and binary forms, with or */
- /* without modification, are permitted provided that the following */
- /* conditions are met: */
- /* */
- /* 1. Redistributions of source code must retain the above */
- /* copyright notice, this list of conditions and the following */
- /* disclaimer. */
- /* */
- /* 2. Redistributions in binary form must reproduce the above */
- /* copyright notice, this list of conditions and the following */
- /* disclaimer in the documentation and/or other materials */
- /* provided with the distribution. */
- /* */
- /* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */
- /* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */
- /* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */
- /* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */
- /* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */
- /* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */
- /* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */
- /* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */
- /* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */
- /* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */
- /* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */
- /* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */
- /* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */
- /* POSSIBILITY OF SUCH DAMAGE. */
- /* */
- /* The views and conclusions contained in the software and */
- /* documentation are those of the authors and should not be */
- /* interpreted as representing official policies, either expressed */
- /* or implied, of The University of Texas at Austin. */
- /*********************************************************************/
-
- #ifndef ASSEMBLER /* the prototypes below are skipped when ASSEMBLER is defined */
-
- #ifdef __CUDACC__ /* when __CUDACC__ is defined, wrap the prototypes in extern "C" */
- extern "C" {
- #endif /* __CUDACC__ */
- /* Dot kernels: (BLASLONG, ptr, BLASLONG, ptr, BLASLONG) -> scalar. NOTE(review): presumably (n, x, incx, y, incy); confirm in the kernel sources. */
- float sdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
- double dsdot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG); /* float operands, double result */
- double ddot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
- xdouble qdot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- float sbdot_k(BLASLONG, bfloat16 *, BLASLONG, bfloat16 *, BLASLONG); /* bfloat16 operands, float result */
- /* bfloat16 conversion kernels, no return value. NOTE(review): presumably (n, in, inc_in, out, inc_out); direction is inferred from the names only. */
- void sbstobf16_k(BLASLONG, float *, BLASLONG, bfloat16 *, BLASLONG); /* float * first, bfloat16 * second */
- void sbdtobf16_k(BLASLONG, double *, BLASLONG, bfloat16 *, BLASLONG); /* double * first, bfloat16 * second */
- void sbf16tos_k (BLASLONG, bfloat16 *, BLASLONG, float *, BLASLONG); /* bfloat16 * first, float * second */
- void dbf16tod_k (BLASLONG, bfloat16 *, BLASLONG, double *, BLASLONG); /* bfloat16 * first, double * second */
- /* Complex dot kernels: operands are pointers to the real base type, result is an openblas_complex_ value. NOTE(review): c/u suffix presumably conjugated/unconjugated; confirm. */
- openblas_complex_float cdotc_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
- openblas_complex_float cdotu_k (BLASLONG, float *, BLASLONG, float *, BLASLONG);
- openblas_complex_double zdotc_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
- openblas_complex_double zdotu_k (BLASLONG, double *, BLASLONG, double *, BLASLONG);
- openblas_complex_xdouble xdotc_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- openblas_complex_xdouble xdotu_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- /* axpy kernels: three BLASLONGs, one (s/d/q) or two (c/z/x) by-value scalars, then three (ptr, BLASLONG) pairs. The role of each slot is not documented here; see the kernel sources. */
- int saxpy_k (BLASLONG, BLASLONG, BLASLONG, float,
- float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double,
- double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
- int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float,
- float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double,
- double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
- int xaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- int caxpyc_k (BLASLONG, BLASLONG, BLASLONG, float, float, /* same signature as caxpy_k */
- float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int zaxpyc_k (BLASLONG, BLASLONG, BLASLONG, double, double, /* same signature as zaxpy_k */
- double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
- int xaxpyc_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, /* same signature as xaxpy_k */
- xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- /* copy kernels: (BLASLONG, ptr, BLASLONG, ptr, BLASLONG) -> int; the c/z/x variants use the same real pointer types as s/d/q. */
- int scopy_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int dcopy_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
- int qcopy_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- int ccopy_k(BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int zcopy_k(BLASLONG, double *, BLASLONG, double *, BLASLONG);
- int xcopy_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- /* swap kernels: same argument layout as the axpy kernels, by-value scalar(s) included. NOTE(review): what swap does with the scalar slots is not visible here; confirm. */
- int sswap_k (BLASLONG, BLASLONG, BLASLONG, float,
- float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int dswap_k (BLASLONG, BLASLONG, BLASLONG, double,
- double *, BLASLONG, double *, BLASLONG, double*, BLASLONG);
- int qswap_k (BLASLONG, BLASLONG, BLASLONG, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble*, BLASLONG);
- int cswap_k (BLASLONG, BLASLONG, BLASLONG, float, float,
- float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int zswap_k (BLASLONG, BLASLONG, BLASLONG, double, double,
- double *, BLASLONG, double *, BLASLONG, double*, BLASLONG);
- int xswap_k (BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble*, BLASLONG);
- /* asum kernels: (BLASLONG, ptr, BLASLONG) -> real scalar; c/z/x take and return the same real types as s/d/q. NOTE(review): presumably (n, x, incx); confirm. */
- float sasum_k (BLASLONG, float *, BLASLONG);
- double dasum_k (BLASLONG, double *, BLASLONG);
- xdouble qasum_k (BLASLONG, xdouble *, BLASLONG);
- float casum_k (BLASLONG, float *, BLASLONG);
- double zasum_k (BLASLONG, double *, BLASLONG);
- xdouble xasum_k (BLASLONG, xdouble *, BLASLONG);
- /* sum kernels: same (BLASLONG, ptr, BLASLONG) -> real scalar shape as the asum kernels. NOTE(review): how sum differs from asum is not visible in this header. */
- float ssum_k (BLASLONG, float *, BLASLONG);
- double dsum_k (BLASLONG, double *, BLASLONG);
- xdouble qsum_k (BLASLONG, xdouble *, BLASLONG);
- float csum_k (BLASLONG, float *, BLASLONG);
- double zsum_k (BLASLONG, double *, BLASLONG);
- xdouble xsum_k (BLASLONG, xdouble *, BLASLONG);
- /* amax kernels: (BLASLONG, ptr, BLASLONG) -> real scalar value. NOTE(review): presumably the largest absolute value; confirm in the kernel sources. */
- float samax_k (BLASLONG, float *, BLASLONG);
- double damax_k (BLASLONG, double *, BLASLONG);
- xdouble qamax_k (BLASLONG, xdouble *, BLASLONG);
- float camax_k (BLASLONG, float *, BLASLONG);
- double zamax_k (BLASLONG, double *, BLASLONG);
- xdouble xamax_k (BLASLONG, xdouble *, BLASLONG);
- /* amin kernels: (BLASLONG, ptr, BLASLONG) -> real scalar value. NOTE(review): presumably the smallest absolute value; confirm in the kernel sources. */
- float samin_k (BLASLONG, float *, BLASLONG);
- double damin_k (BLASLONG, double *, BLASLONG);
- xdouble qamin_k (BLASLONG, xdouble *, BLASLONG);
- float camin_k (BLASLONG, float *, BLASLONG);
- double zamin_k (BLASLONG, double *, BLASLONG);
- xdouble xamin_k (BLASLONG, xdouble *, BLASLONG);
- /* iamax kernels: (BLASLONG, ptr, BLASLONG) -> BLASLONG. NOTE(review): presumably an element index; whether it is 0- or 1-based is not visible here. */
- BLASLONG isamax_k(BLASLONG, float *, BLASLONG);
- BLASLONG idamax_k(BLASLONG, double *, BLASLONG);
- BLASLONG iqamax_k(BLASLONG, xdouble *, BLASLONG);
- BLASLONG icamax_k(BLASLONG, float *, BLASLONG);
- BLASLONG izamax_k(BLASLONG, double *, BLASLONG);
- BLASLONG ixamax_k(BLASLONG, xdouble *, BLASLONG);
- /* iamin kernels: (BLASLONG, ptr, BLASLONG) -> BLASLONG. NOTE(review): presumably an element index; whether it is 0- or 1-based is not visible here. */
- BLASLONG isamin_k(BLASLONG, float *, BLASLONG);
- BLASLONG idamin_k(BLASLONG, double *, BLASLONG);
- BLASLONG iqamin_k(BLASLONG, xdouble *, BLASLONG);
- BLASLONG icamin_k(BLASLONG, float *, BLASLONG);
- BLASLONG izamin_k(BLASLONG, double *, BLASLONG);
- BLASLONG ixamin_k(BLASLONG, xdouble *, BLASLONG);
- /* max kernels: (BLASLONG, ptr, BLASLONG) -> real scalar value. NOTE(review): presumably the signed maximum (vs. amax); confirm in the kernel sources. */
- float smax_k (BLASLONG, float *, BLASLONG);
- double dmax_k (BLASLONG, double *, BLASLONG);
- xdouble qmax_k (BLASLONG, xdouble *, BLASLONG);
- float cmax_k (BLASLONG, float *, BLASLONG);
- double zmax_k (BLASLONG, double *, BLASLONG);
- xdouble xmax_k (BLASLONG, xdouble *, BLASLONG);
- /* min kernels: (BLASLONG, ptr, BLASLONG) -> real scalar value. NOTE(review): presumably the signed minimum (vs. amin); confirm in the kernel sources. */
- float smin_k (BLASLONG, float *, BLASLONG);
- double dmin_k (BLASLONG, double *, BLASLONG);
- xdouble qmin_k (BLASLONG, xdouble *, BLASLONG);
- float cmin_k (BLASLONG, float *, BLASLONG);
- double zmin_k (BLASLONG, double *, BLASLONG);
- xdouble xmin_k (BLASLONG, xdouble *, BLASLONG);
- /* imax kernels: (BLASLONG, ptr, BLASLONG) -> BLASLONG. NOTE(review): presumably the index matching the max kernels; index base is not visible here. */
- BLASLONG ismax_k(BLASLONG, float *, BLASLONG);
- BLASLONG idmax_k(BLASLONG, double *, BLASLONG);
- BLASLONG iqmax_k(BLASLONG, xdouble *, BLASLONG);
- BLASLONG icmax_k(BLASLONG, float *, BLASLONG);
- BLASLONG izmax_k(BLASLONG, double *, BLASLONG);
- BLASLONG ixmax_k(BLASLONG, xdouble *, BLASLONG);
- /* imin kernels: (BLASLONG, ptr, BLASLONG) -> BLASLONG. NOTE(review): presumably the index matching the min kernels; index base is not visible here. */
- BLASLONG ismin_k(BLASLONG, float *, BLASLONG);
- BLASLONG idmin_k(BLASLONG, double *, BLASLONG);
- BLASLONG iqmin_k(BLASLONG, xdouble *, BLASLONG);
- BLASLONG icmin_k(BLASLONG, float *, BLASLONG);
- BLASLONG izmin_k(BLASLONG, double *, BLASLONG);
- BLASLONG ixmin_k(BLASLONG, xdouble *, BLASLONG);
- /* scal kernels: same argument layout as the axpy kernels (three BLASLONGs, by-value scalar(s), three (ptr, BLASLONG) pairs). Slot roles are not documented in this header. */
- int sscal_k(BLASLONG, BLASLONG, BLASLONG, float,
- float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int dscal_k(BLASLONG, BLASLONG, BLASLONG, double,
- double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
- int qscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- int cscal_k(BLASLONG, BLASLONG, BLASLONG, float, float,
- float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int zscal_k(BLASLONG, BLASLONG, BLASLONG, double, double,
- double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
- int xscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble,
- xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- int csscal_k(BLASLONG, BLASLONG, BLASLONG, float, float, /* declared with two scalars, like cscal_k */
- float *, BLASLONG, float *, BLASLONG, float *, BLASLONG);
- int zdscal_k(BLASLONG, BLASLONG, BLASLONG, double, double, /* declared with two scalars, like zscal_k */
- double *, BLASLONG, double *, BLASLONG, double *, BLASLONG);
- int xqscal_k(BLASLONG, BLASLONG, BLASLONG, xdouble, xdouble, /* declared with two scalars, like xscal_k */
- xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG);
- /* nrm2 kernels: (BLASLONG, ptr, BLASLONG) -> real scalar; c/z/x return the real type. NOTE(review): presumably the Euclidean norm over (n, x, incx); confirm. */
- float snrm2_k(BLASLONG, float *, BLASLONG);
- double dnrm2_k(BLASLONG, double *, BLASLONG);
- xdouble qnrm2_k(BLASLONG, xdouble *, BLASLONG);
- float cnrm2_k(BLASLONG, float *, BLASLONG);
- double znrm2_k(BLASLONG, double *, BLASLONG);
- xdouble xnrm2_k(BLASLONG, xdouble *, BLASLONG);
- /* rot kernels: a BLASLONG, two (ptr, BLASLONG) pairs, then two by-value real scalars. NOTE(review): presumably (n, x, incx, y, incy, c, s); confirm in the kernel sources. */
- int srot_k (BLASLONG, float *, BLASLONG, float *, BLASLONG, float , float );
- int drot_k (BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
- int qrot_k (BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
- int csrot_k(BLASLONG, float *, BLASLONG, float *, BLASLONG, float , float );
- int zdrot_k(BLASLONG, double *, BLASLONG, double *, BLASLONG, double, double);
- int xqrot_k(BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble, xdouble);
- /* rotg kernels: four pointers of the real base type, int result. Which pointers are inputs and which are outputs is not visible in this header. */
- int srotg_k(float *, float *, float *, float *);
- int drotg_k(double *, double *, double *, double *);
- int qrotg_k(xdouble *, xdouble *, xdouble *, xdouble *);
- int csrotg_k(float *, float *, float *, float *);
- int zdrotg_k(double *, double *, double *, double *);
- int xqrotg_k(xdouble *, xdouble *, xdouble *, xdouble *);
- /* rotmg kernels: five pointers of the real type, int result; only s/d/q variants are declared in this section. */
- int srotmg_k(float *, float *, float *, float *, float *);
- int drotmg_k(double *, double *, double *, double *, double *);
- int qrotmg_k(xdouble *, xdouble *, xdouble *, xdouble *, xdouble *);
- /* rotm kernels. NOTE(review): these pass the real type BY VALUE where the rot kernels pass pointers before each BLASLONG; likely should be pointers -- verify against the kernel definitions. */
- int srotm_k (BLASLONG, float, BLASLONG, float, BLASLONG, float);
- int drotm_k (BLASLONG, double, BLASLONG, double, BLASLONG, double);
- int qrotm_k (BLASLONG, xdouble, BLASLONG, xdouble, BLASLONG, xdouble);
-
- /* axpby kernels: (BLASLONG, scalar(s), ptr, BLASLONG, scalar(s), ptr, BLASLONG); c/z pass each scalar as two reals. Only s/d/c/z variants are declared in this section. */
- int saxpby_k (BLASLONG, float, float *, BLASLONG, float, float *, BLASLONG);
- int daxpby_k (BLASLONG, double, double *, BLASLONG, double, double *, BLASLONG);
- int caxpby_k (BLASLONG, float, float, float *, BLASLONG, float, float, float *, BLASLONG);
- int zaxpby_k (BLASLONG, double, double, double *, BLASLONG, double, double, double *, BLASLONG);
-
-
- #ifdef __CUDACC__
- } /* closes the extern "C" block opened near the top of this header */
- #endif /* __CUDACC__ */
-
- #endif /* !ASSEMBLER */
|