Author | SHA1 | Message | Date |
---|---|---|---|
|
034ffa93fa | Provide iaxpy and cblas_iaxpy for integer vectors. make INTEGER_PRECISION=1 | 10 years ago |
@@ -114,6 +114,9 @@ NO_AFFINITY = 1 | |||
# Support for IEEE quad precision (it's *real* REAL*16) (under testing) | |||
# QUAD_PRECISION = 1 | |||
# Support for integer matrix and vector (e.g. iaxpy) | |||
# INTEGER_PRECISION = 1 | |||
# Threads are still working for a while after finishing BLAS operation | |||
# to reduce thread activate/deactivate overhead. You can determine | |||
# time out to improve performance. This number should be from 4 to 30 | |||
@@ -309,6 +309,10 @@ CCOMMON_OPT += -DQUAD_PRECISION | |||
NO_EXPRECISION = 1 | |||
endif | |||
ifdef INTEGER_PRECISION | |||
CCOMMON_OPT += -DINTEGER_PRECISION | |||
endif | |||
ifneq ($(ARCH), x86) | |||
ifneq ($(ARCH), x86_64) | |||
NO_EXPRECISION = 1 | |||
@@ -4,6 +4,7 @@ QBLASOBJS_P = $(QBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
CBLASOBJS_P = $(CBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
ZBLASOBJS_P = $(ZBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
XBLASOBJS_P = $(XBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
IBLASOBJS_P = $(IBLASOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
COMMONOBJS_P = $(COMMONOBJS:.$(SUFFIX)=.$(PSUFFIX)) | |||
@@ -22,12 +23,18 @@ BLASOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||
BLASOBJS_P += $(QBLASOBJS_P) $(XBLASOBJS_P) | |||
endif | |||
ifdef INTEGER_PRECISION | |||
BLASOBJS += $(IBLASOBJS) | |||
BLASOBJS_P += $(IBLASOBJS_P) | |||
endif | |||
$(SBLASOBJS) $(SBLASOBJS_P) : override CFLAGS += -UDOUBLE -UCOMPLEX | |||
$(DBLASOBJS) $(DBLASOBJS_P) : override CFLAGS += -DDOUBLE -UCOMPLEX | |||
$(QBLASOBJS) $(QBLASOBJS_P) : override CFLAGS += -DXDOUBLE -UCOMPLEX | |||
$(CBLASOBJS) $(CBLASOBJS_P) : override CFLAGS += -UDOUBLE -DCOMPLEX | |||
$(ZBLASOBJS) $(ZBLASOBJS_P) : override CFLAGS += -DDOUBLE -DCOMPLEX | |||
$(XBLASOBJS) $(XBLASOBJS_P) : override CFLAGS += -DXDOUBLE -DCOMPLEX | |||
$(IBLASOBJS) $(IBLASOBJS_P) : override CFLAGS += -DINTEGER -UCOMPLEX | |||
$(SBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(DBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
@@ -35,6 +42,7 @@ $(QBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(CBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(ZBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(XBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
$(IBLASOBJS_P) : override CFLAGS += -DPROFILE $(COMMON_PROF) | |||
libs :: $(BLASOBJS) $(COMMONOBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
@@ -276,6 +276,11 @@ typedef int blasint; | |||
#define SIZE 8 | |||
#define BASE_SHIFT 3 | |||
#define ZBASE_SHIFT 4 | |||
#elif defined(INTEGER) //extend for integer matrix | |||
#define FLOAT int | |||
#define SIZE 4 | |||
#define BASE_SHIFT 2 | |||
#define ZBASE_SHIFT 3 | |||
#else | |||
#define FLOAT float | |||
#define SIZE 4 | |||
@@ -0,0 +1,9 @@ | |||
#ifndef COMMON_I_H
#define COMMON_I_H

/*
 * Kernel-name mapping for the integer routines.
 *
 * IAXPYU_K resolves directly to the iaxpy_k symbol. No mapping is provided
 * for DYNAMIC_ARCH builds, so including this header in such a build is a
 * hard error.
 */
#ifndef DYNAMIC_ARCH
#define IAXPYU_K iaxpy_k
#else
#error "common_i.h: integer kernels (IAXPYU_K) are not available with DYNAMIC_ARCH"
#endif

#endif
@@ -93,6 +93,7 @@ openblas_complex_xdouble BLASFUNC(xdotc) (blasint *, xdouble *, blasint *, xdo | |||
void BLASFUNC(saxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | |||
void BLASFUNC(daxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | |||
void BLASFUNC(iaxpy) (blasint *, int *, int *, blasint *, int *, blasint *); | |||
void BLASFUNC(qaxpy) (blasint *, xdouble *, xdouble *, blasint *, xdouble *, blasint *); | |||
void BLASFUNC(caxpy) (blasint *, float *, float *, blasint *, float *, blasint *); | |||
void BLASFUNC(zaxpy) (blasint *, double *, double *, blasint *, double *, blasint *); | |||
@@ -60,6 +60,8 @@ int daxpy_k (BLASLONG, BLASLONG, BLASLONG, double, | |||
double *, BLASLONG, double *, BLASLONG, double *, BLASLONG); | |||
int qaxpy_k (BLASLONG, BLASLONG, BLASLONG, xdouble, | |||
xdouble *, BLASLONG, xdouble *, BLASLONG, xdouble *, BLASLONG); | |||
int iaxpy_k (BLASLONG, BLASLONG, BLASLONG, int, | |||
int *, BLASLONG, int *, BLASLONG, int *, BLASLONG); | |||
int caxpy_k (BLASLONG, BLASLONG, BLASLONG, float, float, | |||
float *, BLASLONG, float *, BLASLONG, float *, BLASLONG); | |||
int zaxpy_k (BLASLONG, BLASLONG, BLASLONG, double, double, | |||
@@ -47,6 +47,10 @@ | |||
#include "common_z.h" | |||
#include "common_x.h" | |||
#ifdef INTEGER_PRECISION | |||
#include "common_i.h" | |||
#endif | |||
#ifndef COMPLEX | |||
#ifdef XDOUBLE | |||
@@ -635,6 +639,9 @@ | |||
#define OMATCOPY_K_CT DOMATCOPY_K_CT | |||
#define OMATCOPY_K_RT DOMATCOPY_K_RT | |||
#define GEADD_K DGEADD_K | |||
#elif defined(INTEGER) | |||
#define AXPYU_K IAXPYU_K | |||
#else | |||
#define AMAX_K SAMAX_K | |||
@@ -65,6 +65,7 @@ extern int blas_omp_linked; | |||
#define BLAS_XDOUBLE 0x0002U | |||
#define BLAS_REAL 0x0000U | |||
#define BLAS_COMPLEX 0x0004U | |||
#define BLAS_INTEGER 0x0008U | |||
#define BLAS_TRANSA 0x0030U /* 2bit */ | |||
#define BLAS_TRANSA_N 0x0000U | |||
@@ -189,6 +189,20 @@ static void legacy_exec(void *func, int mode, blas_arg_t *args, void *sb){ | |||
args -> b, args -> ldb, | |||
args -> c, args -> ldc, sb); | |||
} else | |||
#endif | |||
#ifdef INTEGER_PRECISION | |||
if (mode & BLAS_INTEGER){ | |||
/* REAL / Integer */ | |||
void (*afunc)(BLASLONG, BLASLONG, BLASLONG, int, | |||
int *, BLASLONG, int *, BLASLONG, | |||
int *, BLASLONG, void *) = func; | |||
afunc(args -> m, args -> n, args -> k, | |||
((int *)args -> alpha)[0], | |||
args -> a, args -> lda, | |||
args -> b, args -> ldb, | |||
args -> c, args -> ldc, sb); | |||
} else | |||
#endif | |||
if (mode & BLAS_DOUBLE){ | |||
/* REAL / Double */ | |||
@@ -253,6 +253,15 @@ XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | |||
endif | |||
endif | |||
ifdef INTEGER_PRECISION | |||
IBLAS1OBJS = \ | |||
iaxpy.$(SUFFIX) | |||
IBLAS2OBJS = | |||
IBLAS3OBJS = | |||
endif | |||
endif | |||
HPLOBJS = dgemm.$(SUFFIX) dtrsm.$(SUFFIX) \ | |||
@@ -343,6 +352,9 @@ CZBLAS3OBJS = \ | |||
cblas_zomatcopy.$(SUFFIX) cblas_zimatcopy.$(SUFFIX) \ | |||
cblas_zgeadd.$(SUFFIX) | |||
CIBLAS1OBJS = \ | |||
cblas_iaxpy.$(SUFFIX) | |||
ifeq ($(SUPPORT_GEMM3M), 1) | |||
@@ -372,6 +384,10 @@ ZBLAS1OBJS += $(CZBLAS1OBJS) | |||
ZBLAS2OBJS += $(CZBLAS2OBJS) | |||
ZBLAS3OBJS += $(CZBLAS3OBJS) | |||
IBLAS1OBJS += $(CIBLAS1OBJS) | |||
IBLAS2OBJS += $(CIBLAS2OBJS) | |||
IBLAS3OBJS += $(CIBLAS3OBJS) | |||
endif | |||
SBLASOBJS = $(SBLAS1OBJS) $(SBLAS2OBJS) $(SBLAS3OBJS) | |||
@@ -380,6 +396,7 @@ QBLASOBJS = $(QBLAS1OBJS) $(QBLAS2OBJS) $(QBLAS3OBJS) | |||
CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) | |||
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) | |||
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) | |||
IBLASOBJS = $(IBLAS1OBJS) $(IBLAS2OBJS) $(IBLAS3OBJS) | |||
#SLAPACKOBJS = \ | |||
# sgetrf.$(SUFFIX) sgetrs.$(SUFFIX) spotrf.$(SUFFIX) sgetf2.$(SUFFIX) \ | |||
@@ -458,6 +475,10 @@ ifdef QUAD_PRECISION | |||
FUNCOBJS += $(QBLASOBJS) $(XBLASOBJS) | |||
endif | |||
ifdef INTEGER_PRECISION | |||
FUNCOBJS += $(IBLASOBJS) | |||
endif | |||
FUNCALLFILES = $(FUNCOBJS:.$(SUFFIX)=) | |||
include $(TOPDIR)/Makefile.tail | |||
@@ -476,17 +497,18 @@ endif | |||
clean :: | |||
@rm -f functable.h | |||
level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) | |||
level1 : $(SBLAS1OBJS) $(DBLAS1OBJS) $(QBLAS1OBJS) $(CBLAS1OBJS) $(ZBLAS1OBJS) $(XBLAS1OBJS) $(IBLAS1OBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) | |||
level2 : $(SBLAS2OBJS) $(DBLAS2OBJS) $(QBLAS2OBJS) $(CBLAS2OBJS) $(ZBLAS2OBJS) $(XBLAS2OBJS) $(IBLAS2OBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) | |||
level3 : $(SBLAS3OBJS) $(DBLAS3OBJS) $(QBLAS3OBJS) $(CBLAS3OBJS) $(ZBLAS3OBJS) $(XBLAS3OBJS) $(IBLAS3OBJS) | |||
$(AR) $(ARFLAGS) -ru $(TOPDIR)/$(LIBNAME) $^ | |||
$(CSBLASOBJS) $(CSBLASOBJS_P) $(CDBLASOBJS) $(CDBLASOBJS_P) $(CQBLASOBJS) $(CQBLASOBJS_P) \ | |||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) : override CFLAGS += -DCBLAS | |||
$(CCBLASOBJS) $(CCBLASOBJS_P) $(CZBLASOBJS) $(CZBLASOBJS_P) $(CXBLASOBJS) $(CXBLASOBJS_P) \ | |||
$(CIBLASOBJS) $(CIBLASOBJS_P) : override CFLAGS += -DCBLAS | |||
srot.$(SUFFIX) srot.$(PSUFFIX) : rot.c | |||
$(CC) $(CFLAGS) -c $< -o $(@F) | |||
@@ -725,6 +747,9 @@ saxpy.$(SUFFIX) saxpy.$(PSUFFIX) : axpy.c | |||
daxpy.$(SUFFIX) daxpy.$(PSUFFIX) : axpy.c | |||
$(CC) $(CFLAGS) -c $< -o $(@F) | |||
iaxpy.$(SUFFIX) iaxpy.$(PSUFFIX) : axpy.c | |||
$(CC) $(CFLAGS) -c $< -o $(@F) | |||
qaxpy.$(SUFFIX) qaxpy.$(PSUFFIX) : axpy.c | |||
$(CC) $(CFLAGS) -c $< -o $(@F) | |||
@@ -1437,6 +1462,9 @@ cblas_saxpy.$(SUFFIX) cblas_saxpy.$(PSUFFIX) : axpy.c | |||
cblas_daxpy.$(SUFFIX) cblas_daxpy.$(PSUFFIX) : axpy.c | |||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
cblas_iaxpy.$(SUFFIX) cblas_iaxpy.$(PSUFFIX) : axpy.c | |||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
cblas_caxpy.$(SUFFIX) cblas_caxpy.$(PSUFFIX) : zaxpy.c | |||
$(CC) $(CFLAGS) -DCBLAS -c $< -o $(@F) | |||
@@ -103,6 +103,8 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||
mode = BLAS_XDOUBLE | BLAS_REAL; | |||
#elif defined(DOUBLE) | |||
mode = BLAS_DOUBLE | BLAS_REAL; | |||
#elif defined(INTEGER) | |||
mode = BLAS_INTEGER | BLAS_REAL; | |||
#else | |||
mode = BLAS_SINGLE | BLAS_REAL; | |||
#endif | |||
@@ -210,6 +210,10 @@ ifndef XAXPYKERNEL | |||
XAXPYKERNEL = zaxpy.S | |||
endif | |||
ifndef IAXPYKERNEL | |||
IAXPYKERNEL = ../generic/iaxpy.c | |||
endif | |||
### COPY ### | |||
ifndef SCOPYKERNEL | |||
@@ -471,6 +475,9 @@ QBLASOBJS += \ | |||
qasum_k$(TSUFFIX).$(SUFFIX) qaxpy_k$(TSUFFIX).$(SUFFIX) qcopy_k$(TSUFFIX).$(SUFFIX) qdot_k$(TSUFFIX).$(SUFFIX) \ | |||
qnrm2_k$(TSUFFIX).$(SUFFIX) qrot_k$(TSUFFIX).$(SUFFIX) qscal_k$(TSUFFIX).$(SUFFIX) qswap_k$(TSUFFIX).$(SUFFIX) | |||
IBLASOBJS += \ | |||
iaxpy_k$(TSUFFIX).$(SUFFIX) | |||
CBLASOBJS += \ | |||
camax_k$(TSUFFIX).$(SUFFIX) camin_k$(TSUFFIX).$(SUFFIX) icamax_k$(TSUFFIX).$(SUFFIX) icamin_k$(TSUFFIX).$(SUFFIX) \ | |||
casum_k$(TSUFFIX).$(SUFFIX) caxpy_k$(TSUFFIX).$(SUFFIX) caxpyc_k$(TSUFFIX).$(SUFFIX) ccopy_k$(TSUFFIX).$(SUFFIX) \ | |||
@@ -645,6 +652,9 @@ $(KDIR)daxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)daxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KE | |||
$(KDIR)qaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)qaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(QAXPYKERNEL) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DXDOUBLE $< -o $@ | |||
$(KDIR)iaxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)iaxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(IAXPYKERNEL) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UCOMPLEX -DINTEGER $< -o $@ | |||
$(KDIR)caxpy_k$(TSUFFIX).$(SUFFIX) $(KDIR)caxpy_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(CAXPYKERNEL) | |||
$(CC) -c $(CFLAGS) -DCOMPLEX -DCOMPLEX -UCONJ -UDOUBLE $< -o $@ | |||
@@ -0,0 +1,52 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2015, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "common.h" | |||
/*
 * Generic integer AXPY kernel.
 *
 * Performs y[iy] += da * x[ix] for n elements, where ix and iy start at 0
 * and advance by inc_x and inc_y respectively after every element.
 *
 *   n       number of elements to process; nothing is done when n <= 0
 *   da      scalar multiplier; nothing is done when da == 0
 *   x       source vector, read with stride inc_x
 *   y       destination vector, updated in place with stride inc_y
 *   dummy0, dummy1, dummy, dummy2
 *           not used by this kernel (presumably kept so the signature
 *           lines up with the other axpy kernels -- confirm against callers)
 *
 * Always returns 0.
 */
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, int da, int *x, BLASLONG inc_x, int *y, BLASLONG inc_y, int *dummy, BLASLONG dummy2)
{
	BLASLONG i;
	BLASLONG ix = 0;
	BLASLONG iy = 0;

	if ( n <= 0 ) return(0);
	if ( da == 0 ) return(0);

	for (i = 0; i < n; i++)
	{
		/*
		 * Multiply and accumulate in unsigned arithmetic: signed int
		 * overflow is undefined behaviour, whereas unsigned arithmetic
		 * wraps, so large products give a well-defined wrapped result.
		 */
		y[iy] = (int)((unsigned int)y[iy] + (unsigned int)da * (unsigned int)x[ix]);

		ix += inc_x;
		iy += inc_y;
	}

	return(0);
}