@@ -247,10 +247,16 @@ ifndef NOFORTRAN | |||||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
ifeq ($(F_COMPILER), GFORTRAN) | |||||
ifeq ($(FC), GFORTRAN) | |||||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
ifdef SMP | |||||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
else | |||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
endif | |||||
else | else | ||||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc | -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||||
endif | endif | ||||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | ||||
endif | endif | ||||
@@ -3,7 +3,7 @@ | |||||
# | # | ||||
# This library's version | # This library's version | ||||
VERSION = 0.2.10.rc1 | |||||
VERSION = 0.2.10.rc2 | |||||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | ||||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | ||||
@@ -16,6 +16,9 @@ void goto_set_num_threads(int num_threads); | |||||
/*Get the build configure on runtime.*/ | /*Get the build configure on runtime.*/ | ||||
char* openblas_get_config(void); | char* openblas_get_config(void); | ||||
/* Get the CPU core name at runtime. */
char* openblas_get_corename(void); | |||||
/* Get the parallelization type which is used by OpenBLAS */ | /* Get the parallelization type which is used by OpenBLAS */ | ||||
int openblas_get_parallel(void); | int openblas_get_parallel(void); | ||||
/* OpenBLAS is compiled for sequential use */ | /* OpenBLAS is compiled for sequential use */ | ||||
@@ -1,12 +1,14 @@ | |||||
TOPDIR = ../.. | TOPDIR = ../.. | ||||
include ../../Makefile.system | include ../../Makefile.system | ||||
USE_GEMM3M = 0 | |||||
ifeq ($(ARCH), x86) | ifeq ($(ARCH), x86) | ||||
USE_GEMM3M = 1 | |||||
USE_GEMM3M = 0 | |||||
endif | endif | ||||
ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
USE_GEMM3M = 1 | |||||
USE_GEMM3M = 0 | |||||
endif | endif | ||||
ifeq ($(ARCH), ia64) | ifeq ($(ARCH), ia64) | ||||
@@ -168,7 +170,7 @@ XBLASOBJS += \ | |||||
xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \ | xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \ | ||||
xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX) | xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX) | ||||
ifdef USE_GEMM3M | |||||
ifeq ($(USE_GEMM3M), 1) | |||||
CBLASOBJS += \ | CBLASOBJS += \ | ||||
cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \ | cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \ | ||||
@@ -239,7 +241,7 @@ CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread | |||||
ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX) | ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX) | ||||
XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX) | XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX) | ||||
ifdef USE_GEMM3M | |||||
ifeq ($(USE_GEMM3M), 1) | |||||
CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX) | CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX) | ||||
CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX) | CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX) | ||||
@@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#include "common.h" | #include "common.h" | ||||
#include <string.h> | |||||
static char* openblas_config_str="" | static char* openblas_config_str="" | ||||
#ifdef USE64BITINT | #ifdef USE64BITINT | ||||
"USE64BITINT " | "USE64BITINT " | ||||
@@ -50,10 +52,33 @@ static char* openblas_config_str="" | |||||
#endif | #endif | ||||
#ifdef NO_AFFINITY | #ifdef NO_AFFINITY | ||||
"NO_AFFINITY " | "NO_AFFINITY " | ||||
#endif | |||||
#ifndef DYNAMIC_ARCH | |||||
CHAR_CORENAME | |||||
#endif | #endif | ||||
; | ; | ||||
#ifdef DYNAMIC_ARCH | |||||
char *gotoblas_corename(); | |||||
static char tmp_config_str[256]; | |||||
#endif | |||||
char* CNAME() { | char* CNAME() { | ||||
#ifndef DYNAMIC_ARCH | |||||
return openblas_config_str; | return openblas_config_str; | ||||
#else | |||||
strcpy(tmp_config_str, openblas_config_str); | |||||
strcat(tmp_config_str, gotoblas_corename()); | |||||
return tmp_config_str; | |||||
#endif | |||||
} | } | ||||
/*
 * Return the name of the CPU core.
 *
 * With DYNAMIC_ARCH the name comes from gotoblas_corename().  Otherwise the
 * compile-time macro CHAR_CORENAME is returned when the build configuration
 * defines it; if it does not, the literal "UNKNOWN" is returned so that this
 * file still compiles on such targets.
 *
 * The returned pointer must not be modified or freed by the caller.
 */
char* openblas_get_corename(void) {
#ifdef DYNAMIC_ARCH
  return gotoblas_corename();
#elif defined(CHAR_CORENAME)
  return CHAR_CORENAME;
#else
  return "UNKNOWN";
#endif
}
@@ -165,7 +165,8 @@ int get_L2_size(void){ | |||||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | ||||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | ||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) | |||||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | |||||
defined(PILEDRIVER) || defined(HASWELL) | |||||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | ||||
@@ -73,7 +73,7 @@ | |||||
); | ); | ||||
@gemm3mobjs = ( | @gemm3mobjs = ( | ||||
zgemm3m, cgemm3m, zsymm3m, csymm3m, zhemm3m, chemm3m, | |||||
); | ); | ||||
@@ -85,6 +85,7 @@ | |||||
@misc_no_underscore_objs = ( | @misc_no_underscore_objs = ( | ||||
goto_set_num_threads, | goto_set_num_threads, | ||||
openblas_get_config, | openblas_get_config, | ||||
openblas_get_corename, | |||||
); | ); | ||||
@misc_underscore_objs = ( | @misc_underscore_objs = ( | ||||
@@ -952,6 +952,15 @@ int main(int argc, char *argv[]){ | |||||
#else | #else | ||||
get_cpuconfig(); | get_cpuconfig(); | ||||
#endif | #endif | ||||
#ifdef FORCE | |||||
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); | |||||
#else | |||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) | |||||
printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); | |||||
#endif | |||||
#endif | |||||
break; | break; | ||||
case '2' : /* SMP */ | case '2' : /* SMP */ | ||||
@@ -1,6 +1,8 @@ | |||||
TOPDIR = .. | TOPDIR = .. | ||||
include $(TOPDIR)/Makefile.system | include $(TOPDIR)/Makefile.system | ||||
SUPPORT_GEMM3M = 0 | |||||
ifeq ($(ARCH), x86) | ifeq ($(ARCH), x86) | ||||
SUPPORT_GEMM3M = 0 | SUPPORT_GEMM3M = 0 | ||||
endif | endif | ||||
@@ -124,7 +126,7 @@ ZBLAS3OBJS = \ | |||||
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ | zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ | ||||
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) | zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) | ||||
ifdef SUPPORT_GEMM3M | |||||
ifeq ($(SUPPORT_GEMM3M), 1) | |||||
CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) | CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) | ||||
@@ -182,7 +184,7 @@ XBLAS3OBJS = \ | |||||
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ | xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ | ||||
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) | xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) | ||||
ifdef SUPPORT_GEMM3M | |||||
ifeq ($(SUPPORT_GEMM3M), 1) | |||||
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | ||||
@@ -238,7 +240,7 @@ XBLAS3OBJS = \ | |||||
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ | xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ | ||||
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) | xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) | ||||
ifdef SUPPORT_GEMM3M | |||||
ifeq ($(SUPPORT_GEMM3M), 1) | |||||
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | ||||
@@ -42,6 +42,12 @@ | |||||
#include "functable.h" | #include "functable.h" | ||||
#endif | #endif | ||||
#ifdef SMP | |||||
#ifdef __64BIT__ | |||||
#define SMPTEST 1 | |||||
#endif | |||||
#endif | |||||
#ifdef XDOUBLE | #ifdef XDOUBLE | ||||
#define ERROR_NAME "QGER " | #define ERROR_NAME "QGER " | ||||
#elif defined DOUBLE | #elif defined DOUBLE | ||||
@@ -75,7 +81,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, | |||||
blasint incy = *INCY; | blasint incy = *INCY; | ||||
blasint lda = *LDA; | blasint lda = *LDA; | ||||
FLOAT *buffer; | FLOAT *buffer; | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
int nthreads; | int nthreads; | ||||
#endif | #endif | ||||
@@ -107,7 +113,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
FLOAT *buffer; | FLOAT *buffer; | ||||
blasint info, t; | blasint info, t; | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
int nthreads; | int nthreads; | ||||
#endif | #endif | ||||
@@ -167,7 +173,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
buffer = (FLOAT *)blas_memory_alloc(1); | buffer = (FLOAT *)blas_memory_alloc(1); | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
nthreads = num_cpu_avail(2); | nthreads = num_cpu_avail(2); | ||||
@@ -176,7 +182,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); | GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
} else { | } else { | ||||
GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); | GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); | ||||
@@ -43,6 +43,14 @@ | |||||
#include "functable.h" | #include "functable.h" | ||||
#endif | #endif | ||||
/* | |||||
#ifdef SMP | |||||
#ifdef __64BIT__ | |||||
#define SMPTEST 1 | |||||
#endif | |||||
#endif | |||||
*/ | |||||
#ifdef XDOUBLE | #ifdef XDOUBLE | ||||
#define ERROR_NAME "QSBMV " | #define ERROR_NAME "QSBMV " | ||||
#elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
@@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA | |||||
#endif | #endif | ||||
}; | }; | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { | static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { | ||||
#ifdef XDOUBLE | #ifdef XDOUBLE | ||||
qsbmv_thread_U, qsbmv_thread_L, | qsbmv_thread_U, qsbmv_thread_L, | ||||
@@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * | |||||
blasint info; | blasint info; | ||||
int uplo; | int uplo; | ||||
FLOAT *buffer; | FLOAT *buffer; | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
int nthreads; | int nthreads; | ||||
#endif | #endif | ||||
@@ -130,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
FLOAT *buffer; | FLOAT *buffer; | ||||
int uplo; | int uplo; | ||||
blasint info; | blasint info; | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
int nthreads; | int nthreads; | ||||
#endif | #endif | ||||
@@ -189,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
buffer = (FLOAT *)blas_memory_alloc(1); | buffer = (FLOAT *)blas_memory_alloc(1); | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
nthreads = num_cpu_avail(2); | nthreads = num_cpu_avail(2); | ||||
if (nthreads == 1) { | if (nthreads == 1) { | ||||
@@ -197,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer); | (sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer); | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
} else { | } else { | ||||
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads); | (sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads); | ||||
@@ -42,6 +42,13 @@ | |||||
#include "functable.h" | #include "functable.h" | ||||
#endif | #endif | ||||
#ifdef SMP | |||||
#ifdef __64BIT__ | |||||
#define SMPTEST 1 | |||||
#endif | |||||
#endif | |||||
#ifdef XDOUBLE | #ifdef XDOUBLE | ||||
#ifndef CONJ | #ifndef CONJ | ||||
#define ERROR_NAME "XGERU " | #define ERROR_NAME "XGERU " | ||||
@@ -109,7 +116,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, | |||||
blasint incy = *INCY; | blasint incy = *INCY; | ||||
blasint lda = *LDA; | blasint lda = *LDA; | ||||
FLOAT *buffer; | FLOAT *buffer; | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
int nthreads; | int nthreads; | ||||
#endif | #endif | ||||
@@ -144,7 +151,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
FLOAT *buffer; | FLOAT *buffer; | ||||
blasint info, t; | blasint info, t; | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
int nthreads; | int nthreads; | ||||
#endif | #endif | ||||
@@ -205,7 +212,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
buffer = (FLOAT *)blas_memory_alloc(1); | buffer = (FLOAT *)blas_memory_alloc(1); | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
nthreads = num_cpu_avail(2); | nthreads = num_cpu_avail(2); | ||||
if (nthreads == 1) { | if (nthreads == 1) { | ||||
@@ -221,7 +228,7 @@ void CNAME(enum CBLAS_ORDER order, | |||||
} | } | ||||
#endif | #endif | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
} else { | } else { | ||||
@@ -43,6 +43,14 @@ | |||||
#include "functable.h" | #include "functable.h" | ||||
#endif | #endif | ||||
/* | |||||
#ifdef SMP | |||||
#ifdef __64BIT__ | |||||
#define SMPTEST 1 | |||||
#endif | |||||
#endif | |||||
*/ | |||||
#ifdef XDOUBLE | #ifdef XDOUBLE | ||||
#define ERROR_NAME "XSBMV " | #define ERROR_NAME "XSBMV " | ||||
#elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
@@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT | |||||
#endif | #endif | ||||
}; | }; | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { | static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { | ||||
#ifdef XDOUBLE | #ifdef XDOUBLE | ||||
xsbmv_thread_U, xsbmv_thread_L, | xsbmv_thread_U, xsbmv_thread_L, | ||||
@@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * | |||||
blasint info; | blasint info; | ||||
int uplo; | int uplo; | ||||
FLOAT *buffer; | FLOAT *buffer; | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
int nthreads; | int nthreads; | ||||
#endif | #endif | ||||
@@ -131,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * | |||||
buffer = (FLOAT *)blas_memory_alloc(1); | buffer = (FLOAT *)blas_memory_alloc(1); | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
nthreads = num_cpu_avail(2); | nthreads = num_cpu_avail(2); | ||||
if (nthreads == 1) { | if (nthreads == 1) { | ||||
@@ -139,7 +147,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * | |||||
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer); | (sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer); | ||||
#ifdef SMPBUG | |||||
#ifdef SMPTEST | |||||
} else { | } else { | ||||
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads); | (sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads); | ||||
@@ -1,3 +1,5 @@ | |||||
USE_GEMM3M = 0 | |||||
ifeq ($(ARCH), x86) | ifeq ($(ARCH), x86) | ||||
USE_GEMM3M = 1 | USE_GEMM3M = 1 | ||||
endif | endif | ||||
@@ -122,7 +124,7 @@ XBLASOBJS += \ | |||||
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | ||||
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | ||||
ifdef USE_GEMM3M | |||||
ifeq ($(USE_GEMM3M), 1) | |||||
CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX) | CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX) | ||||
ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX) | ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX) | ||||
@@ -256,7 +258,7 @@ XBLASOBJS += \ | |||||
xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ | xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ | ||||
xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX) | xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX) | ||||
ifdef USE_GEMM3M | |||||
ifeq ($(USE_GEMM3M), 1) | |||||
CBLASOBJS += \ | CBLASOBJS += \ | ||||
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | ||||
@@ -0,0 +1,104 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include "common.h" | |||||
/*
 * Generic C dot-product kernel.
 *
 * Computes the sum of x[i * inc_x] * y[i * inc_y] for i in [0, n).
 * Indexing starts at element 0 of both vectors whatever the sign of the
 * strides.
 *
 * When DSDOT is defined every operand is widened to double before the
 * multiplication and the accumulator and return value are double; otherwise
 * everything is done in FLOAT.
 *
 *   n      number of elements; n <= 0 yields 0
 *   x, y   input vectors (only read)
 *   inc_x  stride of x, in elements
 *   inc_y  stride of y, in elements
 */
#if defined(DSDOT)
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{
	BLASLONG i=0;
	BLASLONG ix=0,iy=0;

#if defined(DSDOT)
	double dot = 0.0 ;
#else
	FLOAT  dot = 0.0 ;
#endif

	if ( n < 0 )  return(dot);

	if ( (inc_x == 1) && (inc_y == 1) )
	{
		/* Largest multiple of 4 not exceeding n.  Kept in BLASLONG so the
		   bound is not narrowed when n does not fit in an int. */
		BLASLONG n1 = n & -4;

		/* Unit-stride fast path: four products per iteration. */
		while(i < n1)
		{

#if defined(DSDOT)
			dot += (double) y[i]   * (double) x[i]
			     + (double) y[i+1] * (double) x[i+1]
			     + (double) y[i+2] * (double) x[i+2]
			     + (double) y[i+3] * (double) x[i+3] ;
#else
			dot += y[i]   * x[i]
			     + y[i+1] * x[i+1]
			     + y[i+2] * x[i+2]
			     + y[i+3] * x[i+3] ;
#endif
			i+=4 ;

		}

		/* Remaining 0..3 elements. */
		while(i < n)
		{

#if defined(DSDOT)
			dot += (double) y[i] * (double) x[i] ;
#else
			dot += y[i] * x[i] ;
#endif
			i++ ;

		}
		return(dot);

	}

	/* General strides: one product per iteration. */
	while(i < n)
	{

#if defined(DSDOT)
		dot += (double) y[iy] * (double) x[ix] ;
#else
		dot += y[iy] * x[ix] ;
#endif
		ix  += inc_x ;
		iy  += inc_y ;
		i++ ;

	}
	return(dot);

}
@@ -714,13 +714,13 @@ static void init_parameter(void) { | |||||
fprintf(stderr, "Core2\n"); | fprintf(stderr, "Core2\n"); | ||||
#endif | #endif | ||||
TABLE_NAME.sgemm_p = 92 * (l2 >> 9); | |||||
TABLE_NAME.dgemm_p = 46 * (l2 >> 9); | |||||
TABLE_NAME.cgemm_p = 46 * (l2 >> 9); | |||||
TABLE_NAME.zgemm_p = 23 * (l2 >> 9); | |||||
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8; | |||||
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8; | |||||
TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4; | |||||
TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4; | |||||
#ifdef EXPRECISION | #ifdef EXPRECISION | ||||
TABLE_NAME.qgemm_p = 92 * (l2 >> 9); | |||||
TABLE_NAME.xgemm_p = 46 * (l2 >> 9); | |||||
TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8; | |||||
TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4; | |||||
#endif | #endif | ||||
#endif | #endif | ||||
@@ -740,6 +740,23 @@ static void init_parameter(void) { | |||||
#endif | #endif | ||||
#endif | #endif | ||||
#ifdef DUNNINGTON | |||||
#ifdef DEBUG | |||||
fprintf(stderr, "Dunnington\n"); | |||||
#endif | |||||
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; | |||||
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; | |||||
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; | |||||
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; | |||||
#ifdef EXPRECISION | |||||
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; | |||||
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; | |||||
#endif | |||||
#endif | |||||
#ifdef NEHALEM | #ifdef NEHALEM | ||||
#ifdef DEBUG | #ifdef DEBUG | ||||
@@ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S | |||||
endif | endif | ||||
ifndef SDOTKERNEL | ifndef SDOTKERNEL | ||||
SDOTKERNEL = dot_sse.S | |||||
SDOTKERNEL = ../generic/dot.c | |||||
endif | endif | ||||
ifndef DSDOTKERNEL | ifndef DSDOTKERNEL | ||||
DSDOTKERNEL = ../arm/dot.c | |||||
DSDOTKERNEL = ../generic/dot.c | |||||
endif | endif | ||||
ifndef DDOTKERNEL | ifndef DDOTKERNEL | ||||
DDOTKERNEL = dot_sse2.S | DDOTKERNEL = dot_sse2.S | ||||
endif | endif | ||||
@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S | |||||
DGEMVNKERNEL = dgemv_n_bulldozer.S | DGEMVNKERNEL = dgemv_n_bulldozer.S | ||||
DGEMVTKERNEL = dgemv_t_bulldozer.S | DGEMVTKERNEL = dgemv_t_bulldozer.S | ||||
DAXPYKERNEL = daxpy_bulldozer.S | |||||
DDOTKERNEL = ddot_bulldozer.S | DDOTKERNEL = ddot_bulldozer.S | ||||
DCOPYKERNEL = dcopy_bulldozer.S | DCOPYKERNEL = dcopy_bulldozer.S | ||||
@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S | |||||
DGEMVNKERNEL = dgemv_n_bulldozer.S | DGEMVNKERNEL = dgemv_n_bulldozer.S | ||||
DGEMVTKERNEL = dgemv_t_bulldozer.S | DGEMVTKERNEL = dgemv_t_bulldozer.S | ||||
DAXPYKERNEL = daxpy_bulldozer.S | |||||
DDOTKERNEL = ddot_bulldozer.S | DDOTKERNEL = ddot_bulldozer.S | ||||
DCOPYKERNEL = dcopy_bulldozer.S | DCOPYKERNEL = dcopy_bulldozer.S | ||||
@@ -19,7 +19,7 @@ DGEMMINCOPYOBJ = | |||||
DGEMMITCOPYOBJ = | DGEMMITCOPYOBJ = | ||||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | ||||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | ||||
CGEMMKERNEL = zgemm_kernel_4x2_sse3.S | |||||
CGEMMKERNEL = zgemm_kernel_4x2_sse.S | |||||
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c | CGEMMINCOPY = ../generic/zgemm_ncopy_4.c | ||||
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c | CGEMMITCOPY = ../generic/zgemm_tcopy_4.c | ||||
CGEMMONCOPY = zgemm_ncopy_2.S | CGEMMONCOPY = zgemm_ncopy_2.S | ||||
@@ -1,7 +1,6 @@ | |||||
SHELL = /bin/sh | SHELL = /bin/sh | ||||
PLAT = _LINUX | PLAT = _LINUX | ||||
DRVOPTS = $(OPTS) | DRVOPTS = $(OPTS) | ||||
LOADER = $(FORTRAN) | |||||
ARCHFLAGS= -ru | ARCHFLAGS= -ru | ||||
#RANLIB = ranlib | #RANLIB = ranlib | ||||
@@ -1,15 +1,19 @@ | |||||
UTEST_CHECK = 1 | UTEST_CHECK = 1 | ||||
TOPDIR = .. | TOPDIR = .. | ||||
include $(TOPDIR)/Makefile.system | |||||
TARGET=openblas_utest | TARGET=openblas_utest | ||||
.PHONY : all | |||||
.NOTPARALLEL : all run_test $(TARGET) | |||||
CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2 | CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2 | ||||
CUNIT_DIR=$(CURDIR)/CUnit-2.1-2 | CUNIT_DIR=$(CURDIR)/CUnit-2.1-2 | ||||
CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a | CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a | ||||
CFLAGS+=-I$(CUNIT_DIR)/include | |||||
CFLAGS +=-I$(CUNIT_DIR)/include | |||||
include $(TOPDIR)/Makefile.system | |||||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o | OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o | ||||