@@ -247,10 +247,16 @@ ifndef NOFORTRAN | |||
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
ifeq ($(F_COMPILER), GFORTRAN) | |||
ifeq ($(FC), GFORTRAN) | |||
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
ifdef SMP | |||
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
else | |||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
else | |||
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc | |||
endif | |||
@@ -3,7 +3,7 @@ | |||
# | |||
# This library's version | |||
VERSION = 0.2.10.rc1 | |||
VERSION = 0.2.10.rc2 | |||
# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a | |||
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library | |||
@@ -16,6 +16,9 @@ void goto_set_num_threads(int num_threads); | |||
/* Get the build configuration at runtime. */ | |||
char* openblas_get_config(void); | |||
/* Get the CPU core name at runtime. */ | |||
char* openblas_get_corename(void); | |||
/* Get the parallelization type which is used by OpenBLAS */ | |||
int openblas_get_parallel(void); | |||
/* OpenBLAS is compiled for sequential use */ | |||
@@ -1,12 +1,14 @@ | |||
TOPDIR = ../.. | |||
include ../../Makefile.system | |||
USE_GEMM3M = 0 | |||
ifeq ($(ARCH), x86) | |||
USE_GEMM3M = 1 | |||
USE_GEMM3M = 0 | |||
endif | |||
ifeq ($(ARCH), x86_64) | |||
USE_GEMM3M = 1 | |||
USE_GEMM3M = 0 | |||
endif | |||
ifeq ($(ARCH), ia64) | |||
@@ -168,7 +170,7 @@ XBLASOBJS += \ | |||
xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \ | |||
xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX) | |||
ifdef USE_GEMM3M | |||
ifeq ($(USE_GEMM3M), 1) | |||
CBLASOBJS += \ | |||
cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \ | |||
@@ -239,7 +241,7 @@ CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread | |||
ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX) | |||
XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX) | |||
ifdef USE_GEMM3M | |||
ifeq ($(USE_GEMM3M), 1) | |||
CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX) | |||
CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX) | |||
@@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#include <string.h> | |||
static char* openblas_config_str="" | |||
#ifdef USE64BITINT | |||
"USE64BITINT " | |||
@@ -50,10 +52,33 @@ static char* openblas_config_str="" | |||
#endif | |||
#ifdef NO_AFFINITY | |||
"NO_AFFINITY " | |||
#endif | |||
#ifndef DYNAMIC_ARCH | |||
CHAR_CORENAME | |||
#endif | |||
; | |||
#ifdef DYNAMIC_ARCH | |||
char *gotoblas_corename(); | |||
static char tmp_config_str[256]; | |||
#endif | |||
char* CNAME() { | |||
#ifndef DYNAMIC_ARCH | |||
return openblas_config_str; | |||
#else | |||
strcpy(tmp_config_str, openblas_config_str); | |||
strcat(tmp_config_str, gotoblas_corename()); | |||
return tmp_config_str; | |||
#endif | |||
} | |||
char* openblas_get_corename() { | |||
#ifndef DYNAMIC_ARCH | |||
return CHAR_CORENAME; | |||
#else | |||
return gotoblas_corename(); | |||
#endif | |||
} |
@@ -165,7 +165,8 @@ int get_L2_size(void){ | |||
#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ | |||
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ | |||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) | |||
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \ | |||
defined(PILEDRIVER) || defined(HASWELL) | |||
cpuid(0x80000006, &eax, &ebx, &ecx, &edx); | |||
@@ -73,7 +73,7 @@ | |||
); | |||
@gemm3mobjs = ( | |||
zgemm3m, cgemm3m, zsymm3m, csymm3m, zhemm3m, chemm3m, | |||
); | |||
@@ -85,6 +85,7 @@ | |||
@misc_no_underscore_objs = ( | |||
goto_set_num_threads, | |||
openblas_get_config, | |||
openblas_get_corename, | |||
); | |||
@misc_underscore_objs = ( | |||
@@ -952,6 +952,15 @@ int main(int argc, char *argv[]){ | |||
#else | |||
get_cpuconfig(); | |||
#endif | |||
#ifdef FORCE | |||
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME); | |||
#else | |||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) | |||
printf("#define CHAR_CORENAME \"%s\"\n", get_corename()); | |||
#endif | |||
#endif | |||
break; | |||
case '2' : /* SMP */ | |||
@@ -1,6 +1,8 @@ | |||
TOPDIR = .. | |||
include $(TOPDIR)/Makefile.system | |||
SUPPORT_GEMM3M = 0 | |||
ifeq ($(ARCH), x86) | |||
SUPPORT_GEMM3M = 0 | |||
endif | |||
@@ -124,7 +126,7 @@ ZBLAS3OBJS = \ | |||
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ | |||
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) | |||
ifdef SUPPORT_GEMM3M | |||
ifeq ($(SUPPORT_GEMM3M), 1) | |||
CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) | |||
@@ -182,7 +184,7 @@ XBLAS3OBJS = \ | |||
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ | |||
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) | |||
ifdef SUPPORT_GEMM3M | |||
ifeq ($(SUPPORT_GEMM3M), 1) | |||
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | |||
@@ -238,7 +240,7 @@ XBLAS3OBJS = \ | |||
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ | |||
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) | |||
ifdef SUPPORT_GEMM3M | |||
ifeq ($(SUPPORT_GEMM3M), 1) | |||
XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) | |||
@@ -42,6 +42,12 @@ | |||
#include "functable.h" | |||
#endif | |||
#ifdef SMP | |||
#ifdef __64BIT__ | |||
#define SMPTEST 1 | |||
#endif | |||
#endif | |||
#ifdef XDOUBLE | |||
#define ERROR_NAME "QGER " | |||
#elif defined DOUBLE | |||
@@ -75,7 +81,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, | |||
blasint incy = *INCY; | |||
blasint lda = *LDA; | |||
FLOAT *buffer; | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
int nthreads; | |||
#endif | |||
@@ -107,7 +113,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
FLOAT *buffer; | |||
blasint info, t; | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
int nthreads; | |||
#endif | |||
@@ -167,7 +173,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
buffer = (FLOAT *)blas_memory_alloc(1); | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
nthreads = num_cpu_avail(2); | |||
@@ -176,7 +182,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
} else { | |||
GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); | |||
@@ -43,6 +43,14 @@ | |||
#include "functable.h" | |||
#endif | |||
/* | |||
#ifdef SMP | |||
#ifdef __64BIT__ | |||
#define SMPTEST 1 | |||
#endif | |||
#endif | |||
*/ | |||
#ifdef XDOUBLE | |||
#define ERROR_NAME "QSBMV " | |||
#elif defined(DOUBLE) | |||
@@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA | |||
#endif | |||
}; | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { | |||
#ifdef XDOUBLE | |||
qsbmv_thread_U, qsbmv_thread_L, | |||
@@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * | |||
blasint info; | |||
int uplo; | |||
FLOAT *buffer; | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
int nthreads; | |||
#endif | |||
@@ -130,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
FLOAT *buffer; | |||
int uplo; | |||
blasint info; | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
int nthreads; | |||
#endif | |||
@@ -189,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
buffer = (FLOAT *)blas_memory_alloc(1); | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
nthreads = num_cpu_avail(2); | |||
if (nthreads == 1) { | |||
@@ -197,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer); | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
} else { | |||
(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads); | |||
@@ -42,6 +42,13 @@ | |||
#include "functable.h" | |||
#endif | |||
#ifdef SMP | |||
#ifdef __64BIT__ | |||
#define SMPTEST 1 | |||
#endif | |||
#endif | |||
#ifdef XDOUBLE | |||
#ifndef CONJ | |||
#define ERROR_NAME "XGERU " | |||
@@ -109,7 +116,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha, | |||
blasint incy = *INCY; | |||
blasint lda = *LDA; | |||
FLOAT *buffer; | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
int nthreads; | |||
#endif | |||
@@ -144,7 +151,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
FLOAT *buffer; | |||
blasint info, t; | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
int nthreads; | |||
#endif | |||
@@ -205,7 +212,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
buffer = (FLOAT *)blas_memory_alloc(1); | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
nthreads = num_cpu_avail(2); | |||
if (nthreads == 1) { | |||
@@ -221,7 +228,7 @@ void CNAME(enum CBLAS_ORDER order, | |||
} | |||
#endif | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
} else { | |||
@@ -43,6 +43,14 @@ | |||
#include "functable.h" | |||
#endif | |||
/* | |||
#ifdef SMP | |||
#ifdef __64BIT__ | |||
#define SMPTEST 1 | |||
#endif | |||
#endif | |||
*/ | |||
#ifdef XDOUBLE | |||
#define ERROR_NAME "XSBMV " | |||
#elif defined(DOUBLE) | |||
@@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT | |||
#endif | |||
}; | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { | |||
#ifdef XDOUBLE | |||
xsbmv_thread_U, xsbmv_thread_L, | |||
@@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * | |||
blasint info; | |||
int uplo; | |||
FLOAT *buffer; | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
int nthreads; | |||
#endif | |||
@@ -131,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * | |||
buffer = (FLOAT *)blas_memory_alloc(1); | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
nthreads = num_cpu_avail(2); | |||
if (nthreads == 1) { | |||
@@ -139,7 +147,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint * | |||
(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer); | |||
#ifdef SMPBUG | |||
#ifdef SMPTEST | |||
} else { | |||
(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads); | |||
@@ -1,3 +1,5 @@ | |||
USE_GEMM3M = 0 | |||
ifeq ($(ARCH), x86) | |||
USE_GEMM3M = 1 | |||
endif | |||
@@ -122,7 +124,7 @@ XBLASOBJS += \ | |||
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ | |||
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ | |||
ifdef USE_GEMM3M | |||
ifeq ($(USE_GEMM3M), 1) | |||
CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX) | |||
ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX) | |||
@@ -256,7 +258,7 @@ XBLASOBJS += \ | |||
xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ | |||
xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX) | |||
ifdef USE_GEMM3M | |||
ifeq ($(USE_GEMM3M), 1) | |||
CBLASOBJS += \ | |||
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ | |||
@@ -0,0 +1,104 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2014, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include "common.h" | |||
/*
 * Generic C dot-product kernel.
 *
 * Accumulates x[i*inc_x] * y[i*inc_y] for i = 0 .. n-1 and returns the sum.
 * When DSDOT is defined every operand is widened to double before the
 * multiply and both the accumulator and the return type are double;
 * otherwise the whole computation is carried out in FLOAT.
 *
 *   n       number of element pairs; n <= 0 yields 0.0
 *   x, y    input vectors (only read)
 *   inc_x   stride, in elements, between consecutive entries of x
 *   inc_y   stride, in elements, between consecutive entries of y
 */
#if defined(DSDOT)
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{
	BLASLONG i = 0;
	BLASLONG ix = 0, iy = 0;

#if defined(DSDOT)
	double dot = 0.0;
#else
	FLOAT dot = 0.0;
#endif

	if ( n < 0 ) return(dot);

	if ( (inc_x == 1) && (inc_y == 1) )
	{
		/*
		 * Largest multiple of 4 not exceeding n.  Kept as BLASLONG: an
		 * int here would be truncated for n beyond INT_MAX, changing how
		 * much of the vector the unrolled loop covers.
		 */
		BLASLONG n1 = n & -4;

		/* Unit-stride fast path, four products per iteration. */
		while (i < n1)
		{
#if defined(DSDOT)
			dot += (double) y[i]   * (double) x[i]
			     + (double) y[i+1] * (double) x[i+1]
			     + (double) y[i+2] * (double) x[i+2]
			     + (double) y[i+3] * (double) x[i+3] ;
#else
			dot += y[i]   * x[i]
			     + y[i+1] * x[i+1]
			     + y[i+2] * x[i+2]
			     + y[i+3] * x[i+3] ;
#endif
			i += 4 ;
		}

		/* Remaining 0..3 elements. */
		while (i < n)
		{
#if defined(DSDOT)
			dot += (double) y[i] * (double) x[i] ;
#else
			dot += y[i] * x[i] ;
#endif
			i++ ;
		}
		return(dot);
	}

	/* General case: step through x and y with their own strides. */
	while (i < n)
	{
#if defined(DSDOT)
		dot += (double) y[iy] * (double) x[ix] ;
#else
		dot += y[iy] * x[ix] ;
#endif
		ix += inc_x ;
		iy += inc_y ;
		i++ ;
	}
	return(dot);
}
@@ -714,13 +714,13 @@ static void init_parameter(void) { | |||
fprintf(stderr, "Core2\n"); | |||
#endif | |||
TABLE_NAME.sgemm_p = 92 * (l2 >> 9); | |||
TABLE_NAME.dgemm_p = 46 * (l2 >> 9); | |||
TABLE_NAME.cgemm_p = 46 * (l2 >> 9); | |||
TABLE_NAME.zgemm_p = 23 * (l2 >> 9); | |||
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8; | |||
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8; | |||
TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4; | |||
TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4; | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 92 * (l2 >> 9); | |||
TABLE_NAME.xgemm_p = 46 * (l2 >> 9); | |||
TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8; | |||
TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4; | |||
#endif | |||
#endif | |||
@@ -740,6 +740,23 @@ static void init_parameter(void) { | |||
#endif | |||
#endif | |||
#ifdef DUNNINGTON | |||
#ifdef DEBUG | |||
fprintf(stderr, "Dunnington\n"); | |||
#endif | |||
TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8; | |||
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8; | |||
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4; | |||
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4; | |||
#ifdef EXPRECISION | |||
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8; | |||
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4; | |||
#endif | |||
#endif | |||
#ifdef NEHALEM | |||
#ifdef DEBUG | |||
@@ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S | |||
endif | |||
ifndef SDOTKERNEL | |||
SDOTKERNEL = dot_sse.S | |||
SDOTKERNEL = ../generic/dot.c | |||
endif | |||
ifndef DSDOTKERNEL | |||
DSDOTKERNEL = ../arm/dot.c | |||
DSDOTKERNEL = ../generic/dot.c | |||
endif | |||
ifndef DDOTKERNEL | |||
DDOTKERNEL = dot_sse2.S | |||
endif | |||
@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S | |||
DGEMVNKERNEL = dgemv_n_bulldozer.S | |||
DGEMVTKERNEL = dgemv_t_bulldozer.S | |||
DAXPYKERNEL = daxpy_bulldozer.S | |||
DDOTKERNEL = ddot_bulldozer.S | |||
DCOPYKERNEL = dcopy_bulldozer.S | |||
@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S | |||
DGEMVNKERNEL = dgemv_n_bulldozer.S | |||
DGEMVTKERNEL = dgemv_t_bulldozer.S | |||
DAXPYKERNEL = daxpy_bulldozer.S | |||
DDOTKERNEL = ddot_bulldozer.S | |||
DCOPYKERNEL = dcopy_bulldozer.S | |||
@@ -19,7 +19,7 @@ DGEMMINCOPYOBJ = | |||
DGEMMITCOPYOBJ = | |||
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) | |||
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) | |||
CGEMMKERNEL = zgemm_kernel_4x2_sse3.S | |||
CGEMMKERNEL = zgemm_kernel_4x2_sse.S | |||
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c | |||
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c | |||
CGEMMONCOPY = zgemm_ncopy_2.S | |||
@@ -1,7 +1,6 @@ | |||
SHELL = /bin/sh | |||
PLAT = _LINUX | |||
DRVOPTS = $(OPTS) | |||
LOADER = $(FORTRAN) | |||
ARCHFLAGS= -ru | |||
#RANLIB = ranlib | |||
@@ -1,15 +1,19 @@ | |||
UTEST_CHECK = 1 | |||
TOPDIR = .. | |||
include $(TOPDIR)/Makefile.system | |||
TARGET=openblas_utest | |||
.PHONY : all | |||
.NOTPARALLEL : all run_test $(TARGET) | |||
CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2 | |||
CUNIT_DIR=$(CURDIR)/CUnit-2.1-2 | |||
CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a | |||
CFLAGS+=-I$(CUNIT_DIR)/include | |||
CFLAGS +=-I$(CUNIT_DIR)/include | |||
include $(TOPDIR)/Makefile.system | |||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o | |||