Browse Source

Merge branch 'develop'

tags/v0.2.10.rc2^0
Zhang Xianyi 11 years ago
parent
commit
f9991fd5f6
22 changed files with 246 additions and 46 deletions
  1. +7
    -1
      Makefile
  2. +1
    -1
      Makefile.rule
  3. +3
    -0
      cblas.h
  4. +6
    -4
      driver/level3/Makefile
  5. +25
    -0
      driver/others/openblas_get_config.c
  6. +2
    -1
      driver/others/parameter.c
  7. +2
    -1
      exports/gensymbol
  8. +9
    -0
      getarch.c
  9. +5
    -3
      interface/Makefile
  10. +10
    -4
      interface/ger.c
  11. +13
    -5
      interface/sbmv.c
  12. +11
    -4
      interface/zger.c
  13. +12
    -4
      interface/zsbmv.c
  14. +4
    -2
      kernel/Makefile.L3
  15. +104
    -0
      kernel/generic/dot.c
  16. +23
    -6
      kernel/setparam-ref.c
  17. +2
    -4
      kernel/x86_64/KERNEL
  18. +0
    -1
      kernel/x86_64/KERNEL.BULLDOZER
  19. +0
    -1
      kernel/x86_64/KERNEL.PILEDRIVER
  20. +1
    -1
      kernel/x86_64/KERNEL.PRESCOTT
  21. +0
    -1
      make.inc
  22. +6
    -2
      utest/Makefile

+ 7
- 1
Makefile View File

@@ -247,10 +247,16 @@ ifndef NOFORTRAN
-@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "SUFFIX = $(SUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "PSUFFIX = $(PSUFFIX)" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "CEXTRALIB = $(EXTRALIB)" >> $(NETLIB_LAPACK_DIR)/make.inc
ifeq ($(F_COMPILER), GFORTRAN)
ifeq ($(FC), GFORTRAN)
-@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "TIMER = INT_ETIME" >> $(NETLIB_LAPACK_DIR)/make.inc
ifdef SMP
-@echo "LOADER = $(FC) -pthread" >> $(NETLIB_LAPACK_DIR)/make.inc
else
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif
else else
-@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc -@echo "TIMER = NONE" >> $(NETLIB_LAPACK_DIR)/make.inc
-@echo "LOADER = $(FC)" >> $(NETLIB_LAPACK_DIR)/make.inc
endif endif
-@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc -@cat make.inc >> $(NETLIB_LAPACK_DIR)/make.inc
endif endif


+ 1
- 1
Makefile.rule View File

@@ -3,7 +3,7 @@
# #


# This library's version # This library's version
VERSION = 0.2.10.rc1
VERSION = 0.2.10.rc2


# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a # If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library # and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library


+ 3
- 0
cblas.h View File

@@ -16,6 +16,9 @@ void goto_set_num_threads(int num_threads);
/*Get the build configure on runtime.*/ /*Get the build configure on runtime.*/
char* openblas_get_config(void); char* openblas_get_config(void);


/* Get the CPU core name at runtime. */
char* openblas_get_corename(void);

/* Get the parallelization type which is used by OpenBLAS */ /* Get the parallelization type which is used by OpenBLAS */
int openblas_get_parallel(void); int openblas_get_parallel(void);
/* OpenBLAS is compiled for sequential use */ /* OpenBLAS is compiled for sequential use */


+ 6
- 4
driver/level3/Makefile View File

@@ -1,12 +1,14 @@
TOPDIR = ../.. TOPDIR = ../..
include ../../Makefile.system include ../../Makefile.system


USE_GEMM3M = 0

ifeq ($(ARCH), x86) ifeq ($(ARCH), x86)
USE_GEMM3M = 1
USE_GEMM3M = 0
endif endif


ifeq ($(ARCH), x86_64) ifeq ($(ARCH), x86_64)
USE_GEMM3M = 1
USE_GEMM3M = 0
endif endif


ifeq ($(ARCH), ia64) ifeq ($(ARCH), ia64)
@@ -168,7 +170,7 @@ XBLASOBJS += \
xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \ xher2k_kernel_UN.$(SUFFIX) xher2k_kernel_UC.$(SUFFIX) \
xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX) xher2k_kernel_LN.$(SUFFIX) xher2k_kernel_LC.$(SUFFIX)


ifdef USE_GEMM3M
ifeq ($(USE_GEMM3M), 1)


CBLASOBJS += \ CBLASOBJS += \
cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \ cgemm3m_nn.$(SUFFIX) cgemm3m_cn.$(SUFFIX) cgemm3m_tn.$(SUFFIX) cgemm3m_nc.$(SUFFIX) \
@@ -239,7 +241,7 @@ CBLASOBJS += cherk_thread_UN.$(SUFFIX) cherk_thread_UC.$(SUFFIX) cherk_thread
ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX) ZBLASOBJS += zherk_thread_UN.$(SUFFIX) zherk_thread_UC.$(SUFFIX) zherk_thread_LN.$(SUFFIX) zherk_thread_LC.$(SUFFIX)
XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX) XBLASOBJS += xherk_thread_UN.$(SUFFIX) xherk_thread_UC.$(SUFFIX) xherk_thread_LN.$(SUFFIX) xherk_thread_LC.$(SUFFIX)


ifdef USE_GEMM3M
ifeq ($(USE_GEMM3M), 1)


CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX) CBLASOBJS += cgemm3m_thread_nn.$(SUFFIX) cgemm3m_thread_nt.$(SUFFIX) cgemm3m_thread_nr.$(SUFFIX) cgemm3m_thread_nc.$(SUFFIX)
CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX) CBLASOBJS += cgemm3m_thread_tn.$(SUFFIX) cgemm3m_thread_tt.$(SUFFIX) cgemm3m_thread_tr.$(SUFFIX) cgemm3m_thread_tc.$(SUFFIX)


+ 25
- 0
driver/others/openblas_get_config.c View File

@@ -32,6 +32,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#include "common.h" #include "common.h"


#include <string.h>

static char* openblas_config_str="" static char* openblas_config_str=""
#ifdef USE64BITINT #ifdef USE64BITINT
"USE64BITINT " "USE64BITINT "
@@ -50,10 +52,33 @@ static char* openblas_config_str=""
#endif #endif
#ifdef NO_AFFINITY #ifdef NO_AFFINITY
"NO_AFFINITY " "NO_AFFINITY "
#endif
#ifndef DYNAMIC_ARCH
CHAR_CORENAME
#endif #endif
; ;


#ifdef DYNAMIC_ARCH
char *gotoblas_corename();
static char tmp_config_str[256];
#endif


char* CNAME() { char* CNAME() {
#ifndef DYNAMIC_ARCH
return openblas_config_str; return openblas_config_str;
#else
strcpy(tmp_config_str, openblas_config_str);
strcat(tmp_config_str, gotoblas_corename());
return tmp_config_str;
#endif
} }



/*
 * Return the name of the CPU core this library targets.
 *
 * Without DYNAMIC_ARCH the name is the compile-time string CHAR_CORENAME;
 * with DYNAMIC_ARCH it is whatever gotoblas_corename() reports at run time.
 * The returned pointer is not allocated here, so the caller must not free it.
 *
 * The parameter list is spelled (void) so the definition is a real prototype
 * and matches the public declaration in cblas.h.
 */
char* openblas_get_corename(void) {
#ifndef DYNAMIC_ARCH
	return CHAR_CORENAME;
#else
	return gotoblas_corename();
#endif
}

+ 2
- 1
driver/others/parameter.c View File

@@ -165,7 +165,8 @@ int get_L2_size(void){


#if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \ #if defined(ATHLON) || defined(OPTERON) || defined(BARCELONA) || defined(BOBCAT) || defined(BULLDOZER) || \
defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \ defined(CORE_PRESCOTT) || defined(CORE_CORE2) || defined(PENRYN) || defined(DUNNINGTON) || \
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC)
defined(CORE_NEHALEM) || defined(CORE_SANDYBRIDGE) || defined(ATOM) || defined(GENERIC) || \
defined(PILEDRIVER) || defined(HASWELL)


cpuid(0x80000006, &eax, &ebx, &ecx, &edx); cpuid(0x80000006, &eax, &ebx, &ecx, &edx);




+ 2
- 1
exports/gensymbol View File

@@ -73,7 +73,7 @@
); );


@gemm3mobjs = ( @gemm3mobjs = (
zgemm3m, cgemm3m, zsymm3m, csymm3m, zhemm3m, chemm3m,
); );




@@ -85,6 +85,7 @@
@misc_no_underscore_objs = ( @misc_no_underscore_objs = (
goto_set_num_threads, goto_set_num_threads,
openblas_get_config, openblas_get_config,
openblas_get_corename,
); );


@misc_underscore_objs = ( @misc_underscore_objs = (


+ 9
- 0
getarch.c View File

@@ -952,6 +952,15 @@ int main(int argc, char *argv[]){
#else #else
get_cpuconfig(); get_cpuconfig();
#endif #endif

#ifdef FORCE
printf("#define CHAR_CORENAME \"%s\"\n", CORENAME);
#else
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__)
printf("#define CHAR_CORENAME \"%s\"\n", get_corename());
#endif
#endif

break; break;


case '2' : /* SMP */ case '2' : /* SMP */


+ 5
- 3
interface/Makefile View File

@@ -1,6 +1,8 @@
TOPDIR = .. TOPDIR = ..
include $(TOPDIR)/Makefile.system include $(TOPDIR)/Makefile.system


SUPPORT_GEMM3M = 0

ifeq ($(ARCH), x86) ifeq ($(ARCH), x86)
SUPPORT_GEMM3M = 0 SUPPORT_GEMM3M = 0
endif endif
@@ -124,7 +126,7 @@ ZBLAS3OBJS = \
zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \ zhemm.$(SUFFIX) zherk.$(SUFFIX) zher2k.$(SUFFIX) \
zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX) zomatcopy.$(SUFFIX) zimatcopy.$(SUFFIX)


ifdef SUPPORT_GEMM3M
ifeq ($(SUPPORT_GEMM3M), 1)


CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX) CBLAS3OBJS += cgemm3m.$(SUFFIX) csymm3m.$(SUFFIX) chemm3m.$(SUFFIX)


@@ -182,7 +184,7 @@ XBLAS3OBJS = \
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)


ifdef SUPPORT_GEMM3M
ifeq ($(SUPPORT_GEMM3M), 1)


XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)


@@ -238,7 +240,7 @@ XBLAS3OBJS = \
xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \ xtrsm.$(SUFFIX) xsyrk.$(SUFFIX) xsyr2k.$(SUFFIX) \
xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX) xhemm.$(SUFFIX) xherk.$(SUFFIX) xher2k.$(SUFFIX)


ifdef SUPPORT_GEMM3M
ifeq ($(SUPPORT_GEMM3M), 1)


XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX) XBLAS3OBJS += xgemm3m.$(SUFFIX) xsymm3m.$(SUFFIX) xhemm3m.$(SUFFIX)




+ 10
- 4
interface/ger.c View File

@@ -42,6 +42,12 @@
#include "functable.h" #include "functable.h"
#endif #endif


#ifdef SMP
#ifdef __64BIT__
#define SMPTEST 1
#endif
#endif

#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "QGER " #define ERROR_NAME "QGER "
#elif defined DOUBLE #elif defined DOUBLE
@@ -75,7 +81,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
blasint incy = *INCY; blasint incy = *INCY;
blasint lda = *LDA; blasint lda = *LDA;
FLOAT *buffer; FLOAT *buffer;
#ifdef SMPBUG
#ifdef SMPTEST
int nthreads; int nthreads;
#endif #endif


@@ -107,7 +113,7 @@ void CNAME(enum CBLAS_ORDER order,


FLOAT *buffer; FLOAT *buffer;
blasint info, t; blasint info, t;
#ifdef SMPBUG
#ifdef SMPTEST
int nthreads; int nthreads;
#endif #endif


@@ -167,7 +173,7 @@ void CNAME(enum CBLAS_ORDER order,


buffer = (FLOAT *)blas_memory_alloc(1); buffer = (FLOAT *)blas_memory_alloc(1);


#ifdef SMPBUG
#ifdef SMPTEST
nthreads = num_cpu_avail(2); nthreads = num_cpu_avail(2);




@@ -176,7 +182,7 @@ void CNAME(enum CBLAS_ORDER order,


GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer); GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer);


#ifdef SMPBUG
#ifdef SMPTEST
} else { } else {


GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads); GER_THREAD(m, n, alpha, x, incx, y, incy, a, lda, buffer, nthreads);


+ 13
- 5
interface/sbmv.c View File

@@ -43,6 +43,14 @@
#include "functable.h" #include "functable.h"
#endif #endif


/*
#ifdef SMP
#ifdef __64BIT__
#define SMPTEST 1
#endif
#endif
*/

#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "QSBMV " #define ERROR_NAME "QSBMV "
#elif defined(DOUBLE) #elif defined(DOUBLE)
@@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLA
#endif #endif
}; };


#ifdef SMPBUG
#ifdef SMPTEST
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE #ifdef XDOUBLE
qsbmv_thread_U, qsbmv_thread_L, qsbmv_thread_U, qsbmv_thread_L,
@@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info; blasint info;
int uplo; int uplo;
FLOAT *buffer; FLOAT *buffer;
#ifdef SMPBUG
#ifdef SMPTEST
int nthreads; int nthreads;
#endif #endif


@@ -130,7 +138,7 @@ void CNAME(enum CBLAS_ORDER order,
FLOAT *buffer; FLOAT *buffer;
int uplo; int uplo;
blasint info; blasint info;
#ifdef SMPBUG
#ifdef SMPTEST
int nthreads; int nthreads;
#endif #endif


@@ -189,7 +197,7 @@ void CNAME(enum CBLAS_ORDER order,


buffer = (FLOAT *)blas_memory_alloc(1); buffer = (FLOAT *)blas_memory_alloc(1);


#ifdef SMPBUG
#ifdef SMPTEST
nthreads = num_cpu_avail(2); nthreads = num_cpu_avail(2);


if (nthreads == 1) { if (nthreads == 1) {
@@ -197,7 +205,7 @@ void CNAME(enum CBLAS_ORDER order,


(sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer); (sbmv[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer);


#ifdef SMPBUG
#ifdef SMPTEST
} else { } else {


(sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads); (sbmv_thread[uplo])(n, k, alpha, a, lda, x, incx, y, incy, buffer, nthreads);


+ 11
- 4
interface/zger.c View File

@@ -42,6 +42,13 @@
#include "functable.h" #include "functable.h"
#endif #endif


#ifdef SMP
#ifdef __64BIT__
#define SMPTEST 1
#endif
#endif


#ifdef XDOUBLE #ifdef XDOUBLE
#ifndef CONJ #ifndef CONJ
#define ERROR_NAME "XGERU " #define ERROR_NAME "XGERU "
@@ -109,7 +116,7 @@ void NAME(blasint *M, blasint *N, FLOAT *Alpha,
blasint incy = *INCY; blasint incy = *INCY;
blasint lda = *LDA; blasint lda = *LDA;
FLOAT *buffer; FLOAT *buffer;
#ifdef SMPBUG
#ifdef SMPTEST
int nthreads; int nthreads;
#endif #endif


@@ -144,7 +151,7 @@ void CNAME(enum CBLAS_ORDER order,


FLOAT *buffer; FLOAT *buffer;
blasint info, t; blasint info, t;
#ifdef SMPBUG
#ifdef SMPTEST
int nthreads; int nthreads;
#endif #endif


@@ -205,7 +212,7 @@ void CNAME(enum CBLAS_ORDER order,


buffer = (FLOAT *)blas_memory_alloc(1); buffer = (FLOAT *)blas_memory_alloc(1);


#ifdef SMPBUG
#ifdef SMPTEST
nthreads = num_cpu_avail(2); nthreads = num_cpu_avail(2);


if (nthreads == 1) { if (nthreads == 1) {
@@ -221,7 +228,7 @@ void CNAME(enum CBLAS_ORDER order,
} }
#endif #endif


#ifdef SMPBUG
#ifdef SMPTEST


} else { } else {




+ 12
- 4
interface/zsbmv.c View File

@@ -43,6 +43,14 @@
#include "functable.h" #include "functable.h"
#endif #endif


/*
#ifdef SMP
#ifdef __64BIT__
#define SMPTEST 1
#endif
#endif
*/

#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "XSBMV " #define ERROR_NAME "XSBMV "
#elif defined(DOUBLE) #elif defined(DOUBLE)
@@ -61,7 +69,7 @@ static int (*sbmv[])(BLASLONG, BLASLONG, FLOAT, FLOAT, FLOAT *, BLASLONG, FLOAT
#endif #endif
}; };


#ifdef SMPBUG
#ifdef SMPTEST
static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = { static int (*sbmv_thread[])(BLASLONG, BLASLONG, FLOAT *, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, BLASLONG, FLOAT *, int) = {
#ifdef XDOUBLE #ifdef XDOUBLE
xsbmv_thread_U, xsbmv_thread_L, xsbmv_thread_U, xsbmv_thread_L,
@@ -90,7 +98,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *
blasint info; blasint info;
int uplo; int uplo;
FLOAT *buffer; FLOAT *buffer;
#ifdef SMPBUG
#ifdef SMPTEST
int nthreads; int nthreads;
#endif #endif


@@ -131,7 +139,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *


buffer = (FLOAT *)blas_memory_alloc(1); buffer = (FLOAT *)blas_memory_alloc(1);


#ifdef SMPBUG
#ifdef SMPTEST
nthreads = num_cpu_avail(2); nthreads = num_cpu_avail(2);


if (nthreads == 1) { if (nthreads == 1) {
@@ -139,7 +147,7 @@ void NAME(char *UPLO, blasint *N, blasint *K, FLOAT *ALPHA, FLOAT *a, blasint *


(sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer); (sbmv[uplo])(n, k, alpha_r, alpha_i, a, lda, b, incx, c, incy, buffer);


#ifdef SMPBUG
#ifdef SMPTEST
} else { } else {


(sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads); (sbmv_thread[uplo])(n, k, ALPHA, a, lda, b, incx, c, incy, buffer, nthreads);


+ 4
- 2
kernel/Makefile.L3 View File

@@ -1,3 +1,5 @@
USE_GEMM3M = 0

ifeq ($(ARCH), x86) ifeq ($(ARCH), x86)
USE_GEMM3M = 1 USE_GEMM3M = 1
endif endif
@@ -122,7 +124,7 @@ XBLASOBJS += \
xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_RN$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RT$(TSUFFIX).$(SUFFIX) \
xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \ xtrsm_kernel_RR$(TSUFFIX).$(SUFFIX) xtrsm_kernel_RC$(TSUFFIX).$(SUFFIX) \


ifdef USE_GEMM3M
ifeq ($(USE_GEMM3M), 1)


CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX) CBLASOBJS += cgemm3m_kernel$(TSUFFIX).$(SUFFIX)
ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX) ZBLASOBJS += zgemm3m_kernel$(TSUFFIX).$(SUFFIX)
@@ -256,7 +258,7 @@ XBLASOBJS += \
xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \ xhemm_iutcopy$(TSUFFIX).$(SUFFIX) xhemm_iltcopy$(TSUFFIX).$(SUFFIX) \
xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX) xhemm_outcopy$(TSUFFIX).$(SUFFIX) xhemm_oltcopy$(TSUFFIX).$(SUFFIX)


ifdef USE_GEMM3M
ifeq ($(USE_GEMM3M), 1)


CBLASOBJS += \ CBLASOBJS += \
cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \ cgemm3m_incopyb$(TSUFFIX).$(SUFFIX) cgemm3m_itcopyb$(TSUFFIX).$(SUFFIX) \


+ 104
- 0
kernel/generic/dot.c View File

@@ -0,0 +1,104 @@
/***************************************************************************
Copyright (c) 2014, The OpenBLAS Project
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the
distribution.
3. Neither the name of the OpenBLAS project nor the names of
its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*****************************************************************************/


#include "common.h"

/*
 * Generic C dot-product kernel: returns the sum of x[k*inc_x] * y[k*inc_y]
 * for k = 0 .. n-1.
 *
 * When DSDOT is defined, every product and the running sum are computed in
 * double and a double is returned; otherwise everything is done in FLOAT.
 *
 * n      number of elements; n <= 0 yields 0.0
 * x, y   input vectors (only read)
 * inc_x  stride, in elements, between consecutive entries of x
 * inc_y  stride, in elements, between consecutive entries of y
 *
 * Indexing always starts at element 0 and advances by the given stride.
 * NOTE(review): for negative strides the caller presumably pre-offsets the
 * pointers -- confirm against the interface layer.
 */
#if defined(DSDOT)
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{
	BLASLONG i = 0;
	BLASLONG ix = 0, iy = 0;

#if defined(DSDOT)
	double dot = 0.0;
#else
	FLOAT dot = 0.0;
#endif

	/* Nothing to accumulate (n == 0 would skip every loop anyway). */
	if (n <= 0)
		return dot;

	if ((inc_x == 1) && (inc_y == 1)) {
		/*
		 * Contiguous case: handle four elements per iteration.
		 * n1 is n rounded down to a multiple of 4.  It must be BLASLONG,
		 * not int, so that it is not narrowed when n exceeds INT_MAX.
		 */
		BLASLONG n1 = n & -4;

		while (i < n1) {
#if defined(DSDOT)
			dot += (double) y[i]   * (double) x[i]
			     + (double) y[i+1] * (double) x[i+1]
			     + (double) y[i+2] * (double) x[i+2]
			     + (double) y[i+3] * (double) x[i+3];
#else
			dot += y[i]   * x[i]
			     + y[i+1] * x[i+1]
			     + y[i+2] * x[i+2]
			     + y[i+3] * x[i+3];
#endif
			i += 4;
		}

		/* Remaining 0..3 tail elements. */
		while (i < n) {
#if defined(DSDOT)
			dot += (double) y[i] * (double) x[i];
#else
			dot += y[i] * x[i];
#endif
			i++;
		}

		return dot;
	}

	/* General strided case: one element per iteration. */
	while (i < n) {
#if defined(DSDOT)
		dot += (double) y[iy] * (double) x[ix];
#else
		dot += y[iy] * x[ix];
#endif
		ix += inc_x;
		iy += inc_y;
		i++;
	}

	return dot;
}



+ 23
- 6
kernel/setparam-ref.c View File

@@ -714,13 +714,13 @@ static void init_parameter(void) {
fprintf(stderr, "Core2\n"); fprintf(stderr, "Core2\n");
#endif #endif


TABLE_NAME.sgemm_p = 92 * (l2 >> 9);
TABLE_NAME.dgemm_p = 46 * (l2 >> 9);
TABLE_NAME.cgemm_p = 46 * (l2 >> 9);
TABLE_NAME.zgemm_p = 23 * (l2 >> 9);
TABLE_NAME.sgemm_p = 92 * (l2 >> 9) + 8;
TABLE_NAME.dgemm_p = 46 * (l2 >> 9) + 8;
TABLE_NAME.cgemm_p = 46 * (l2 >> 9) + 4;
TABLE_NAME.zgemm_p = 23 * (l2 >> 9) + 4;
#ifdef EXPRECISION #ifdef EXPRECISION
TABLE_NAME.qgemm_p = 92 * (l2 >> 9);
TABLE_NAME.xgemm_p = 46 * (l2 >> 9);
TABLE_NAME.qgemm_p = 92 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 46 * (l2 >> 9) + 4;
#endif #endif
#endif #endif


@@ -740,6 +740,23 @@ static void init_parameter(void) {
#endif #endif
#endif #endif


#ifdef DUNNINGTON

#ifdef DEBUG
fprintf(stderr, "Dunnington\n");
#endif

TABLE_NAME.sgemm_p = 42 * (l2 >> 9) + 8;
TABLE_NAME.dgemm_p = 42 * (l2 >> 9) + 8;
TABLE_NAME.cgemm_p = 21 * (l2 >> 9) + 4;
TABLE_NAME.zgemm_p = 21 * (l2 >> 9) + 4;
#ifdef EXPRECISION
TABLE_NAME.qgemm_p = 42 * (l2 >> 9) + 8;
TABLE_NAME.xgemm_p = 21 * (l2 >> 9) + 4;
#endif
#endif


#ifdef NEHALEM #ifdef NEHALEM


#ifdef DEBUG #ifdef DEBUG


+ 2
- 4
kernel/x86_64/KERNEL View File

@@ -119,15 +119,13 @@ XCOPYKERNEL = zcopy.S
endif endif


ifndef SDOTKERNEL ifndef SDOTKERNEL
SDOTKERNEL = dot_sse.S
SDOTKERNEL = ../generic/dot.c
endif endif



ifndef DSDOTKERNEL ifndef DSDOTKERNEL
DSDOTKERNEL = ../arm/dot.c
DSDOTKERNEL = ../generic/dot.c
endif endif



ifndef DDOTKERNEL ifndef DDOTKERNEL
DDOTKERNEL = dot_sse2.S DDOTKERNEL = dot_sse2.S
endif endif


+ 0
- 1
kernel/x86_64/KERNEL.BULLDOZER View File

@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S


DGEMVNKERNEL = dgemv_n_bulldozer.S DGEMVNKERNEL = dgemv_n_bulldozer.S
DGEMVTKERNEL = dgemv_t_bulldozer.S DGEMVTKERNEL = dgemv_t_bulldozer.S
DAXPYKERNEL = daxpy_bulldozer.S
DDOTKERNEL = ddot_bulldozer.S DDOTKERNEL = ddot_bulldozer.S
DCOPYKERNEL = dcopy_bulldozer.S DCOPYKERNEL = dcopy_bulldozer.S




+ 0
- 1
kernel/x86_64/KERNEL.PILEDRIVER View File

@@ -6,7 +6,6 @@ ZGEMVTKERNEL = zgemv_t.S


DGEMVNKERNEL = dgemv_n_bulldozer.S DGEMVNKERNEL = dgemv_n_bulldozer.S
DGEMVTKERNEL = dgemv_t_bulldozer.S DGEMVTKERNEL = dgemv_t_bulldozer.S
DAXPYKERNEL = daxpy_bulldozer.S
DDOTKERNEL = ddot_bulldozer.S DDOTKERNEL = ddot_bulldozer.S
DCOPYKERNEL = dcopy_bulldozer.S DCOPYKERNEL = dcopy_bulldozer.S




+ 1
- 1
kernel/x86_64/KERNEL.PRESCOTT View File

@@ -19,7 +19,7 @@ DGEMMINCOPYOBJ =
DGEMMITCOPYOBJ = DGEMMITCOPYOBJ =
DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX)
DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX)
CGEMMKERNEL = zgemm_kernel_4x2_sse3.S
CGEMMKERNEL = zgemm_kernel_4x2_sse.S
CGEMMINCOPY = ../generic/zgemm_ncopy_4.c CGEMMINCOPY = ../generic/zgemm_ncopy_4.c
CGEMMITCOPY = ../generic/zgemm_tcopy_4.c CGEMMITCOPY = ../generic/zgemm_tcopy_4.c
CGEMMONCOPY = zgemm_ncopy_2.S CGEMMONCOPY = zgemm_ncopy_2.S


+ 0
- 1
make.inc View File

@@ -1,7 +1,6 @@
SHELL = /bin/sh SHELL = /bin/sh
PLAT = _LINUX PLAT = _LINUX
DRVOPTS = $(OPTS) DRVOPTS = $(OPTS)
LOADER = $(FORTRAN)
ARCHFLAGS= -ru ARCHFLAGS= -ru
#RANLIB = ranlib #RANLIB = ranlib



+ 6
- 2
utest/Makefile View File

@@ -1,15 +1,19 @@
UTEST_CHECK = 1 UTEST_CHECK = 1
TOPDIR = .. TOPDIR = ..
include $(TOPDIR)/Makefile.system


TARGET=openblas_utest TARGET=openblas_utest


.PHONY : all
.NOTPARALLEL : all run_test $(TARGET)

CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2 CUNIT_URL=http://downloads.sourceforge.net/project/cunit/CUnit/2.1-2/CUnit-2.1-2-src.tar.bz2
CUNIT_DIR=$(CURDIR)/CUnit-2.1-2 CUNIT_DIR=$(CURDIR)/CUnit-2.1-2


CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a CUNIT_LIB=$(CUNIT_DIR)/lib/libcunit.a


CFLAGS+=-I$(CUNIT_DIR)/include
CFLAGS +=-I$(CUNIT_DIR)/include

include $(TOPDIR)/Makefile.system


OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o test_amax.o test_fork.o




Loading…
Cancel
Save