Browse Source

Merge branch 'develop'

tags/v0.2.1^0
Xianyi Zhang 13 years ago
parent
commit
551f478477
7 changed files with 25 additions and 4 deletions
  1. +8
    -0
      Changelog.txt
  2. +1
    -1
      Makefile.rule
  3. +1
    -0
      README.md
  4. +2
    -0
      cpuid.h
  5. +6
    -0
      cpuid_x86.c
  6. +3
    -0
      driver/others/init.c
  7. +4
    -3
      getarch.c

+ 8
- 0
Changelog.txt View File

@@ -1,4 +1,12 @@
OpenBLAS ChangeLog
====================================================================
Version 0.2.1
30-Jun-2012
common:
x86/x86-64:
* Fixed the SEGFAULT bug related to hyper-threading
* Support AMD Bulldozer by using GotoBLAS2 AMD Barcelona codes

====================================================================
Version 0.2.0
26-Jun-2012


+ 1
- 1
Makefile.rule View File

@@ -3,7 +3,7 @@
#

# This library's version
VERSION = 0.2.0
VERSION = 0.2.1

# If you set the suffix, the library name will be libopenblas_$(LIBNAMESUFFIX).a
# and libopenblas_$(LIBNAMESUFFIX).so. Meanwhile, the soname in shared library


+ 1
- 0
README.md View File

@@ -44,6 +44,7 @@ Please read GotoBLAS_01Readme.txt
- **Intel Xeon 56xx (Westmere)**: Used GotoBLAS2 Nehalem codes.
- **Intel Sandy Bridge**: Optimized Level-3 BLAS with AVX on x86-64.
- **AMD Bobcat**: Used GotoBLAS2 Barcelona codes.
- **AMD Bulldozer**: Used GotoBLAS2 Barcelona codes.

#### MIPS64:
- **ICT Loongson 3A**: Optimized Level-3 BLAS and parts of Level-1 and Level-2.


+ 2
- 0
cpuid.h View File

@@ -105,6 +105,7 @@
#define CORE_NANO 19
#define CORE_SANDYBRIDGE 20
#define CORE_BOBCAT 21
#define CORE_BULLDOZER 22

#define HAVE_SSE (1 << 0)
#define HAVE_SSE2 (1 << 1)
@@ -193,4 +194,5 @@ typedef struct {
#define CPUTYPE_NANO 43
#define CPUTYPE_SANDYBRIDGE 44
#define CPUTYPE_BOBCAT 45
#define CPUTYPE_BULLDOZER 46
#endif

+ 6
- 0
cpuid_x86.c View File

@@ -1027,6 +1027,7 @@ int get_cpuname(void){
return CPUTYPE_OPTERON;
case 1:
case 10:
case 6: //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
return CPUTYPE_BARCELONA;
case 5:
return CPUTYPE_BOBCAT;
@@ -1151,6 +1152,7 @@ static char *cpuname[] = {
"NANO",
"SANDYBRIDGE",
"BOBCAT",
"BULLDOZER",
};

static char *lowercpuname[] = {
@@ -1199,6 +1201,7 @@ static char *lowercpuname[] = {
"nano",
"sandybridge",
"bobcat",
"bulldozer",
};

static char *corename[] = {
@@ -1224,6 +1227,7 @@ static char *corename[] = {
"NANO",
"SANDYBRIDGE",
"BOBCAT",
"BULLDOZER",
};

static char *corename_lower[] = {
@@ -1249,6 +1253,7 @@ static char *corename_lower[] = {
"nano",
"sandybridge",
"bobcat",
"bulldozer",
};


@@ -1359,6 +1364,7 @@ int get_coretype(void){
if (family == 0xf){
if ((exfamily == 0) || (exfamily == 2)) return CORE_OPTERON;
else if (exfamily == 5) return CORE_BOBCAT;
else if (exfamily == 6) return CORE_BARCELONA; //AMD Bulldozer Opteron 6200 / Opteron 4200 / AMD FX-Series
else return CORE_BARCELONA;
}
}


+ 3
- 0
driver/others/init.c View File

@@ -447,6 +447,9 @@ static void disable_hyperthread(void) {

// When the shared CPUs are in different elements of the share & avail arrays, this may be a bug.
for (i = 0; i < count ; i++){

share[i] &= common->avail[i];

if (popcount(share[i]) > 1) {
#ifdef DEBUG


+ 4
- 3
getarch.c View File

@@ -102,6 +102,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/* #define FORCE_BARCELONA */
/* #define FORCE_SHANGHAI */
/* #define FORCE_ISTANBUL */
/* #define FORCE_BULLDOZER */
/* #define FORCE_BOBCAT */
/* #define FORCE_SSE_GENERIC */
/* #define FORCE_VIAC3 */
@@ -349,7 +350,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define CORENAME "OPTERON"
#endif

#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL)
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_BULLDOZER)
#define FORCE
#define FORCE_INTEL
#define ARCHITECTURE "X86"
@@ -357,8 +358,8 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#define ARCHCONFIG "-DBARCELONA " \
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 " \
"-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL3_SIZE=2097152 " \
"-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 -DHAVE_3DNOW " \
"-DHAVE_3DNOWEX -DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
"-DDTB_DEFAULT_ENTRIES=48 -DDTB_SIZE=4096 " \
"-DHAVE_MMX -DHAVE_SSE -DHAVE_SSE2 -DHAVE_SSE3 " \
"-DHAVE_SSE4A -DHAVE_MISALIGNSSE -DHAVE_128BITFPU -DHAVE_FASTMOVU"
#define LIBNAME "barcelona"
#define CORENAME "BARCELONA"


Loading…
Cancel
Save