For example, make NO_AVX=1 or make DYNAMIC_ARCH=1 NO_AVX=1
tags/v0.2.4
@@ -71,6 +71,10 @@ VERSION = 0.2.3 | |||||
# If you want to disable CPU/Memory affinity on Linux. | # If you want to disable CPU/Memory affinity on Linux. | ||||
# NO_AFFINITY = 1 | # NO_AFFINITY = 1 | ||||
# Don't use the AVX kernels on Sandy Bridge. Disabling them keeps the build | |||||
# compatible with old compilers and OSes, but the performance is lower. | |||||
# NO_AVX = 1 | |||||
# If you would like to know minute performance report of GotoBLAS. | # If you would like to know minute performance report of GotoBLAS. | ||||
# FUNCTION_PROFILE = 1 | # FUNCTION_PROFILE = 1 | ||||
@@ -57,6 +57,10 @@ GEMM_MULTITHREAD_THRESHOLD=50 | |||||
endif | endif | ||||
GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD) | GETARCH_FLAGS += -DGEMM_MULTITHREAD_THRESHOLD=$(GEMM_MULTITHREAD_THRESHOLD) | ||||
ifeq ($(NO_AVX), 1) | |||||
GETARCH_FLAGS += -DNO_AVX | |||||
endif | |||||
# This operation is expensive, so execution should be once. | # This operation is expensive, so execution should be once. | ||||
ifndef GOTOBLAS_MAKEFILE | ifndef GOTOBLAS_MAKEFILE | ||||
export GOTOBLAS_MAKEFILE = 1 | export GOTOBLAS_MAKEFILE = 1 | ||||
@@ -247,11 +251,17 @@ endif | |||||
ifeq ($(DYNAMIC_ARCH), 1) | ifeq ($(DYNAMIC_ARCH), 1) | ||||
ifeq ($(ARCH), x86) | ifeq ($(ARCH), x86) | ||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | ||||
CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||||
ifneq ($(NO_AVX), 1) | |||||
DYNAMIC_CORE += SANDYBRIDGE | |||||
endif | |||||
endif | endif | ||||
ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM SANDYBRIDGE OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||||
ifneq ($(NO_AVX), 1) | |||||
DYNAMIC_CORE += SANDYBRIDGE | |||||
endif | |||||
endif | endif | ||||
ifndef DYNAMIC_CORE | ifndef DYNAMIC_CORE | ||||
@@ -562,6 +572,10 @@ ifeq ($(NO_LAPACKE), 1) | |||||
CCOMMON_OPT += -DNO_LAPACKE | CCOMMON_OPT += -DNO_LAPACKE | ||||
endif | endif | ||||
ifeq ($(NO_AVX), 1) | |||||
CCOMMON_OPT += -DNO_AVX | |||||
endif | |||||
ifdef SMP | ifdef SMP | ||||
CCOMMON_OPT += -DSMP_SERVER | CCOMMON_OPT += -DSMP_SERVER | ||||
@@ -40,6 +40,11 @@ | |||||
#include <string.h> | #include <string.h> | ||||
#include "cpuid.h" | #include "cpuid.h" | ||||
#ifdef NO_AVX | |||||
#define CPUTYPE_SANDYBRIDGE CPUTYPE_NEHALEM | |||||
#define CORE_SANDYBRIDGE CORE_NEHALEM | |||||
#endif | |||||
#ifndef CPUIDEMU | #ifndef CPUIDEMU | ||||
#if defined(__APPLE__) && defined(__i386__) | #if defined(__APPLE__) && defined(__i386__) | ||||
@@ -189,7 +194,9 @@ int get_cputype(int gettype){ | |||||
if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3; | if ((ecx & (1 << 9)) != 0) feature |= HAVE_SSSE3; | ||||
if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1; | if ((ecx & (1 << 19)) != 0) feature |= HAVE_SSE4_1; | ||||
if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2; | if ((ecx & (1 << 20)) != 0) feature |= HAVE_SSE4_2; | ||||
#ifndef NO_AVX | |||||
if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX; | if ((ecx & (1 << 28)) != 0) feature |= HAVE_AVX; | ||||
#endif | |||||
if (have_excpuid() >= 0x01) { | if (have_excpuid() >= 0x01) { | ||||
cpuid(0x80000001, &eax, &ebx, &ecx, &edx); | cpuid(0x80000001, &eax, &ebx, &ecx, &edx); | ||||
@@ -60,8 +60,14 @@ extern gotoblas_t gotoblas_NEHALEM; | |||||
extern gotoblas_t gotoblas_OPTERON; | extern gotoblas_t gotoblas_OPTERON; | ||||
extern gotoblas_t gotoblas_OPTERON_SSE3; | extern gotoblas_t gotoblas_OPTERON_SSE3; | ||||
extern gotoblas_t gotoblas_BARCELONA; | extern gotoblas_t gotoblas_BARCELONA; | ||||
extern gotoblas_t gotoblas_SANDYBRIDGE; | |||||
extern gotoblas_t gotoblas_BOBCAT; | extern gotoblas_t gotoblas_BOBCAT; | ||||
#ifndef NO_AVX | |||||
extern gotoblas_t gotoblas_SANDYBRIDGE; | |||||
#else | |||||
//Use NEHALEM kernels for sandy bridge | |||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | |||||
#endif | |||||
#define VENDOR_INTEL 1 | #define VENDOR_INTEL 1 | ||||
#define VENDOR_AMD 2 | #define VENDOR_AMD 2 | ||||