@@ -1,7 +1,12 @@ | |||||
# Per-core compiler flags: the same options are appended to both CCOMMON_OPT
# and FCOMMON_OPT.
# ARMV7 cores: ARM instruction set, VFPv3 FPU, hard-float ABI.
ifeq ($(CORE), ARMV7) | ifeq ($(CORE), ARMV7) | ||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard | |||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard | |||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||||
endif | |||||
# ARMV6 cores: ARM instruction set, plain VFP FPU, hard-float ABI.
ifeq ($(CORE), ARMV6) | |||||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||||
endif | endif | ||||
@@ -0,0 +1,262 @@ | |||||
/************************************************************************** | |||||
Copyright (c) 2013, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <string.h> | |||||
/* Core identifiers returned by detect(). Each value is also the index of the
   matching entry in cpuname[] below, so the two lists must stay in sync. */
#define CPU_UNKNOWN 0
#define CPU_ARMV6 1
#define CPU_ARMV7 2
#define CPU_CORTEXA15 3

/* Printable core names, indexed by the CPU_* identifiers. */
static char *cpuname[] = {
  "UNKNOWN",    /* CPU_UNKNOWN; previously misspelled "UNKOWN" */
  "ARMV6",      /* CPU_ARMV6 */
  "ARMV7",      /* CPU_ARMV7 */
  "CORTEXA15"   /* CPU_CORTEXA15 */
};
/*
 * Report whether the "Features" line of /proc/cpuinfo lists `search`.
 *
 * search: feature name to look for (e.g. "vfp", "neon"); it is compared
 *         against each whitespace-separated token of the line.
 *
 * Returns 1 when the feature is listed and 0 otherwise. 0 is also returned
 * when not built for Linux, when /proc/cpuinfo cannot be opened, when no
 * well-formed "Features" line exists, or when `search` is NULL.
 */
int get_feature(char *search)
{
/* `linux` is not predefined in strict ISO modes (-std=c99 etc.), so also
   accept the reserved-namespace spelling. */
#if defined(linux) || defined(__linux__)
  FILE *infile;
  char buffer[2048], *p, *t;

  if (search == NULL) return(0);

  p = NULL;
  infile = fopen("/proc/cpuinfo", "r");
  if (infile == NULL) return(0);

  while (fgets(buffer, sizeof(buffer), infile))
  {
    if (!strncmp("Features", buffer, 8))
    {
      /* May be NULL on a malformed line; checked below. */
      p = strchr(buffer, ':');
      break;
    }
  }

  fclose(infile);

  if (p == NULL) return(0);

  /* Start right after the colon: strtok skips the leading blanks itself.
     '\n' is a delimiter so the last feature on the line is matched too,
     and the first token is compared like every other one. */
  for (t = strtok(p + 1, " \t\n"); t != NULL; t = strtok(NULL, " \t\n"))
  {
    if (!strcmp(t, search)) { return(1); }
  }
#endif
  return(0);
}
int detect(void) | |||||
{ | |||||
#ifdef linux | |||||
FILE *infile; | |||||
char buffer[512], *p; | |||||
p = (char *) NULL ; | |||||
infile = fopen("/proc/cpuinfo", "r"); | |||||
while (fgets(buffer, sizeof(buffer), infile)) | |||||
{ | |||||
if (!strncmp("model name", buffer, 10)) | |||||
{ | |||||
p = strchr(buffer, ':') + 2; | |||||
break; | |||||
} | |||||
} | |||||
fclose(infile); | |||||
if(p != NULL) | |||||
{ | |||||
if (strstr(p, "ARMv7")) | |||||
{ | |||||
if ( get_feature("vfpv4")) | |||||
return CPU_ARMV7; | |||||
if ( get_feature("vfpv3")) | |||||
return CPU_ARMV7; | |||||
if ( get_feature("vfp")) | |||||
return CPU_ARMV6; | |||||
} | |||||
if (strstr(p, "ARMv6")) | |||||
{ | |||||
if ( get_feature("vfp")) | |||||
return CPU_ARMV6; | |||||
} | |||||
} | |||||
#endif | |||||
return CPU_UNKNOWN; | |||||
} | |||||
char *get_corename(void) | |||||
{ | |||||
return cpuname[detect()]; | |||||
} | |||||
/* Write the architecture name "ARM" to stdout, without a trailing newline. */
void get_architecture(void)
{
  fputs("ARM", stdout);
}
void get_subarchitecture(void) | |||||
{ | |||||
int d = detect(); | |||||
switch (d) | |||||
{ | |||||
case CPU_ARMV7: | |||||
printf("ARMV7"); | |||||
break; | |||||
case CPU_ARMV6: | |||||
printf("ARMV6"); | |||||
break; | |||||
default: | |||||
printf("UNKNOWN"); | |||||
break; | |||||
} | |||||
} | |||||
/* Write the sub-directory name "arm" to stdout, without a trailing newline. */
void get_subdirname(void)
{
  fputs("arm", stdout);
}
void get_cpuconfig(void) | |||||
{ | |||||
int d = detect(); | |||||
switch (d) | |||||
{ | |||||
case CPU_ARMV7: | |||||
printf("#define ARMV7\n"); | |||||
printf("#define HAVE_VFP\n"); | |||||
printf("#define HAVE_VFPV3\n"); | |||||
if ( get_feature("neon")) printf("#define HAVE_NEON\n"); | |||||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n"); | |||||
printf("#define L1_DATA_SIZE 65536\n"); | |||||
printf("#define L1_DATA_LINESIZE 32\n"); | |||||
printf("#define L2_SIZE 512488\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 4\n"); | |||||
break; | |||||
case CPU_ARMV6: | |||||
printf("#define ARMV6\n"); | |||||
printf("#define HAVE_VFP\n"); | |||||
printf("#define L1_DATA_SIZE 65536\n"); | |||||
printf("#define L1_DATA_LINESIZE 32\n"); | |||||
printf("#define L2_SIZE 512488\n"); | |||||
printf("#define L2_LINESIZE 32\n"); | |||||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
printf("#define DTB_SIZE 4096\n"); | |||||
printf("#define L2_ASSOCIATIVE 4\n"); | |||||
break; | |||||
} | |||||
} | |||||
void get_libname(void) | |||||
{ | |||||
int d = detect(); | |||||
switch (d) | |||||
{ | |||||
case CPU_ARMV7: | |||||
printf("armv7\n"); | |||||
break; | |||||
case CPU_ARMV6: | |||||
printf("armv6\n"); | |||||
break; | |||||
} | |||||
} | |||||
/*
 * Write one "HAVE_<X>=1" line to stdout for each of the features vfp,
 * vfpv3, vfpv4 and neon found on the "Features" line of /proc/cpuinfo.
 *
 * Nothing is written when not built for Linux, when /proc/cpuinfo cannot be
 * opened, or when it has no well-formed "Features" line.
 */
void get_features(void)
{
/* `linux` is not predefined in strict ISO modes (-std=c99 etc.), so also
   accept the reserved-namespace spelling. */
#if defined(linux) || defined(__linux__)
  FILE *infile;
  char buffer[2048], *p, *t;

  p = NULL;
  infile = fopen("/proc/cpuinfo", "r");
  if (infile == NULL) return;

  while (fgets(buffer, sizeof(buffer), infile))
  {
    if (!strncmp("Features", buffer, 8))
    {
      /* May be NULL on a malformed line; checked below. */
      p = strchr(buffer, ':');
      break;
    }
  }

  fclose(infile);

  if (p == NULL) return;

  /* Start right after the colon: strtok skips the leading blanks itself.
     '\n' is a delimiter so the last feature on the line is recognised too,
     and the first token is examined like every other one. */
  for (t = strtok(p + 1, " \t\n"); t != NULL; t = strtok(NULL, " \t\n"))
  {
    if (!strcmp(t, "vfp"))   { printf("HAVE_VFP=1\n");   continue; }
    if (!strcmp(t, "vfpv3")) { printf("HAVE_VFPV3=1\n"); continue; }
    if (!strcmp(t, "vfpv4")) { printf("HAVE_VFPV4=1\n"); continue; }
    if (!strcmp(t, "neon"))  { printf("HAVE_NEON=1\n");  continue; }
  }
#endif
  return;
}
@@ -687,23 +687,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define ARCHCONFIG "-DARMV7 " \ | #define ARCHCONFIG "-DARMV7 " \ | ||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | ||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | ||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||||
"-DHAVE_VFPV3 -DHAVE_VFP" | |||||
#define LIBNAME "armv7" | #define LIBNAME "armv7" | ||||
#define CORENAME "ARMV7" | #define CORENAME "ARMV7" | ||||
#else | #else | ||||
#endif | #endif | ||||
/* Forced ARMV6 target: when FORCE_ARMV6 is defined, FORCE is defined and the
   architecture, sub-architecture, sub-directory, configuration flags,
   library name and core name are fixed to the ARMV6 values below. The
   #else branch is intentionally empty. */
#ifdef FORCE_ARMV6 | |||||
#define FORCE | |||||
#define ARCHITECTURE "ARM" | |||||
#define SUBARCHITECTURE "ARMV6" | |||||
#define SUBDIRNAME "arm" | |||||
#define ARCHCONFIG "-DARMV6 " \ | |||||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||||
"-DHAVE_VFP" | |||||
#define LIBNAME "armv6" | |||||
#define CORENAME "ARMV6" | |||||
#else | |||||
#endif | |||||
#ifndef FORCE | #ifndef FORCE | ||||
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | ||||
defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) | |||||
defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) | |||||
#ifndef POWER | #ifndef POWER | ||||
#define POWER | #define POWER | ||||
#endif | #endif | ||||
#define OPENBLAS_SUPPORTED | #define OPENBLAS_SUPPORTED | ||||
#endif | #endif | ||||
#if defined(__i386__) || (__x86_64__) | #if defined(__i386__) || (__x86_64__) | ||||
#include "cpuid_x86.c" | #include "cpuid_x86.c" | ||||
#define OPENBLAS_SUPPORTED | #define OPENBLAS_SUPPORTED | ||||
@@ -734,12 +753,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define OPENBLAS_SUPPORTED | #define OPENBLAS_SUPPORTED | ||||
#endif | #endif | ||||
#ifdef __arm__ | |||||
#include "cpuid_arm.c" | |||||
#define OPENBLAS_SUPPORTED | |||||
#endif | |||||
#ifndef OPENBLAS_SUPPORTED | #ifndef OPENBLAS_SUPPORTED | ||||
#error "This arch/CPU is not supported by OpenBLAS." | #error "This arch/CPU is not supported by OpenBLAS." | ||||
#endif | #endif | ||||
#else | |||||
#endif | #endif | ||||
static int get_num_cores(void) { | static int get_num_cores(void) { | ||||
@@ -788,7 +811,7 @@ int main(int argc, char *argv[]){ | |||||
#ifdef FORCE | #ifdef FORCE | ||||
printf("CORE=%s\n", CORENAME); | printf("CORE=%s\n", CORENAME); | ||||
#else | #else | ||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) | |||||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) | |||||
printf("CORE=%s\n", get_corename()); | printf("CORE=%s\n", get_corename()); | ||||
#endif | #endif | ||||
#endif | #endif | ||||
@@ -803,6 +826,12 @@ int main(int argc, char *argv[]){ | |||||
printf("NUM_CORES=%d\n", get_num_cores()); | printf("NUM_CORES=%d\n", get_num_cores()); | ||||
#if defined(__arm__) && !defined(FORCE) | |||||
get_features(); | |||||
#endif | |||||
#if defined(__i386__) || defined(__x86_64__) | #if defined(__i386__) || defined(__x86_64__) | ||||
#ifndef FORCE | #ifndef FORCE | ||||
get_sse(); | get_sse(); | ||||
@@ -0,0 +1,134 @@ | |||||
# Kernel source selection. Each variable names the source file assigned to one
# routine for one precision prefix (S, D, C, Z). Throughout this list the S
# and D variants share one file and the C and Z variants share another,
# except the TRSM kernels, where all four prefixes use the same files.
# (Presumably consumed by the kernel build rules, which are not visible
# here -- confirm.)

# Max / min of absolute values.
SAMAXKERNEL = amax.c | |||||
DAMAXKERNEL = amax.c | |||||
CAMAXKERNEL = zamax.c | |||||
ZAMAXKERNEL = zamax.c | |||||
SAMINKERNEL = amin.c | |||||
DAMINKERNEL = amin.c | |||||
CAMINKERNEL = zamin.c | |||||
ZAMINKERNEL = zamin.c | |||||
# Max / min (real precisions only).
SMAXKERNEL = max.c | |||||
DMAXKERNEL = max.c | |||||
SMINKERNEL = min.c | |||||
DMINKERNEL = min.c | |||||
# Index-returning variants (I prefix) of the routines above.
ISAMAXKERNEL = iamax.c | |||||
IDAMAXKERNEL = iamax.c | |||||
ICAMAXKERNEL = izamax.c | |||||
IZAMAXKERNEL = izamax.c | |||||
ISAMINKERNEL = iamin.c | |||||
IDAMINKERNEL = iamin.c | |||||
ICAMINKERNEL = izamin.c | |||||
IZAMINKERNEL = izamin.c | |||||
ISMAXKERNEL = imax.c | |||||
IDMAXKERNEL = imax.c | |||||
ISMINKERNEL = imin.c | |||||
IDMINKERNEL = imin.c | |||||
# Vector routines: asum, axpy, copy, dot, nrm2, rot, scal, swap.
SASUMKERNEL = asum.c | |||||
DASUMKERNEL = asum.c | |||||
CASUMKERNEL = zasum.c | |||||
ZASUMKERNEL = zasum.c | |||||
SAXPYKERNEL = axpy.c | |||||
DAXPYKERNEL = axpy.c | |||||
CAXPYKERNEL = zaxpy.c | |||||
ZAXPYKERNEL = zaxpy.c | |||||
SCOPYKERNEL = copy.c | |||||
DCOPYKERNEL = copy.c | |||||
CCOPYKERNEL = zcopy.c | |||||
ZCOPYKERNEL = zcopy.c | |||||
SDOTKERNEL = dot.c | |||||
DDOTKERNEL = dot.c | |||||
CDOTKERNEL = zdot.c | |||||
ZDOTKERNEL = zdot.c | |||||
SNRM2KERNEL = nrm2.c | |||||
DNRM2KERNEL = nrm2.c | |||||
CNRM2KERNEL = znrm2.c | |||||
ZNRM2KERNEL = znrm2.c | |||||
SROTKERNEL = rot.c | |||||
DROTKERNEL = rot.c | |||||
CROTKERNEL = zrot.c | |||||
ZROTKERNEL = zrot.c | |||||
SSCALKERNEL = scal.c | |||||
DSCALKERNEL = scal.c | |||||
CSCALKERNEL = zscal.c | |||||
ZSCALKERNEL = zscal.c | |||||
SSWAPKERNEL = swap.c | |||||
DSWAPKERNEL = swap.c | |||||
CSWAPKERNEL = zswap.c | |||||
ZSWAPKERNEL = zswap.c | |||||
# Matrix-vector product, non-transposed (N) and transposed (T).
SGEMVNKERNEL = gemv_n.c | |||||
DGEMVNKERNEL = gemv_n.c | |||||
CGEMVNKERNEL = zgemv_n.c | |||||
ZGEMVNKERNEL = zgemv_n.c | |||||
SGEMVTKERNEL = gemv_t.c | |||||
DGEMVTKERNEL = gemv_t.c | |||||
CGEMVTKERNEL = zgemv_t.c | |||||
ZGEMVTKERNEL = zgemv_t.c | |||||
# From here on every kernel comes from the ../generic directory.
# TRMM kernels (file names indicate a 2x2 variant).
STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
# GEMM kernels plus their ncopy/tcopy sources and the object names built
# from them, one group per precision.
SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
# TRSM kernels (LN, LT, RN, RT variants); the same four generic files are
# used for all four precision prefixes.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
@@ -1831,6 +1831,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define SYMV_P 16 | |||||
#endif | |||||
/* Tuning parameters selected when ARMV6 is defined. The S/D/C/Z prefixes
   give one value per precision. */
#if defined(ARMV6) | |||||
#define SNUMOPT 2 | |||||
#define DNUMOPT 2 | |||||
/* Buffer offsets and alignment mask for GEMM. */
#define GEMM_DEFAULT_OFFSET_A 0 | |||||
#define GEMM_DEFAULT_OFFSET_B 0 | |||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
/* Unroll factors: 2 in both M and N for every precision. */
#define SGEMM_DEFAULT_UNROLL_M 2 | |||||
#define SGEMM_DEFAULT_UNROLL_N 2 | |||||
#define DGEMM_DEFAULT_UNROLL_M 2 | |||||
#define DGEMM_DEFAULT_UNROLL_N 2 | |||||
#define CGEMM_DEFAULT_UNROLL_M 2 | |||||
#define CGEMM_DEFAULT_UNROLL_N 2 | |||||
#define ZGEMM_DEFAULT_UNROLL_M 2 | |||||
#define ZGEMM_DEFAULT_UNROLL_N 2 | |||||
/* P, Q and R defaults per precision (presumably the GEMM blocking sizes;
   their consumers are not visible here -- confirm). */
#define SGEMM_DEFAULT_P 128 | |||||
#define DGEMM_DEFAULT_P 128 | |||||
#define CGEMM_DEFAULT_P 96 | |||||
#define ZGEMM_DEFAULT_P 64 | |||||
#define SGEMM_DEFAULT_Q 240 | |||||
#define DGEMM_DEFAULT_Q 120 | |||||
#define CGEMM_DEFAULT_Q 120 | |||||
#define ZGEMM_DEFAULT_Q 120 | |||||
#define SGEMM_DEFAULT_R 12288 | |||||
#define DGEMM_DEFAULT_R 8192 | |||||
#define CGEMM_DEFAULT_R 4096 | |||||
#define ZGEMM_DEFAULT_R 4096 | |||||
#define SYMV_P 16 | #define SYMV_P 16 | ||||
#endif | #endif | ||||