@@ -1,7 +1,12 @@ | |||
# When CORE is ARMV7, append -marm, -mfpu=vfpv3 and -mfloat-abi=hard to
# CCOMMON_OPT and FCOMMON_OPT.
# NOTE(review): each variable is appended twice below; the first pair of lines
# lacks -march=armv7-a. This looks like leftover old/new diff lines -- confirm
# and keep only the pair that carries -march=armv7-a.
ifeq ($(CORE), ARMV7) | |||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard | |||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard | |||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||
endif | |||
# When CORE is ARMV6, append -marm, -mfpu=vfp, -mfloat-abi=hard and
# -march=armv6 to both CCOMMON_OPT and FCOMMON_OPT (same options for each).
ifeq ($(CORE), ARMV6) | |||
CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||
FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||
endif | |||
@@ -0,0 +1,262 @@ | |||
/************************************************************************** | |||
Copyright (c) 2013, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
#include <string.h> | |||
/*
 * Core identifiers. detect() returns one of these values, and each value is
 * also the index of the matching entry in cpuname[].
 */
#define CPU_UNKNOWN 0
#define CPU_ARMV6 1
#define CPU_ARMV7 2
#define CPU_CORTEXA15 3

/*
 * Core names indexed by the CPU_* identifiers above (used by get_corename()).
 * Designated initialisers keep each name tied to its identifier.
 * The first entry used to be misspelled "UNKOWN", which did not agree with
 * the "UNKNOWN" string printed by get_subarchitecture().
 */
static char *cpuname[] = {
    [CPU_UNKNOWN]   = "UNKNOWN",
    [CPU_ARMV6]     = "ARMV6",
    [CPU_ARMV7]     = "ARMV7",
    [CPU_CORTEXA15] = "CORTEXA15"
};
/*
 * Report whether the "Features" line of /proc/cpuinfo lists a given flag.
 *
 * search: feature name to look for (e.g. "vfp", "neon"); compared as a whole
 *         whitespace-delimited word.
 *
 * Returns 1 when the flag is listed, 0 otherwise. 0 is also returned when
 * not built for Linux, when /proc/cpuinfo cannot be opened, when no
 * "Features" line exists, or when search is NULL or empty.
 */
int get_feature(char *search)
{
#if defined(__linux__) || defined(linux)
    static const char separators[] = " \t\r\n";
    FILE *infile;
    char buffer[2048];
    const char *p = NULL;
    size_t wanted, len;

    if (search == NULL || *search == '\0')
        return 0;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL)
        return 0;

    while (fgets(buffer, sizeof(buffer), infile)) {
        if (!strncmp("Features", buffer, 8)) {
            /* May be NULL if the line has no ':'; handled below. */
            p = strchr(buffer, ':');
            break;
        }
    }
    fclose(infile);

    if (p == NULL)
        return 0;
    p++; /* step over ':' */

    /*
     * Walk every word after the colon. The first word is examined too, and
     * the trailing newline kept by fgets() counts as a separator, so the
     * last flag on the line can match as well.
     */
    wanted = strlen(search);
    for (;;) {
        p += strspn(p, separators);
        if (*p == '\0')
            break;
        len = strcspn(p, separators);
        if (len == wanted && !strncmp(p, search, len))
            return 1;
        p += len;
    }
#else
    (void) search;
#endif
    return 0;
}
int detect(void) | |||
{ | |||
#ifdef linux | |||
FILE *infile; | |||
char buffer[512], *p; | |||
p = (char *) NULL ; | |||
infile = fopen("/proc/cpuinfo", "r"); | |||
while (fgets(buffer, sizeof(buffer), infile)) | |||
{ | |||
if (!strncmp("model name", buffer, 10)) | |||
{ | |||
p = strchr(buffer, ':') + 2; | |||
break; | |||
} | |||
} | |||
fclose(infile); | |||
if(p != NULL) | |||
{ | |||
if (strstr(p, "ARMv7")) | |||
{ | |||
if ( get_feature("vfpv4")) | |||
return CPU_ARMV7; | |||
if ( get_feature("vfpv3")) | |||
return CPU_ARMV7; | |||
if ( get_feature("vfp")) | |||
return CPU_ARMV6; | |||
} | |||
if (strstr(p, "ARMv6")) | |||
{ | |||
if ( get_feature("vfp")) | |||
return CPU_ARMV6; | |||
} | |||
} | |||
#endif | |||
return CPU_UNKNOWN; | |||
} | |||
char *get_corename(void) | |||
{ | |||
return cpuname[detect()]; | |||
} | |||
/* Write the architecture name "ARM" to stdout (no trailing newline). */
void get_architecture(void)
{
    fputs("ARM", stdout);
}
void get_subarchitecture(void) | |||
{ | |||
int d = detect(); | |||
switch (d) | |||
{ | |||
case CPU_ARMV7: | |||
printf("ARMV7"); | |||
break; | |||
case CPU_ARMV6: | |||
printf("ARMV6"); | |||
break; | |||
default: | |||
printf("UNKNOWN"); | |||
break; | |||
} | |||
} | |||
/* Write the sub-directory name "arm" to stdout (no trailing newline). */
void get_subdirname(void)
{
    fputs("arm", stdout);
}
void get_cpuconfig(void) | |||
{ | |||
int d = detect(); | |||
switch (d) | |||
{ | |||
case CPU_ARMV7: | |||
printf("#define ARMV7\n"); | |||
printf("#define HAVE_VFP\n"); | |||
printf("#define HAVE_VFPV3\n"); | |||
if ( get_feature("neon")) printf("#define HAVE_NEON\n"); | |||
if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n"); | |||
printf("#define L1_DATA_SIZE 65536\n"); | |||
printf("#define L1_DATA_LINESIZE 32\n"); | |||
printf("#define L2_SIZE 512488\n"); | |||
printf("#define L2_LINESIZE 32\n"); | |||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
printf("#define DTB_SIZE 4096\n"); | |||
printf("#define L2_ASSOCIATIVE 4\n"); | |||
break; | |||
case CPU_ARMV6: | |||
printf("#define ARMV6\n"); | |||
printf("#define HAVE_VFP\n"); | |||
printf("#define L1_DATA_SIZE 65536\n"); | |||
printf("#define L1_DATA_LINESIZE 32\n"); | |||
printf("#define L2_SIZE 512488\n"); | |||
printf("#define L2_LINESIZE 32\n"); | |||
printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
printf("#define DTB_SIZE 4096\n"); | |||
printf("#define L2_ASSOCIATIVE 4\n"); | |||
break; | |||
} | |||
} | |||
void get_libname(void) | |||
{ | |||
int d = detect(); | |||
switch (d) | |||
{ | |||
case CPU_ARMV7: | |||
printf("armv7\n"); | |||
break; | |||
case CPU_ARMV6: | |||
printf("armv6\n"); | |||
break; | |||
} | |||
} | |||
/*
 * Print one "HAVE_xxx=1" line to stdout for each of the flags vfp, vfpv3,
 * vfpv4 and neon found on the "Features" line of /proc/cpuinfo, in the
 * order they appear there.
 *
 * Prints nothing when not built for Linux, when /proc/cpuinfo cannot be
 * opened, or when it has no "Features" line.
 */
void get_features(void)
{
#if defined(__linux__) || defined(linux)
    static const char separators[] = " \t\r\n";
    FILE *infile;
    char buffer[2048];
    char *p = NULL;
    char *t;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL)
        return;

    while (fgets(buffer, sizeof(buffer), infile)) {
        if (!strncmp("Features", buffer, 8)) {
            /* May be NULL if the line has no ':'; handled below. */
            p = strchr(buffer, ':');
            break;
        }
    }
    fclose(infile);

    if (p == NULL)
        return;

    /*
     * Tokenise everything after the colon. The first token is examined too,
     * and the newline kept by fgets() is a separator so that the last flag
     * on the line is recognised.
     */
    for (t = strtok(p + 1, separators); t != NULL; t = strtok(NULL, separators)) {
        if (!strcmp(t, "vfp"))
            printf("HAVE_VFP=1\n");
        else if (!strcmp(t, "vfpv3"))
            printf("HAVE_VFPV3=1\n");
        else if (!strcmp(t, "vfpv4"))
            printf("HAVE_VFPV4=1\n");
        else if (!strcmp(t, "neon"))
            printf("HAVE_NEON=1\n");
    }
#endif
    return;
}
@@ -687,23 +687,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ARCHCONFIG "-DARMV7 " \ | |||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||
"-DHAVE_VFPV3 -DHAVE_VFP" | |||
#define LIBNAME "armv7" | |||
#define CORENAME "ARMV7" | |||
#else | |||
#endif | |||
/*
 * Forced ARMV6 target: when FORCE_ARMV6 is defined, FORCE is defined and the
 * architecture, sub-architecture, sub-directory, ARCHCONFIG flag string,
 * library name and core name are supplied as fixed macros.
 * NOTE(review): -DL2_SIZE=512488 is not a power of two (512 KiB would be
 * 524288) -- confirm the intended value.
 */
#ifdef FORCE_ARMV6 | |||
#define FORCE | |||
#define ARCHITECTURE "ARM" | |||
#define SUBARCHITECTURE "ARMV6" | |||
#define SUBDIRNAME "arm" | |||
#define ARCHCONFIG "-DARMV6 " \ | |||
"-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
"-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||
"-DHAVE_VFP" | |||
#define LIBNAME "armv6" | |||
#define CORENAME "ARMV6" | |||
#else | |||
#endif | |||
#ifndef FORCE | |||
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | |||
defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) | |||
defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) | |||
#ifndef POWER | |||
#define POWER | |||
#endif | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#if defined(__i386__) || (__x86_64__) | |||
#include "cpuid_x86.c" | |||
#define OPENBLAS_SUPPORTED | |||
@@ -734,12 +753,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#ifdef __arm__ | |||
#include "cpuid_arm.c" | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#ifndef OPENBLAS_SUPPORTED | |||
#error "This arch/CPU is not supported by OpenBLAS." | |||
#endif | |||
#else | |||
#endif | |||
static int get_num_cores(void) { | |||
@@ -788,7 +811,7 @@ int main(int argc, char *argv[]){ | |||
#ifdef FORCE | |||
printf("CORE=%s\n", CORENAME); | |||
#else | |||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) | |||
#if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) | |||
printf("CORE=%s\n", get_corename()); | |||
#endif | |||
#endif | |||
@@ -803,6 +826,12 @@ int main(int argc, char *argv[]){ | |||
printf("NUM_CORES=%d\n", get_num_cores()); | |||
#if defined(__arm__) && !defined(FORCE) | |||
get_features(); | |||
#endif | |||
#if defined(__i386__) || defined(__x86_64__) | |||
#ifndef FORCE | |||
get_sse(); | |||
@@ -0,0 +1,134 @@ | |||
# Kernel source selection. Each <PREFIX><OP>KERNEL variable names the source
# file used for that routine. Throughout this list the S* and D* variables
# share one source file, and the C* and Z* variables share the z-prefixed one.

# amax / amin
SAMAXKERNEL = amax.c | |||
DAMAXKERNEL = amax.c | |||
CAMAXKERNEL = zamax.c | |||
ZAMAXKERNEL = zamax.c | |||
SAMINKERNEL = amin.c | |||
DAMINKERNEL = amin.c | |||
CAMINKERNEL = zamin.c | |||
ZAMINKERNEL = zamin.c | |||
# max / min (S and D variables only)
SMAXKERNEL = max.c | |||
DMAXKERNEL = max.c | |||
SMINKERNEL = min.c | |||
DMINKERNEL = min.c | |||
# iamax / iamin
ISAMAXKERNEL = iamax.c | |||
IDAMAXKERNEL = iamax.c | |||
ICAMAXKERNEL = izamax.c | |||
IZAMAXKERNEL = izamax.c | |||
ISAMINKERNEL = iamin.c | |||
IDAMINKERNEL = iamin.c | |||
ICAMINKERNEL = izamin.c | |||
IZAMINKERNEL = izamin.c | |||
# imax / imin (S and D variables only)
ISMAXKERNEL = imax.c | |||
IDMAXKERNEL = imax.c | |||
ISMINKERNEL = imin.c | |||
IDMINKERNEL = imin.c | |||
# asum, axpy, copy, dot, nrm2, rot, scal, swap
SASUMKERNEL = asum.c | |||
DASUMKERNEL = asum.c | |||
CASUMKERNEL = zasum.c | |||
ZASUMKERNEL = zasum.c | |||
SAXPYKERNEL = axpy.c | |||
DAXPYKERNEL = axpy.c | |||
CAXPYKERNEL = zaxpy.c | |||
ZAXPYKERNEL = zaxpy.c | |||
SCOPYKERNEL = copy.c | |||
DCOPYKERNEL = copy.c | |||
CCOPYKERNEL = zcopy.c | |||
ZCOPYKERNEL = zcopy.c | |||
SDOTKERNEL = dot.c | |||
DDOTKERNEL = dot.c | |||
CDOTKERNEL = zdot.c | |||
ZDOTKERNEL = zdot.c | |||
SNRM2KERNEL = nrm2.c | |||
DNRM2KERNEL = nrm2.c | |||
CNRM2KERNEL = znrm2.c | |||
ZNRM2KERNEL = znrm2.c | |||
SROTKERNEL = rot.c | |||
DROTKERNEL = rot.c | |||
CROTKERNEL = zrot.c | |||
ZROTKERNEL = zrot.c | |||
SSCALKERNEL = scal.c | |||
DSCALKERNEL = scal.c | |||
CSCALKERNEL = zscal.c | |||
ZSCALKERNEL = zscal.c | |||
SSWAPKERNEL = swap.c | |||
DSWAPKERNEL = swap.c | |||
CSWAPKERNEL = zswap.c | |||
ZSWAPKERNEL = zswap.c | |||
# gemv (N and T variants)
SGEMVNKERNEL = gemv_n.c | |||
DGEMVNKERNEL = gemv_n.c | |||
CGEMVNKERNEL = zgemv_n.c | |||
ZGEMVNKERNEL = zgemv_n.c | |||
SGEMVTKERNEL = gemv_t.c | |||
DGEMVTKERNEL = gemv_t.c | |||
CGEMVTKERNEL = zgemv_t.c | |||
ZGEMVTKERNEL = zgemv_t.c | |||
# trmm: 2x2 sources taken from ../generic
STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
# gemm: 2x2 kernel plus ncopy_2/tcopy_2 sources from ../generic, and the
# object file names used for the copy routines
SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
# trsm: all four prefixes (S, D, C, Z) use the same ../generic/trsm_kernel_*
# source for each of the LN, LT, RN and RT variants
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
@@ -1831,6 +1831,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define SYMV_P 16 | |||
#endif | |||
/*
 * Default tuning parameters used when ARMV6 is defined: offsets and
 * alignment, per-precision GEMM unroll factors (all 2), per-precision
 * GEMM P/Q/R defaults, and SYMV_P.
 * NOTE(review): the 2x2 unroll factors presumably have to match the GEMM
 * kernel sources selected for this core -- confirm against the KERNEL file.
 */
#if defined(ARMV6) | |||
#define SNUMOPT 2 | |||
#define DNUMOPT 2 | |||
#define GEMM_DEFAULT_OFFSET_A 0 | |||
#define GEMM_DEFAULT_OFFSET_B 0 | |||
#define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
/* Unroll factors in M and N, one pair per precision prefix. */
#define SGEMM_DEFAULT_UNROLL_M 2 | |||
#define SGEMM_DEFAULT_UNROLL_N 2 | |||
#define DGEMM_DEFAULT_UNROLL_M 2 | |||
#define DGEMM_DEFAULT_UNROLL_N 2 | |||
#define CGEMM_DEFAULT_UNROLL_M 2 | |||
#define CGEMM_DEFAULT_UNROLL_N 2 | |||
#define ZGEMM_DEFAULT_UNROLL_M 2 | |||
#define ZGEMM_DEFAULT_UNROLL_N 2 | |||
/* Default P, Q and R values, one of each per precision prefix. */
#define SGEMM_DEFAULT_P 128 | |||
#define DGEMM_DEFAULT_P 128 | |||
#define CGEMM_DEFAULT_P 96 | |||
#define ZGEMM_DEFAULT_P 64 | |||
#define SGEMM_DEFAULT_Q 240 | |||
#define DGEMM_DEFAULT_Q 120 | |||
#define CGEMM_DEFAULT_Q 120 | |||
#define ZGEMM_DEFAULT_Q 120 | |||
#define SGEMM_DEFAULT_R 12288 | |||
#define DGEMM_DEFAULT_R 8192 | |||
#define CGEMM_DEFAULT_R 4096 | |||
#define ZGEMM_DEFAULT_R 4096 | |||
#define SYMV_P 16 | |||
#endif | |||