| @@ -1,7 +1,12 @@ | |||
# Per-core ARM compiler flags, chosen by the value of CORE.
# NOTE(review): CCOMMON_OPT / FCOMMON_OPT are presumably the common C and
# Fortran option lists -- confirm against the makefile that includes this one.
#
# CORE = ARMV7: VFPv3 floating point with the hard-float ABI.
# NOTE(review): two pairs of appends are present below; the second pair also
# adds -march=armv7-a. The first pair looks like leftover diff context --
# confirm whether both are meant to stay.
| ifeq ($(CORE), ARMV7) | |||
| CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard | |||
| FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard | |||
| CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||
| FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||
| endif | |||
# CORE = ARMV6: plain VFP with the hard-float ABI, targeting -march=armv6.
| ifeq ($(CORE), ARMV6) | |||
| CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||
| FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||
| endif | |||
| @@ -0,0 +1,262 @@ | |||
| /************************************************************************** | |||
| Copyright (c) 2013, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| #include <string.h> | |||
/*
 * CPU class identifiers returned by detect(). Each value is also the index of
 * the matching entry in cpuname[], so the two lists must stay in step.
 */
#define CPU_UNKNOWN 0
#define CPU_ARMV6 1
#define CPU_ARMV7 2
#define CPU_CORTEXA15 3

/* Printable core names, indexed by the CPU_* identifiers above. */
static char *cpuname[] = {
    "UNKNOWN",   /* CPU_UNKNOWN (was misspelled "UNKOWN") */
    "ARMV6",     /* CPU_ARMV6 */
    "ARMV7",     /* CPU_ARMV7 */
    "CORTEXA15"  /* CPU_CORTEXA15 */
};
/*
 * get_feature - report whether the CPU advertises a given feature flag.
 *
 * Reads /proc/cpuinfo, takes the first line that starts with "Features" and
 * checks whether `search` is one of its whitespace-separated entries.
 *
 * search: feature name to look for (callers in this file pass names such as
 *         "vfp", "vfpv3", "vfpv4" and "neon").
 *
 * Returns 1 when the feature is listed. Returns 0 when it is not listed, when
 * `search` is NULL, when /proc/cpuinfo cannot be opened, when no usable
 * "Features" line exists, or when the `linux` macro is not defined at
 * compile time.
 */
int get_feature(char *search)
{
#ifdef linux
    /* Newline is a delimiter so the last entry on the line can match too. */
    static const char delims[] = " \t\r\n";
    FILE *infile;
    char buffer[2048];
    char *list = NULL;
    char *tok;

    if (search == NULL)
        return 0;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL)
        return 0;

    while (fgets(buffer, sizeof(buffer), infile)) {
        if (!strncmp("Features", buffer, 8)) {
            char *colon = strchr(buffer, ':');

            /* Only accept the line if it really has a "name: value" shape. */
            if (colon != NULL) {
                list = colon + 1;
                break;
            }
        }
    }
    fclose(infile);

    if (list == NULL)
        return 0;

    /* Compare every entry, including the first one on the line. */
    for (tok = strtok(list, delims); tok != NULL; tok = strtok(NULL, delims)) {
        if (!strcmp(tok, search))
            return 1;
    }
#endif
    return 0;
}
| int detect(void) | |||
| { | |||
| #ifdef linux | |||
| FILE *infile; | |||
| char buffer[512], *p; | |||
| p = (char *) NULL ; | |||
| infile = fopen("/proc/cpuinfo", "r"); | |||
| while (fgets(buffer, sizeof(buffer), infile)) | |||
| { | |||
| if (!strncmp("model name", buffer, 10)) | |||
| { | |||
| p = strchr(buffer, ':') + 2; | |||
| break; | |||
| } | |||
| } | |||
| fclose(infile); | |||
| if(p != NULL) | |||
| { | |||
| if (strstr(p, "ARMv7")) | |||
| { | |||
| if ( get_feature("vfpv4")) | |||
| return CPU_ARMV7; | |||
| if ( get_feature("vfpv3")) | |||
| return CPU_ARMV7; | |||
| if ( get_feature("vfp")) | |||
| return CPU_ARMV6; | |||
| } | |||
| if (strstr(p, "ARMv6")) | |||
| { | |||
| if ( get_feature("vfp")) | |||
| return CPU_ARMV6; | |||
| } | |||
| } | |||
| #endif | |||
| return CPU_UNKNOWN; | |||
| } | |||
| char *get_corename(void) | |||
| { | |||
| return cpuname[detect()]; | |||
| } | |||
/* get_architecture - write the architecture name "ARM" to stdout (no newline). */
void get_architecture(void)
{
    fputs("ARM", stdout);
}
| void get_subarchitecture(void) | |||
| { | |||
| int d = detect(); | |||
| switch (d) | |||
| { | |||
| case CPU_ARMV7: | |||
| printf("ARMV7"); | |||
| break; | |||
| case CPU_ARMV6: | |||
| printf("ARMV6"); | |||
| break; | |||
| default: | |||
| printf("UNKNOWN"); | |||
| break; | |||
| } | |||
| } | |||
/* get_subdirname - write the sub-directory name "arm" to stdout (no newline). */
void get_subdirname(void)
{
    fputs("arm", stdout);
}
| void get_cpuconfig(void) | |||
| { | |||
| int d = detect(); | |||
| switch (d) | |||
| { | |||
| case CPU_ARMV7: | |||
| printf("#define ARMV7\n"); | |||
| printf("#define HAVE_VFP\n"); | |||
| printf("#define HAVE_VFPV3\n"); | |||
| if ( get_feature("neon")) printf("#define HAVE_NEON\n"); | |||
| if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 32\n"); | |||
| printf("#define L2_SIZE 512488\n"); | |||
| printf("#define L2_LINESIZE 32\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||
| break; | |||
| case CPU_ARMV6: | |||
| printf("#define ARMV6\n"); | |||
| printf("#define HAVE_VFP\n"); | |||
| printf("#define L1_DATA_SIZE 65536\n"); | |||
| printf("#define L1_DATA_LINESIZE 32\n"); | |||
| printf("#define L2_SIZE 512488\n"); | |||
| printf("#define L2_LINESIZE 32\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||
| break; | |||
| } | |||
| } | |||
| void get_libname(void) | |||
| { | |||
| int d = detect(); | |||
| switch (d) | |||
| { | |||
| case CPU_ARMV7: | |||
| printf("armv7\n"); | |||
| break; | |||
| case CPU_ARMV6: | |||
| printf("armv6\n"); | |||
| break; | |||
| } | |||
| } | |||
/*
 * get_features - print a NAME=1 line for each recognised CPU feature.
 *
 * Reads /proc/cpuinfo, takes the first line that starts with "Features" and,
 * for every whitespace-separated entry on it, writes to stdout:
 *   vfp   -> HAVE_VFP=1
 *   vfpv3 -> HAVE_VFPV3=1
 *   vfpv4 -> HAVE_VFPV4=1
 *   neon  -> HAVE_NEON=1
 * Other entries are ignored.
 *
 * Writes nothing when /proc/cpuinfo cannot be opened, when it has no usable
 * "Features" line, or when the `linux` macro is not defined at compile time.
 */
void get_features(void)
{
#ifdef linux
    /* Newline is a delimiter so the last entry on the line is recognised. */
    static const char delims[] = " \t\r\n";
    FILE *infile;
    char buffer[2048];
    char *list = NULL;
    char *tok;

    infile = fopen("/proc/cpuinfo", "r");
    if (infile == NULL)
        return;

    while (fgets(buffer, sizeof(buffer), infile)) {
        if (!strncmp("Features", buffer, 8)) {
            char *colon = strchr(buffer, ':');

            /* Only accept the line if it really has a "name: value" shape. */
            if (colon != NULL) {
                list = colon + 1;
                break;
            }
        }
    }
    fclose(infile);

    if (list == NULL)
        return;

    /* Examine every entry, including the first one on the line. */
    for (tok = strtok(list, delims); tok != NULL; tok = strtok(NULL, delims)) {
        if (!strcmp(tok, "vfp")) {
            printf("HAVE_VFP=1\n");
        } else if (!strcmp(tok, "vfpv3")) {
            printf("HAVE_VFPV3=1\n");
        } else if (!strcmp(tok, "vfpv4")) {
            printf("HAVE_VFPV4=1\n");
        } else if (!strcmp(tok, "neon")) {
            printf("HAVE_NEON=1\n");
        }
    }
#endif
    return;
}
| @@ -687,23 +687,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define ARCHCONFIG "-DARMV7 " \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||
| "-DHAVE_VFPV3 -DHAVE_VFP" | |||
| #define LIBNAME "armv7" | |||
| #define CORENAME "ARMV7" | |||
| #else | |||
| #endif | |||
/*
 * Fixed configuration used when the build defines FORCE_ARMV6.
 * Defining FORCE makes the "#ifndef FORCE" section that follows (which pulls
 * in the per-architecture cpuid sources) be skipped, and main() then prints
 * CORENAME as the CORE= value instead of calling get_corename().
 * ARCHCONFIG carries the -D options for this core; they are joined into one
 * string by adjacent-literal concatenation across the continued lines.
 */
| #ifdef FORCE_ARMV6 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM" | |||
| #define SUBARCHITECTURE "ARMV6" | |||
| #define SUBDIRNAME "arm" | |||
| #define ARCHCONFIG "-DARMV6 " \ | |||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||
| "-DHAVE_VFP" | |||
| #define LIBNAME "armv6" | |||
| #define CORENAME "ARMV6" | |||
| #else | |||
| #endif | |||
| #ifndef FORCE | |||
| #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | |||
| defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) | |||
| defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) | |||
| #ifndef POWER | |||
| #define POWER | |||
| #endif | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #if defined(__i386__) || (__x86_64__) | |||
| #include "cpuid_x86.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| @@ -734,12 +753,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifdef __arm__ | |||
| #include "cpuid_arm.c" | |||
| #define OPENBLAS_SUPPORTED | |||
| #endif | |||
| #ifndef OPENBLAS_SUPPORTED | |||
| #error "This arch/CPU is not supported by OpenBLAS." | |||
| #endif | |||
| #else | |||
| #endif | |||
| static int get_num_cores(void) { | |||
| @@ -788,7 +811,7 @@ int main(int argc, char *argv[]){ | |||
| #ifdef FORCE | |||
| printf("CORE=%s\n", CORENAME); | |||
| #else | |||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) | |||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) | |||
| printf("CORE=%s\n", get_corename()); | |||
| #endif | |||
| #endif | |||
| @@ -803,6 +826,12 @@ int main(int argc, char *argv[]){ | |||
| printf("NUM_CORES=%d\n", get_num_cores()); | |||
| #if defined(__arm__) && !defined(FORCE) | |||
| get_features(); | |||
| #endif | |||
| #if defined(__i386__) || defined(__x86_64__) | |||
| #ifndef FORCE | |||
| get_sse(); | |||
| @@ -0,0 +1,134 @@ | |||
# Kernel source selection: each *KERNEL / *COPY variable names the source file
# used for one routine.
# NOTE(review): the leading S/D/C/Z letters presumably select single, double,
# complex and double-complex precision -- confirm against the build rules that
# consume these variables.

# amax / amin / max / min kernels. Real variants share one source; the
# complex variants use the z-prefixed source.
| SAMAXKERNEL = amax.c | |||
| DAMAXKERNEL = amax.c | |||
| CAMAXKERNEL = zamax.c | |||
| ZAMAXKERNEL = zamax.c | |||
| SAMINKERNEL = amin.c | |||
| DAMINKERNEL = amin.c | |||
| CAMINKERNEL = zamin.c | |||
| ZAMINKERNEL = zamin.c | |||
| SMAXKERNEL = max.c | |||
| DMAXKERNEL = max.c | |||
| SMINKERNEL = min.c | |||
| DMINKERNEL = min.c | |||
# I-prefixed counterparts of the kernels above.
| ISAMAXKERNEL = iamax.c | |||
| IDAMAXKERNEL = iamax.c | |||
| ICAMAXKERNEL = izamax.c | |||
| IZAMAXKERNEL = izamax.c | |||
| ISAMINKERNEL = iamin.c | |||
| IDAMINKERNEL = iamin.c | |||
| ICAMINKERNEL = izamin.c | |||
| IZAMINKERNEL = izamin.c | |||
| ISMAXKERNEL = imax.c | |||
| IDMAXKERNEL = imax.c | |||
| ISMINKERNEL = imin.c | |||
| IDMINKERNEL = imin.c | |||
# asum, axpy, copy, dot, nrm2, rot, scal and swap kernels.
| SASUMKERNEL = asum.c | |||
| DASUMKERNEL = asum.c | |||
| CASUMKERNEL = zasum.c | |||
| ZASUMKERNEL = zasum.c | |||
| SAXPYKERNEL = axpy.c | |||
| DAXPYKERNEL = axpy.c | |||
| CAXPYKERNEL = zaxpy.c | |||
| ZAXPYKERNEL = zaxpy.c | |||
| SCOPYKERNEL = copy.c | |||
| DCOPYKERNEL = copy.c | |||
| CCOPYKERNEL = zcopy.c | |||
| ZCOPYKERNEL = zcopy.c | |||
| SDOTKERNEL = dot.c | |||
| DDOTKERNEL = dot.c | |||
| CDOTKERNEL = zdot.c | |||
| ZDOTKERNEL = zdot.c | |||
| SNRM2KERNEL = nrm2.c | |||
| DNRM2KERNEL = nrm2.c | |||
| CNRM2KERNEL = znrm2.c | |||
| ZNRM2KERNEL = znrm2.c | |||
| SROTKERNEL = rot.c | |||
| DROTKERNEL = rot.c | |||
| CROTKERNEL = zrot.c | |||
| ZROTKERNEL = zrot.c | |||
| SSCALKERNEL = scal.c | |||
| DSCALKERNEL = scal.c | |||
| CSCALKERNEL = zscal.c | |||
| ZSCALKERNEL = zscal.c | |||
| SSWAPKERNEL = swap.c | |||
| DSWAPKERNEL = swap.c | |||
| CSWAPKERNEL = zswap.c | |||
| ZSWAPKERNEL = zswap.c | |||
# gemv kernels, N and T variants.
| SGEMVNKERNEL = gemv_n.c | |||
| DGEMVNKERNEL = gemv_n.c | |||
| CGEMVNKERNEL = zgemv_n.c | |||
| ZGEMVNKERNEL = zgemv_n.c | |||
| SGEMVTKERNEL = gemv_t.c | |||
| DGEMVTKERNEL = gemv_t.c | |||
| CGEMVTKERNEL = zgemv_t.c | |||
| ZGEMVTKERNEL = zgemv_t.c | |||
# trmm kernels, taken from the 2x2 sources in ../generic.
| STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
# gemm kernels and their ncopy/tcopy sources from ../generic, with the object
# file names used for the copy routines.
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
# trsm kernels (LN, LT, RN, RT). All four prefixes point at the same
# ../generic sources.
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
| @@ -1831,6 +1831,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #define SYMV_P 16 | |||
| #endif | |||
/*
 * Tuning constants selected when ARMV6 is defined.
 * All *_DEFAULT_UNROLL_M / *_DEFAULT_UNROLL_N values are 2.
 * NOTE(review): presumably chosen to match 2x2 GEMM/TRMM kernels -- confirm
 * against the kernel list used for this core.
 * NOTE(review): the *_DEFAULT_P / _Q / _R values look like per-precision GEMM
 * blocking sizes -- confirm against the code that consumes them.
 */
| #if defined(ARMV6) | |||
| #define SNUMOPT 2 | |||
| #define DNUMOPT 2 | |||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||
| #define SGEMM_DEFAULT_UNROLL_N 2 | |||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||
| #define SGEMM_DEFAULT_P 128 | |||
| #define DGEMM_DEFAULT_P 128 | |||
| #define CGEMM_DEFAULT_P 96 | |||
| #define ZGEMM_DEFAULT_P 64 | |||
| #define SGEMM_DEFAULT_Q 240 | |||
| #define DGEMM_DEFAULT_Q 120 | |||
| #define CGEMM_DEFAULT_Q 120 | |||
| #define ZGEMM_DEFAULT_Q 120 | |||
| #define SGEMM_DEFAULT_R 12288 | |||
| #define DGEMM_DEFAULT_R 8192 | |||
| #define CGEMM_DEFAULT_R 4096 | |||
| #define ZGEMM_DEFAULT_R 4096 | |||
| #define SYMV_P 16 | |||
| #endif | |||