| @@ -1,7 +1,12 @@ | |||||
| ifeq ($(CORE), ARMV7) | ifeq ($(CORE), ARMV7) | ||||
| CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard | |||||
| FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard | |||||
| CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||||
| FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | |||||
| endif | |||||
| ifeq ($(CORE), ARMV6) | |||||
| CCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||||
| FCOMMON_OPT += -marm -mfpu=vfp -mfloat-abi=hard -march=armv6 | |||||
| endif | endif | ||||
| @@ -0,0 +1,262 @@ | |||||
| /************************************************************************** | |||||
| Copyright (c) 2013, The OpenBLAS Project | |||||
| All rights reserved. | |||||
| Redistribution and use in source and binary forms, with or without | |||||
| modification, are permitted provided that the following conditions are | |||||
| met: | |||||
| 1. Redistributions of source code must retain the above copyright | |||||
| notice, this list of conditions and the following disclaimer. | |||||
| 2. Redistributions in binary form must reproduce the above copyright | |||||
| notice, this list of conditions and the following disclaimer in | |||||
| the documentation and/or other materials provided with the | |||||
| distribution. | |||||
| 3. Neither the name of the OpenBLAS project nor the names of | |||||
| its contributors may be used to endorse or promote products | |||||
| derived from this software without specific prior written permission. | |||||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| *****************************************************************************/ | |||||
| #include <string.h> | |||||
/* Core identifiers returned by detect(). Each value doubles as the index
 * of the matching entry in cpuname[], so the two lists must stay in step. */
#define CPU_UNKNOWN     0
#define CPU_ARMV6       1
#define CPU_ARMV7       2
#define CPU_CORTEXA15   3

/* Printable core names, indexed by the CPU_* identifiers above.
 * The first entry is spelled "UNKNOWN" so it agrees with the string
 * get_subarchitecture() prints for an undetected core. */
static char *cpuname[] = {
  "UNKNOWN",
  "ARMV6",
  "ARMV7",
  "CORTEXA15"
};
/*
 * Reports whether the "Features" line of /proc/cpuinfo lists `search`.
 *
 * search: feature token to look for (e.g. "vfp", "neon"); compared exactly
 *         against each whitespace-separated token on the line.
 *
 * Returns 1 when the token is present and 0 otherwise. 0 is also returned
 * when `search` is NULL, when /proc/cpuinfo cannot be opened, when it has
 * no "Features" line, and on non-Linux builds.
 */
int get_feature(char *search)
{
/* `linux` is not predefined in strict ISO modes (-std=c99/c11), so also
 * accept the reserved-namespace spelling. */
#if defined(linux) || defined(__linux__)
  FILE *infile;
  char buffer[2048];
  char *p = NULL;
  char *t;

  if (search == NULL) return(0);

  infile = fopen("/proc/cpuinfo", "r");
  if (infile == NULL) return(0);

  while (fgets(buffer, sizeof(buffer), infile))
  {
    if (!strncmp("Features", buffer, 8))
    {
      /* Keep the raw strchr() result: adding an offset to a NULL
       * result would hide the "no colon" case. */
      p = strchr(buffer, ':');
      break;
    }
  }

  fclose(infile);

  if (p == NULL) return(0);

  /* Start after the ':' and compare every token, including the first.
   * Newline is a delimiter so the last token matches as well. */
  for (t = strtok(p + 1, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
  {
    if (!strcmp(t, search)) { return(1); }
  }
#endif
  return(0);
}
| int detect(void) | |||||
| { | |||||
| #ifdef linux | |||||
| FILE *infile; | |||||
| char buffer[512], *p; | |||||
| p = (char *) NULL ; | |||||
| infile = fopen("/proc/cpuinfo", "r"); | |||||
| while (fgets(buffer, sizeof(buffer), infile)) | |||||
| { | |||||
| if (!strncmp("model name", buffer, 10)) | |||||
| { | |||||
| p = strchr(buffer, ':') + 2; | |||||
| break; | |||||
| } | |||||
| } | |||||
| fclose(infile); | |||||
| if(p != NULL) | |||||
| { | |||||
| if (strstr(p, "ARMv7")) | |||||
| { | |||||
| if ( get_feature("vfpv4")) | |||||
| return CPU_ARMV7; | |||||
| if ( get_feature("vfpv3")) | |||||
| return CPU_ARMV7; | |||||
| if ( get_feature("vfp")) | |||||
| return CPU_ARMV6; | |||||
| } | |||||
| if (strstr(p, "ARMv6")) | |||||
| { | |||||
| if ( get_feature("vfp")) | |||||
| return CPU_ARMV6; | |||||
| } | |||||
| } | |||||
| #endif | |||||
| return CPU_UNKNOWN; | |||||
| } | |||||
| char *get_corename(void) | |||||
| { | |||||
| return cpuname[detect()]; | |||||
| } | |||||
/* Writes the architecture name "ARM" to stdout, without a newline. */
void get_architecture(void)
{
  fputs("ARM", stdout);
}
| void get_subarchitecture(void) | |||||
| { | |||||
| int d = detect(); | |||||
| switch (d) | |||||
| { | |||||
| case CPU_ARMV7: | |||||
| printf("ARMV7"); | |||||
| break; | |||||
| case CPU_ARMV6: | |||||
| printf("ARMV6"); | |||||
| break; | |||||
| default: | |||||
| printf("UNKNOWN"); | |||||
| break; | |||||
| } | |||||
| } | |||||
/* Writes the sub-directory name "arm" to stdout, without a newline. */
void get_subdirname(void)
{
  fputs("arm", stdout);
}
| void get_cpuconfig(void) | |||||
| { | |||||
| int d = detect(); | |||||
| switch (d) | |||||
| { | |||||
| case CPU_ARMV7: | |||||
| printf("#define ARMV7\n"); | |||||
| printf("#define HAVE_VFP\n"); | |||||
| printf("#define HAVE_VFPV3\n"); | |||||
| if ( get_feature("neon")) printf("#define HAVE_NEON\n"); | |||||
| if ( get_feature("vfpv4")) printf("#define HAVE_VFPV4\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 32\n"); | |||||
| printf("#define L2_SIZE 512488\n"); | |||||
| printf("#define L2_LINESIZE 32\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||||
| break; | |||||
| case CPU_ARMV6: | |||||
| printf("#define ARMV6\n"); | |||||
| printf("#define HAVE_VFP\n"); | |||||
| printf("#define L1_DATA_SIZE 65536\n"); | |||||
| printf("#define L1_DATA_LINESIZE 32\n"); | |||||
| printf("#define L2_SIZE 512488\n"); | |||||
| printf("#define L2_LINESIZE 32\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| printf("#define L2_ASSOCIATIVE 4\n"); | |||||
| break; | |||||
| } | |||||
| } | |||||
| void get_libname(void) | |||||
| { | |||||
| int d = detect(); | |||||
| switch (d) | |||||
| { | |||||
| case CPU_ARMV7: | |||||
| printf("armv7\n"); | |||||
| break; | |||||
| case CPU_ARMV6: | |||||
| printf("armv6\n"); | |||||
| break; | |||||
| } | |||||
| } | |||||
/*
 * Prints one "HAVE_<X>=1" line to stdout for each of the features
 * vfp, vfpv3, vfpv4 and neon found on the "Features" line of
 * /proc/cpuinfo.
 *
 * Prints nothing when /proc/cpuinfo cannot be opened, when it has no
 * "Features" line, or on non-Linux builds.
 */
void get_features(void)
{
/* `linux` is not predefined in strict ISO modes, so accept __linux__ too. */
#if defined(linux) || defined(__linux__)
  FILE *infile;
  char buffer[2048];
  char *p = NULL;
  char *t;

  infile = fopen("/proc/cpuinfo", "r");
  if (infile == NULL) return;

  while (fgets(buffer, sizeof(buffer), infile))
  {
    if (!strncmp("Features", buffer, 8))
    {
      /* Keep the raw strchr() result so a missing ':' stays NULL. */
      p = strchr(buffer, ':');
      break;
    }
  }

  fclose(infile);

  if (p == NULL) return;

  /* Examine every token, including the first. Newline is a delimiter so
   * the last token on the line is recognised as well. */
  for (t = strtok(p + 1, " \t\r\n"); t != NULL; t = strtok(NULL, " \t\r\n"))
  {
    if (!strcmp(t, "vfp"))   { printf("HAVE_VFP=1\n");   continue; }
    if (!strcmp(t, "vfpv3")) { printf("HAVE_VFPV3=1\n"); continue; }
    if (!strcmp(t, "vfpv4")) { printf("HAVE_VFPV4=1\n"); continue; }
    if (!strcmp(t, "neon"))  { printf("HAVE_NEON=1\n");  continue; }
  }
#endif
  return;
}
| @@ -687,23 +687,42 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define ARCHCONFIG "-DARMV7 " \ | #define ARCHCONFIG "-DARMV7 " \ | ||||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | ||||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | ||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||||
| "-DHAVE_VFPV3 -DHAVE_VFP" | |||||
| #define LIBNAME "armv7" | #define LIBNAME "armv7" | ||||
| #define CORENAME "ARMV7" | #define CORENAME "ARMV7" | ||||
| #else | #else | ||||
| #endif | #endif | ||||
/*
 * Forced ARMV6 target: when FORCE_ARMV6 is defined, FORCE is defined and the
 * architecture, sub-directory, config-flag, library and core strings are
 * fixed here. main() prints CORENAME instead of calling get_corename()
 * when FORCE is defined.
 * The cache/TLB values and HAVE_VFP in ARCHCONFIG repeat what
 * get_cpuconfig() prints for CPU_ARMV6.
 * NOTE(review): L2_SIZE=512488 is not 512 KiB (524288) -- confirm intended.
 */
| #ifdef FORCE_ARMV6 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM" | |||||
| #define SUBARCHITECTURE "ARMV6" | |||||
| #define SUBDIRNAME "arm" | |||||
| #define ARCHCONFIG "-DARMV6 " \ | |||||
| "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \ | |||||
| "-DL2_SIZE=512488 -DL2_LINESIZE=32 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \ | |||||
| "-DHAVE_VFP" | |||||
| #define LIBNAME "armv6" | |||||
| #define CORENAME "ARMV6" | |||||
| #else | |||||
| #endif | |||||
| #ifndef FORCE | #ifndef FORCE | ||||
| #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | #if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | ||||
| defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) | |||||
| defined(__PPC__) || defined(PPC) || defined(_POWER) || defined(__POWERPC__) | |||||
| #ifndef POWER | #ifndef POWER | ||||
| #define POWER | #define POWER | ||||
| #endif | #endif | ||||
| #define OPENBLAS_SUPPORTED | #define OPENBLAS_SUPPORTED | ||||
| #endif | #endif | ||||
| #if defined(__i386__) || (__x86_64__) | #if defined(__i386__) || (__x86_64__) | ||||
| #include "cpuid_x86.c" | #include "cpuid_x86.c" | ||||
| #define OPENBLAS_SUPPORTED | #define OPENBLAS_SUPPORTED | ||||
| @@ -734,12 +753,16 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define OPENBLAS_SUPPORTED | #define OPENBLAS_SUPPORTED | ||||
| #endif | #endif | ||||
| #ifdef __arm__ | |||||
| #include "cpuid_arm.c" | |||||
| #define OPENBLAS_SUPPORTED | |||||
| #endif | |||||
| #ifndef OPENBLAS_SUPPORTED | #ifndef OPENBLAS_SUPPORTED | ||||
| #error "This arch/CPU is not supported by OpenBLAS." | #error "This arch/CPU is not supported by OpenBLAS." | ||||
| #endif | #endif | ||||
| #else | |||||
| #endif | #endif | ||||
| static int get_num_cores(void) { | static int get_num_cores(void) { | ||||
| @@ -788,7 +811,7 @@ int main(int argc, char *argv[]){ | |||||
| #ifdef FORCE | #ifdef FORCE | ||||
| printf("CORE=%s\n", CORENAME); | printf("CORE=%s\n", CORENAME); | ||||
| #else | #else | ||||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) | |||||
| #if defined(__i386__) || defined(__x86_64__) || defined(POWER) || defined(__mips__) || defined(__arm__) | |||||
| printf("CORE=%s\n", get_corename()); | printf("CORE=%s\n", get_corename()); | ||||
| #endif | #endif | ||||
| #endif | #endif | ||||
| @@ -803,6 +826,12 @@ int main(int argc, char *argv[]){ | |||||
| printf("NUM_CORES=%d\n", get_num_cores()); | printf("NUM_CORES=%d\n", get_num_cores()); | ||||
| #if defined(__arm__) && !defined(FORCE) | |||||
| get_features(); | |||||
| #endif | |||||
| #if defined(__i386__) || defined(__x86_64__) | #if defined(__i386__) || defined(__x86_64__) | ||||
| #ifndef FORCE | #ifndef FORCE | ||||
| get_sse(); | get_sse(); | ||||
| @@ -0,0 +1,134 @@ | |||||
# Kernel source selection: each variable names the source file used for one
# kernel. For every operation below, the S and D variables name the same
# file, and the C and Z variables name the same z-prefixed file.
# (Presumably S/D/C/Z follow the usual BLAS precision prefixes -- confirm.)

# amax / amin / max / min kernels.
| SAMAXKERNEL = amax.c | |||||
| DAMAXKERNEL = amax.c | |||||
| CAMAXKERNEL = zamax.c | |||||
| ZAMAXKERNEL = zamax.c | |||||
| SAMINKERNEL = amin.c | |||||
| DAMINKERNEL = amin.c | |||||
| CAMINKERNEL = zamin.c | |||||
| ZAMINKERNEL = zamin.c | |||||
| SMAXKERNEL = max.c | |||||
| DMAXKERNEL = max.c | |||||
| SMINKERNEL = min.c | |||||
| DMINKERNEL = min.c | |||||
# I-prefixed variants of the kernels above.
| ISAMAXKERNEL = iamax.c | |||||
| IDAMAXKERNEL = iamax.c | |||||
| ICAMAXKERNEL = izamax.c | |||||
| IZAMAXKERNEL = izamax.c | |||||
| ISAMINKERNEL = iamin.c | |||||
| IDAMINKERNEL = iamin.c | |||||
| ICAMINKERNEL = izamin.c | |||||
| IZAMINKERNEL = izamin.c | |||||
| ISMAXKERNEL = imax.c | |||||
| IDMAXKERNEL = imax.c | |||||
| ISMINKERNEL = imin.c | |||||
| IDMINKERNEL = imin.c | |||||
# asum, axpy, copy, dot, nrm2, rot, scal and swap kernels.
| SASUMKERNEL = asum.c | |||||
| DASUMKERNEL = asum.c | |||||
| CASUMKERNEL = zasum.c | |||||
| ZASUMKERNEL = zasum.c | |||||
| SAXPYKERNEL = axpy.c | |||||
| DAXPYKERNEL = axpy.c | |||||
| CAXPYKERNEL = zaxpy.c | |||||
| ZAXPYKERNEL = zaxpy.c | |||||
| SCOPYKERNEL = copy.c | |||||
| DCOPYKERNEL = copy.c | |||||
| CCOPYKERNEL = zcopy.c | |||||
| ZCOPYKERNEL = zcopy.c | |||||
| SDOTKERNEL = dot.c | |||||
| DDOTKERNEL = dot.c | |||||
| CDOTKERNEL = zdot.c | |||||
| ZDOTKERNEL = zdot.c | |||||
| SNRM2KERNEL = nrm2.c | |||||
| DNRM2KERNEL = nrm2.c | |||||
| CNRM2KERNEL = znrm2.c | |||||
| ZNRM2KERNEL = znrm2.c | |||||
| SROTKERNEL = rot.c | |||||
| DROTKERNEL = rot.c | |||||
| CROTKERNEL = zrot.c | |||||
| ZROTKERNEL = zrot.c | |||||
| SSCALKERNEL = scal.c | |||||
| DSCALKERNEL = scal.c | |||||
| CSCALKERNEL = zscal.c | |||||
| ZSCALKERNEL = zscal.c | |||||
| SSWAPKERNEL = swap.c | |||||
| DSWAPKERNEL = swap.c | |||||
| CSWAPKERNEL = zswap.c | |||||
| ZSWAPKERNEL = zswap.c | |||||
# gemv kernels, one _n and one _t source per group.
| SGEMVNKERNEL = gemv_n.c | |||||
| DGEMVNKERNEL = gemv_n.c | |||||
| CGEMVNKERNEL = zgemv_n.c | |||||
| ZGEMVNKERNEL = zgemv_n.c | |||||
| SGEMVTKERNEL = gemv_t.c | |||||
| DGEMVTKERNEL = gemv_t.c | |||||
| CGEMVTKERNEL = zgemv_t.c | |||||
| ZGEMVTKERNEL = zgemv_t.c | |||||
# TRMM and GEMM use the 2x2 kernels from ../generic.
| STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
| DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||||
| CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
| ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||||
# Each GEMM group also names its ncopy/tcopy sources (the _2 variants from
# ../generic) and the object files those copy routines are built into.
| SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
| SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
| SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
| SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
| SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
| DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||||
| DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||||
| DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||||
| DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
| DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
| CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
| CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| CGEMMONCOPYOBJ = cgemm_oncopy.o | |||||
| CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||||
| ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||||
| ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||||
| ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||||
| ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||||
| ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||||
# TRSM: all four prefixes share the same ../generic sources, one per
# LN / LT / RN / RT variant.
| STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
| ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
| ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
| ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
| @@ -1831,6 +1831,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #define SYMV_P 16 | |||||
| #endif | |||||
/*
 * Default tuning parameters used when ARMV6 is defined: GEMM buffer
 * offsets and alignment mask, per-precision UNROLL_M/UNROLL_N, the
 * P/Q/R defaults, and SYMV_P.
 * All UNROLL_M/UNROLL_N values are 2.
 * NOTE(review): presumably these pair with 2x2 GEMM/TRMM kernels selected
 * for this target -- confirm against the kernel list for ARMV6.
 */
| #if defined(ARMV6) | |||||
| #define SNUMOPT 2 | |||||
| #define DNUMOPT 2 | |||||
| #define GEMM_DEFAULT_OFFSET_A 0 | |||||
| #define GEMM_DEFAULT_OFFSET_B 0 | |||||
| #define GEMM_DEFAULT_ALIGN 0x03fffUL | |||||
| #define SGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define SGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define DGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define DGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define CGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_M 2 | |||||
| #define ZGEMM_DEFAULT_UNROLL_N 2 | |||||
| #define SGEMM_DEFAULT_P 128 | |||||
| #define DGEMM_DEFAULT_P 128 | |||||
| #define CGEMM_DEFAULT_P 96 | |||||
| #define ZGEMM_DEFAULT_P 64 | |||||
| #define SGEMM_DEFAULT_Q 240 | |||||
| #define DGEMM_DEFAULT_Q 120 | |||||
| #define CGEMM_DEFAULT_Q 120 | |||||
| #define ZGEMM_DEFAULT_Q 120 | |||||
| #define SGEMM_DEFAULT_R 12288 | |||||
| #define DGEMM_DEFAULT_R 8192 | |||||
| #define CGEMM_DEFAULT_R 4096 | |||||
| #define ZGEMM_DEFAULT_R 4096 | |||||
| #define SYMV_P 16 | #define SYMV_P 16 | ||||
| #endif | #endif | ||||