@@ -12,7 +12,7 @@ VERSION = 0.2.8 | |||
# You can specify the target architecture, otherwise it's | |||
# automatically detected. | |||
# TARGET = PENRYN | |||
TARGET = ARMV7 | |||
# If you want to support multiple architectures in one binary | |||
# DYNAMIC_ARCH = 1 | |||
@@ -25,20 +25,20 @@ VERSION = 0.2.8 | |||
# FC = gfortran | |||
# You can even specify a cross compiler. In that case, please set HOSTCC as well. | |||
# CC = x86_64-w64-mingw32-gcc | |||
# FC = x86_64-w64-mingw32-gfortran | |||
CC = arm-linux-gnueabihf-gcc | |||
FC = arm-linux-gnueabihf-gfortran | |||
# If you use a cross compiler, please set the host compiler here. | |||
# HOSTCC = gcc | |||
HOSTCC = gcc | |||
# If you need a 32-bit binary, define BINARY=32; otherwise define BINARY=64. | |||
# BINARY=64 | |||
#BINARY=32 | |||
# About threaded BLAS. It will be automatically detected if you don't | |||
# specify it. | |||
# To force single-threaded mode, specify USE_THREAD = 0 | |||
# To force multi-threaded mode, specify USE_THREAD = 1 | |||
# USE_THREAD = 0 | |||
USE_THREAD = 0 | |||
# If you're going to use this library with OpenMP, please comment it in. | |||
# USE_OPENMP = 1 | |||
@@ -46,7 +46,7 @@ VERSION = 0.2.8 | |||
# You can define the maximum number of threads. Basically it should be | |||
# less than the actual number of cores. If you don't specify one, it's | |||
# automatically detected by the script. | |||
# NUM_THREADS = 24 | |||
NUM_THREADS = 4 | |||
# If you don't need to generate the shared library, please comment it in. | |||
# NO_SHARED = 1 | |||
@@ -54,16 +54,12 @@ VERSION = 0.2.8 | |||
# If you don't need CBLAS interface, please comment it in. | |||
# NO_CBLAS = 1 | |||
# If you only want CBLAS interface without installing Fortran compiler, | |||
# please comment it in. | |||
# ONLY_CBLAS = 1 | |||
# If you don't need LAPACK, please comment it in. | |||
# If you set NO_LAPACK=1, the library automatically sets NO_LAPACKE=1. | |||
# NO_LAPACK = 1 | |||
#NO_LAPACK = 1 | |||
# If you don't need LAPACKE (C Interface to LAPACK), please comment it in. | |||
# NO_LAPACKE = 1 | |||
#NO_LAPACKE = 1 | |||
# If you want to use legacy threaded Level 3 implementation. | |||
# USE_SIMPLE_THREADED_LEVEL3 = 1 | |||
@@ -76,10 +72,10 @@ VERSION = 0.2.8 | |||
# Unfortunately, most kernels won't give us a high-quality buffer. | |||
# BLAS tries to find the best region before entering the main function, | |||
# but that takes time. If you don't like it, you can disable it. | |||
# NO_WARMUP = 1 | |||
NO_WARMUP = 1 | |||
# If you want to disable CPU/Memory affinity on Linux. | |||
# NO_AFFINITY = 1 | |||
NO_AFFINITY = 1 | |||
# Don't use AVX kernel on Sandy Bridge. It is compatible with old compilers | |||
# and OS. However, the performance is low. | |||
@@ -127,13 +123,13 @@ VERSION = 0.2.8 | |||
# Common Optimization Flag; | |||
# The default -O2 is enough. | |||
# COMMON_OPT = -O2 | |||
# NOTE(review): the active setting below replaces the -O2 default with -O0
# plus ARM-specific code generation flags, and DEBUG = 1 is enabled further
# down. This looks like a bring-up/debugging configuration -- confirm before
# using it for a release build.
COMMON_OPT = -O0 -marm -mfpu=vfpv3 -fno-omit-frame-pointer | |||
# Profiling flags | |||
COMMON_PROF = -pg | |||
# Build Debug version | |||
# DEBUG = 1 | |||
DEBUG = 1 | |||
# | |||
# End of user configuration | |||
@@ -82,19 +82,12 @@ ifeq ($(HOSTCC), loongcc) | |||
GETARCH_FLAGS += -static | |||
endif | |||
# When Fortran is not used, only CBLAS is compiled: ONLY_CBLAS = 1 also
# switches LAPACK off. Any other value is normalised to ONLY_CBLAS = 0.
ifneq ($(ONLY_CBLAS), 1)
ONLY_CBLAS = 0
else
NO_LAPACK = 1
endif
# This operation is expensive, so execution should be once. | |||
ifndef GOTOBLAS_MAKEFILE | |||
export GOTOBLAS_MAKEFILE = 1 | |||
# Generating Makefile.conf and config.h | |||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) ONLY_CBLAS=$(ONLY_CBLAS) all) | |||
DUMMY := $(shell $(MAKE) -C $(TOPDIR) -f Makefile.prebuild CC="$(CC)" FC="$(FC)" HOSTCC="$(HOSTCC)" CFLAGS="$(GETARCH_FLAGS)" BINARY=$(BINARY) USE_OPENMP=$(USE_OPENMP) TARGET_CORE=$(TARGET_CORE) all) | |||
ifndef TARGET_CORE | |||
include $(TOPDIR)/Makefile.conf | |||
@@ -331,14 +324,16 @@ ifeq ($(ARCH), x86) | |||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | |||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
ifneq ($(NO_AVX), 1) | |||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
DYNAMIC_CORE += SANDYBRIDGE | |||
#BULLDOZER PILEDRIVER | |||
endif | |||
endif | |||
# Core list used when ARCH is x86_64.
# NOTE(review): presumably these are the kernels bundled into a DYNAMIC_ARCH
# build -- confirm against the consumer of DYNAMIC_CORE.
ifeq ($(ARCH), x86_64) | |||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | |||
# The following cores are appended unless NO_AVX is set to 1.
ifneq ($(NO_AVX), 1) | |||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||
DYNAMIC_CORE += SANDYBRIDGE | |||
#BULLDOZER PILEDRIVER | |||
endif | |||
endif | |||
@@ -368,6 +363,10 @@ NO_BINARY_MODE = 1 | |||
BINARY_DEFINED = 1 | |||
endif | |||
# ARM targets set both binary-mode flags.
# NOTE(review): presumably this keeps the BINARY=32/64 handling from being
# applied on ARM -- confirm where NO_BINARY_MODE and BINARY_DEFINED are read.
ifeq "$(ARCH)" "arm"
BINARY_DEFINED = 1
NO_BINARY_MODE = 1
endif
# | |||
# C Compiler dependent settings | |||
# | |||
@@ -892,23 +891,6 @@ LIBZIPNAME = $(LIBNAME:.$(LIBSUFFIX)=.zip) | |||
LIBS = $(TOPDIR)/$(LIBNAME) | |||
LIBS_P = $(TOPDIR)/$(LIBNAME_P) | |||
# Components that make up the library.
#   ONLY_CBLAS = 1 : CBLAS alone.
#   otherwise      : BLAS, plus CBLAS unless NO_CBLAS = 1, plus LAPACK unless
#                    NO_LAPACK = 1 (and LAPACKE on top of LAPACK unless
#                    NO_LAPACKE = 1).
ifeq ($(ONLY_CBLAS), 1)
LIB_COMPONENTS = CBLAS
else
LIB_COMPONENTS = BLAS
ifneq ($(NO_CBLAS), 1)
LIB_COMPONENTS += CBLAS
endif
ifneq ($(NO_LAPACK), 1)
LIB_COMPONENTS += LAPACK
ifneq ($(NO_LAPACKE), 1)
LIB_COMPONENTS += LAPACKE
endif
endif
endif
# Export the build settings below so that sub-makes and recipe commands
# inherit them through the environment.
export OSNAME | |||
export ARCH | |||
export CORE | |||
@@ -935,7 +917,6 @@ export USE_OPENMP | |||
export CROSS | |||
export CROSS_SUFFIX | |||
export NOFORTRAN | |||
export NO_FBLAS | |||
export EXTRALIB | |||
export CEXTRALIB | |||
export FEXTRALIB | |||
@@ -363,6 +363,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||
#include "common_mips64.h" | |||
#endif | |||
/* Target-specific headers: each one is included only when its selector
   macro (ARCH_ARM, OS_LINUX) is defined. */
#if defined(ARCH_ARM)
#include "common_arm.h"
#endif

#if defined(OS_LINUX)
#include "common_linux.h"
#endif
@@ -574,10 +578,9 @@ typedef struct { | |||
#include "common_level2.h" | |||
#include "common_level3.h" | |||
#include "common_lapack.h" | |||
/* The CBLAS declarations below are only pulled in when CBLAS is defined. */
#ifdef CBLAS | |||
# define OPENBLAS_CONST /* see comment in cblas.h */ | |||
# include "cblas.h" | |||
/* This header file is generated from "cblas.h" (see Makefile.prebuild). */ | |||
#include "cblas_noconst.h" | |||
#endif | |||
#ifndef ASSEMBLER | |||
@@ -0,0 +1,163 @@ | |||
/***************************************************************************** | |||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the ISCAS nor the names of its contributors may | |||
be used to endorse or promote products derived from this software | |||
without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
**********************************************************************************/ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#ifndef COMMON_ARM | |||
#define COMMON_ARM | |||
/* MB and WMB expand to nothing in this header.
 * NOTE(review): if callers rely on them as memory barriers, a multi-threaded
 * ARMv7 build would need real barrier instructions here -- confirm. */
#define MB | |||
#define WMB | |||
/* INLINE maps to the C `inline` keyword. */
#define INLINE inline | |||
/* NOTE(review): presumably tells the rest of the library that complex
 * results are returned by value on this target -- confirm against users. */
#define RETURN_BY_COMPLEX | |||
#ifndef ASSEMBLER | |||
/*
 * Acquire the spin lock stored at `address`, blocking (busy-waiting) until
 * it is obtained.
 *
 * The lock word is 0 when free and non-zero when held; this routine stores 1.
 * NOTE(review): assumes the matching unlock releases the lock by storing 0 --
 * confirm against the unlock routine.
 *
 * The previous body was an empty stub (it only carried a commented-out MIPS
 * ll/sc sequence), so the function returned without taking the lock and
 * provided no mutual exclusion. It now uses the compiler's atomic
 * test-and-set builtin.
 */
static inline void blas_lock(volatile unsigned long *address){

  /* Atomically store 1 and fetch the previous value. A previous value of 0
     means the lock was free and now belongs to the caller. */
  while (__sync_lock_test_and_set(address, 1UL)) {

    /* Lock is held by someone else: spin on plain reads until it looks
       free, then retry the atomic exchange. */
    while (*address) { }
  }
}
/* Placeholder implementation: no counter is read, the result is always 0. */
static inline unsigned int rpcc(void){
  return 0U;
}
/* Returns x / y using ordinary C division; y must not be zero. */
static inline int blas_quickdivide(blasint x, blasint y){
  blasint quotient = x / y;

  return quotient;
}
/* GET_IMAGE(res) stores a VFP register into `res`: s1 (32-bit) by default,
 * d1 (64-bit) when DOUBLE is defined.
 * NOTE(review): presumably this fetches the imaginary part of a complex
 * return value -- confirm against the callers.
 * GET_IMAGE_CANCEL expands to nothing. */
#ifndef DOUBLE
#define GET_IMAGE(res)  __asm__ __volatile__("vstr.f32 s1, %0" : "=m"(res) : : "memory")
#else
#define GET_IMAGE(res)  __asm__ __volatile__("vstr.f64 d1, %0" : "=m"(res) : : "memory")
#endif

#define GET_IMAGE_CANCEL
#endif | |||
/* REALNAME resolves to ASMFNAME when F_INTERFACE is defined and to ASMNAME
   otherwise. */
#ifdef F_INTERFACE
#define REALNAME ASMFNAME
#else
#define REALNAME ASMNAME
#endif
/* Assembler-only helpers, defined when ASSEMBLER is set and NEEDPARAM is not.
 * PROLOGUE emits the `.arm`, `.global REALNAME` and `.func REALNAME`
 * directives followed by the `REALNAME:` label that opens a routine. */
#if defined(ASSEMBLER) && !defined(NEEDPARAM) | |||
#define PROLOGUE \ | |||
.arm ;\ | |||
.global REALNAME ;\ | |||
.func REALNAME ;\ | |||
REALNAME: | |||
/* EPILOGUE and PROFCODE expand to nothing on this target. */
#define EPILOGUE | |||
#define PROFCODE | |||
#endif | |||
/* Memory layout constants for this target. */
#define SEEK_ADDRESS

/* 4 KiB pages unless PAGESIZE was already defined. */
#if !defined(PAGESIZE)
#define PAGESIZE	(4 * 1024)
#endif

/* 4 MiB huge pages and a 16 MiB buffer. */
#define HUGE_PAGESIZE	(4 * 1024 * 1024)
#define BUFFER_SIZE	(16 * 1024 * 1024)

/* One BUFFER_SIZE slot per CPU, placed below START_ADDRESS. */
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER)

/* Fall back to MAP_ANON when MAP_ANONYMOUS is not defined. */
#if !defined(MAP_ANONYMOUS)
#define MAP_ANONYMOUS MAP_ANON
#endif
#endif |
@@ -124,3 +124,9 @@ ARCH_IA64 | |||
#if defined(__LP64) || defined(__LP64__) || defined(__ptr64) || defined(__x86_64__) || defined(__amd64__) || defined(__64BIT__) | |||
BINARY_64 | |||
#endif | |||
/* Emit the ARCH_ARM marker token when compiling for an ARM target.
 * `__arm__` is checked in addition to `__ARM_ARCH` / `__ARM_ARCH_7A__`
 * because older GCC releases do not define `__ARM_ARCH`, and
 * `__ARM_ARCH_7A__` is only defined for ARMv7-A targets. */
#if defined(__ARM_ARCH) || defined(__ARM_ARCH_7A__) || defined(__arm__)
ARCH_ARM
#endif
@@ -679,6 +679,21 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define CORENAME "generic" | |||
#endif | |||
/* Forced ARMV7 target: when FORCE_ARMV7 is defined, the architecture
 * description is fixed to the values below and FORCE is defined. */
#ifdef FORCE_ARMV7
#define FORCE
#define ARCHITECTURE    "ARM"
#define SUBARCHITECTURE "ARMV7"
#define SUBDIRNAME      "arm"
/* L2_SIZE used to read 512488, a typo for 512 KiB (512 * 1024 = 524288). */
#define ARCHCONFIG   "-DARMV7 " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
       "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 "
#define LIBNAME   "armv7"
#define CORENAME  "ARMV7"
#endif
#ifndef FORCE | |||
#if defined(__powerpc__) || defined(__powerpc) || defined(powerpc) || \ | |||
@@ -1793,6 +1793,49 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define SYMV_P 16 | |||
#endif | |||
/* Tuning parameters selected when ARMV7 is defined, grouped by routine
   prefix (SGEMM / DGEMM / CGEMM / ZGEMM). */
#if defined(ARMV7)

#define SNUMOPT		2
#define DNUMOPT		2

#define GEMM_DEFAULT_OFFSET_A	0
#define GEMM_DEFAULT_OFFSET_B	0
#define GEMM_DEFAULT_ALIGN	0x03fffUL

/* SGEMM */
#define SGEMM_DEFAULT_UNROLL_M	2
#define SGEMM_DEFAULT_UNROLL_N	2
#define SGEMM_DEFAULT_P	64
#define SGEMM_DEFAULT_Q	192
#define SGEMM_DEFAULT_R	512

/* DGEMM */
#define DGEMM_DEFAULT_UNROLL_M	8
#define DGEMM_DEFAULT_UNROLL_N	2
#define DGEMM_DEFAULT_P	128
#define DGEMM_DEFAULT_Q	128
#define DGEMM_DEFAULT_R	2048

/* CGEMM */
#define CGEMM_DEFAULT_UNROLL_M	2
#define CGEMM_DEFAULT_UNROLL_N	2
#define CGEMM_DEFAULT_P	24
#define CGEMM_DEFAULT_Q	128
#define CGEMM_DEFAULT_R	512

/* ZGEMM */
#define ZGEMM_DEFAULT_UNROLL_M	2
#define ZGEMM_DEFAULT_UNROLL_N	2
#define ZGEMM_DEFAULT_P	20
#define ZGEMM_DEFAULT_Q	64
#define ZGEMM_DEFAULT_R	512

#define SYMV_P	16

#endif
#ifdef GENERIC | |||
#define SNUMOPT 2 | |||