@@ -0,0 +1,7 @@ | |||
# When CORE is ARMV8, append -march=armv8-a to both CCOMMON_OPT and
# FCOMMON_OPT (presumably the shared C and Fortran compiler option lists --
# confirm against Makefile.system).
ifeq ($(CORE), ARMV8) | |||
CCOMMON_OPT += -march=armv8-a | |||
FCOMMON_OPT += -march=armv8-a | |||
endif | |||
@@ -367,6 +367,14 @@ ifeq ($(ARCH), arm) | |||
NO_BINARY_MODE = 1 | |||
BINARY_DEFINED = 1 | |||
endif | |||
ifeq ($(ARCH), arm64) | |||
NO_BINARY_MODE = 1 | |||
BINARY_DEFINED = 1 | |||
endif | |||
# | |||
# C Compiler dependent settings | |||
# | |||
@@ -64,6 +64,7 @@ $architecture = alpha if ($data =~ /ARCH_ALPHA/); | |||
$architecture = sparc if ($data =~ /ARCH_SPARC/); | |||
$architecture = ia64 if ($data =~ /ARCH_IA64/); | |||
$architecture = arm if ($data =~ /ARCH_ARM/); | |||
$architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
$defined = 0; | |||
@@ -151,6 +152,7 @@ $architecture = alpha if ($data =~ /ARCH_ALPHA/); | |||
$architecture = sparc if ($data =~ /ARCH_SPARC/); | |||
$architecture = ia64 if ($data =~ /ARCH_IA64/); | |||
$architecture = arm if ($data =~ /ARCH_ARM/); | |||
$architecture = arm64 if ($data =~ /ARCH_ARM64/); | |||
$binformat = bin32; | |||
$binformat = bin64 if ($data =~ /BINARY_64/); | |||
@@ -311,7 +311,7 @@ typedef int blasint; | |||
#endif | |||
#ifdef ARMV7 | |||
#if defined(ARMV7) || defined(ARMV6) || defined(ARMV8) | |||
#define YIELDING asm volatile ("nop;nop;nop;nop;nop;nop;nop;nop; \n"); | |||
#endif | |||
@@ -375,6 +375,10 @@ please https://github.com/xianyi/OpenBLAS/issues/246 | |||
#include "common_arm.h" | |||
#endif | |||
#ifdef ARCH_ARM64 | |||
#include "common_arm64.h" | |||
#endif | |||
#ifdef OS_LINUX | |||
#include "common_linux.h" | |||
#endif | |||
@@ -0,0 +1,169 @@ | |||
/***************************************************************************** | |||
Copyright (c) 2011, Lab of Parallel Software and Computational Science, ISCAS | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the ISCAS nor the names of its contributors may | |||
be used to endorse or promote products derived from this software | |||
without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
**********************************************************************************/ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#ifndef COMMON_ARM64 | |||
#define COMMON_ARM64 | |||
/*
 * Memory barriers.
 *
 * AArch64 is weakly ordered, so these must emit real barrier instructions;
 * expanding to nothing lets loads and stores around a lock or a shared
 * flag be reordered as seen from other cores.
 *
 *   MB  - orders all earlier loads/stores against all later ones
 *         ("dmb ish", inner-shareable domain).
 *   WMB - orders earlier stores against later stores ("dmb ishst").
 *
 * The "memory" clobber also stops the compiler from moving memory accesses
 * across the barrier.
 */
#define MB   __asm__ __volatile__ ("dmb  ish"   : : : "memory")
#define WMB  __asm__ __volatile__ ("dmb  ishst" : : : "memory")

/* Spelling of the inline keyword used through the INLINE macro. */
#define INLINE inline

/* Value-less flag; presumably tested with #ifdef by the complex-return
   interface code (not visible in this header -- confirm). */
#define RETURN_BY_COMPLEX
#ifndef ASSEMBLER | |||
/*
 * Acquire the spin lock stored at *address.
 *
 * The lock word is treated as 0 when free and non-zero when held.  The
 * previous body was an ARMv7 ldrex/strex sequence that had been commented
 * out, which left this function empty: it returned immediately and gave no
 * mutual exclusion at all.  ldrex/strex do not exist in the A64 instruction
 * set, so the compiler's atomic exchange builtin is used instead.
 *
 * address: lock word shared between threads.  The owner is presumed to
 *          release the lock by storing 0 to it (release code is not
 *          visible in this header -- confirm against the callers).
 */
static void __inline blas_lock(volatile BLASULONG *address){

  do {
    /* Wait with plain reads first so that contending threads do not keep
       issuing atomic writes to the lock word. */
    while (*address) { }

    /* Atomically write 1 and fetch the old value (acquire semantics).
       A non-zero old value means another thread took the lock between
       the read above and the exchange, so go back to waiting. */
  } while (__sync_lock_test_and_set(address, 1));
}
/*
 * Return a coarse time stamp: the current wall-clock time reported by
 * gettimeofday(), converted to whole milliseconds.
 *
 * The value is derived from the wall clock, so it is not guaranteed to be
 * monotonic if the system time is adjusted.
 */
static inline unsigned long long rpcc(void){
  struct timeval tv;
  double seconds;

  gettimeofday(&tv, NULL);

  /* Seconds plus the microsecond fraction, as a double. */
  seconds = (double) tv.tv_sec + (double) tv.tv_usec * 1e-6;

  /* Plain "1000.0": the former "1000.0d" relied on a GNU-only 'd' suffix
     for double constants, which other compilers reject as an invalid
     suffix.  An unsuffixed floating constant already has type double. */
  return (unsigned long long) (seconds * 1000.0);
}
/*
 * Divide x by y using ordinary integer division and return the quotient.
 * No fast-path is implemented here; y must be non-zero.
 */
static inline int blas_quickdivide(blasint x, blasint y){
  const blasint quotient = x / y;

  return quotient;
}
/*
 * GET_IMAGE(res): store floating-point register d1 (DOUBLE builds) or s1
 * (otherwise) into the lvalue res.  Presumably this fetches the imaginary
 * part of a complex result returned in registers -- confirm against the
 * callers.
 *
 * "vstr.f64"/"vstr.f32" are ARM32 VFP mnemonics and are not part of the
 * A64 instruction set; scalar FP registers are stored with "str".
 */
#if defined(DOUBLE)
#define GET_IMAGE(res)  __asm__ __volatile__("str d1, %0" : "=m"(res) : : "memory")
#else
#define GET_IMAGE(res)  __asm__ __volatile__("str s1, %0" : "=m"(res) : : "memory")
#endif

/* Expands to nothing in this header. */
#define GET_IMAGE_CANCEL
#endif | |||
/* REALNAME is the symbol a routine is emitted under: ASMFNAME when
   F_INTERFACE is defined, ASMNAME otherwise (both defined elsewhere). */
#ifndef F_INTERFACE
#define REALNAME ASMNAME
#else
#define REALNAME ASMFNAME
#endif

#if defined(ASSEMBLER) && !defined(NEEDPARAM)

/*
 * PROLOGUE opens an assembly routine: select the text section, align the
 * entry point, export the symbol, mark it as a function and place the
 * label.
 *
 * The earlier ".arm" directive selects the ARM32 instruction set and is
 * not recognised when assembling for AArch64, so it is replaced by
 * directives that are valid for this target.
 */
#define PROLOGUE \
  .text ;\
  .align 4 ;\
  .global REALNAME ;\
  .type REALNAME, %function ;\
REALNAME:

/* Nothing is emitted at the end of a routine. */
#define EPILOGUE

/* No profiling hook is emitted. */
#define PROFCODE

#endif
/* NOTE(review): SEEK_ADDRESS is only defined as a flag here; the code that
   tests it is not visible in this header. */
#define SEEK_ADDRESS | |||
/* Fall back to 4 << 10 = 4096 bytes when PAGESIZE has not been set already. */
#ifndef PAGESIZE | |||
#define PAGESIZE ( 4 << 10) | |||
#endif | |||
/* 4 << 20 = 4 MiB. */
#define HUGE_PAGESIZE ( 4 << 20) | |||
/* 16 << 20 = 16 MiB. */
#define BUFFER_SIZE (16 << 20) | |||
/* START_ADDRESS minus BUFFER_SIZE bytes for each of MAX_CPU_NUMBER CPUs;
   START_ADDRESS and MAX_CPU_NUMBER are defined outside this header. */
#define BASE_ADDRESS (START_ADDRESS - BUFFER_SIZE * MAX_CPU_NUMBER) | |||
/* Alias MAP_ANONYMOUS to MAP_ANON where only the latter is defined. */
#ifndef MAP_ANONYMOUS | |||
#define MAP_ANONYMOUS MAP_ANON | |||
#endif | |||
#endif |
@@ -129,4 +129,7 @@ BINARY_64 | |||
ARCH_ARM | |||
#endif | |||
#if defined(__aarch64__) | |||
ARCH_ARM64 | |||
#endif | |||
@@ -709,6 +709,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#else | |||
#endif | |||
/*
 * Target description used when the build forces the ARMV8 core
 * (FORCE_ARMV8 defined): architecture/sub-architecture names, the kernel
 * sub-directory, and the -D flags describing the assumed cache geometry.
 *
 * L2_SIZE is 524288 (512 KiB); the earlier 512488 was a digit
 * transposition and is not a power of two.
 */
#ifdef FORCE_ARMV8
#define FORCE
#define ARCHITECTURE    "ARM64"
#define SUBARCHITECTURE "ARMV8"
#define SUBDIRNAME      "arm64"
#define ARCHCONFIG   "-DARMV8 " \
       "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=32 " \
       "-DL2_SIZE=524288 -DL2_LINESIZE=32 " \
       "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=4 " \
       "-DHAVE_VFP -DHAVE_VFPV3 -DHAVE_VFPV4"
#define LIBNAME   "armv8"
#define CORENAME  "ARMV8"
#else
#endif
#ifndef FORCE | |||
@@ -18,6 +18,10 @@ ifeq ($(ARCH), arm) | |||
USE_TRMM = 1 | |||
endif | |||
ifeq ($(ARCH), arm64) | |||
USE_TRMM = 1 | |||
endif | |||
ifeq ($(TARGET), LOONGSON3B) | |||
USE_TRMM = 1 | |||
endif | |||
@@ -0,0 +1,46 @@ | |||
# Fallback kernel sources: each variable is set only if it has not been
# defined already, so earlier definitions take precedence.
#
# NOTE(review): the four NRM2 defaults below name nrm2.c / znrm2.c without a
# directory, while the ARMV8 kernel list in this change points the same
# variables at ../arm/nrm2.c and ../arm/znrm2.c.  Confirm that these bare
# names resolve for a core that relies on the defaults.
ifndef SNRM2KERNEL | |||
SNRM2KERNEL = nrm2.c | |||
endif | |||
ifndef DNRM2KERNEL | |||
DNRM2KERNEL = nrm2.c | |||
endif | |||
ifndef CNRM2KERNEL | |||
CNRM2KERNEL = znrm2.c | |||
endif | |||
ifndef ZNRM2KERNEL | |||
ZNRM2KERNEL = znrm2.c | |||
endif | |||
# CABS and LSAME fall back to the sources in ../generic.
ifndef SCABS_KERNEL | |||
SCABS_KERNEL = ../generic/cabs.c | |||
endif | |||
ifndef DCABS_KERNEL | |||
DCABS_KERNEL = ../generic/cabs.c | |||
endif | |||
ifndef QCABS_KERNEL | |||
QCABS_KERNEL = ../generic/cabs.c | |||
endif | |||
ifndef LSAME_KERNEL | |||
LSAME_KERNEL = ../generic/lsame.c | |||
endif | |||
# GEMM beta handling: S/D use gemm_beta.c, C/Z use zgemm_beta.c.
ifndef SGEMM_BETA | |||
SGEMM_BETA = ../generic/gemm_beta.c | |||
endif | |||
ifndef DGEMM_BETA | |||
DGEMM_BETA = ../generic/gemm_beta.c | |||
endif | |||
ifndef CGEMM_BETA | |||
CGEMM_BETA = ../generic/zgemm_beta.c | |||
endif | |||
ifndef ZGEMM_BETA | |||
ZGEMM_BETA = ../generic/zgemm_beta.c | |||
endif | |||
@@ -0,0 +1,134 @@ | |||
# Kernel source selection for the ARMV8 core.  Every entry is a C source
# file; no assembly kernels are referenced.  The S/D variants share one
# source and the C/Z variants share the corresponding z* source.

# Vector kernels: sources taken from the ../arm directory.
SAMAXKERNEL = ../arm/amax.c | |||
DAMAXKERNEL = ../arm/amax.c | |||
CAMAXKERNEL = ../arm/zamax.c | |||
ZAMAXKERNEL = ../arm/zamax.c | |||
SAMINKERNEL = ../arm/amin.c | |||
DAMINKERNEL = ../arm/amin.c | |||
CAMINKERNEL = ../arm/zamin.c | |||
ZAMINKERNEL = ../arm/zamin.c | |||
SMAXKERNEL = ../arm/max.c | |||
DMAXKERNEL = ../arm/max.c | |||
SMINKERNEL = ../arm/min.c | |||
DMINKERNEL = ../arm/min.c | |||
ISAMAXKERNEL = ../arm/iamax.c | |||
IDAMAXKERNEL = ../arm/iamax.c | |||
ICAMAXKERNEL = ../arm/izamax.c | |||
IZAMAXKERNEL = ../arm/izamax.c | |||
ISAMINKERNEL = ../arm/iamin.c | |||
IDAMINKERNEL = ../arm/iamin.c | |||
ICAMINKERNEL = ../arm/izamin.c | |||
IZAMINKERNEL = ../arm/izamin.c | |||
ISMAXKERNEL = ../arm/imax.c | |||
IDMAXKERNEL = ../arm/imax.c | |||
ISMINKERNEL = ../arm/imin.c | |||
IDMINKERNEL = ../arm/imin.c | |||
SASUMKERNEL = ../arm/asum.c | |||
DASUMKERNEL = ../arm/asum.c | |||
CASUMKERNEL = ../arm/zasum.c | |||
ZASUMKERNEL = ../arm/zasum.c | |||
SAXPYKERNEL = ../arm/axpy.c | |||
DAXPYKERNEL = ../arm/axpy.c | |||
CAXPYKERNEL = ../arm/zaxpy.c | |||
ZAXPYKERNEL = ../arm/zaxpy.c | |||
SCOPYKERNEL = ../arm/copy.c | |||
DCOPYKERNEL = ../arm/copy.c | |||
CCOPYKERNEL = ../arm/zcopy.c | |||
ZCOPYKERNEL = ../arm/zcopy.c | |||
SDOTKERNEL = ../arm/dot.c | |||
DDOTKERNEL = ../arm/dot.c | |||
CDOTKERNEL = ../arm/zdot.c | |||
ZDOTKERNEL = ../arm/zdot.c | |||
SNRM2KERNEL = ../arm/nrm2.c | |||
DNRM2KERNEL = ../arm/nrm2.c | |||
CNRM2KERNEL = ../arm/znrm2.c | |||
ZNRM2KERNEL = ../arm/znrm2.c | |||
SROTKERNEL = ../arm/rot.c | |||
DROTKERNEL = ../arm/rot.c | |||
CROTKERNEL = ../arm/zrot.c | |||
ZROTKERNEL = ../arm/zrot.c | |||
SSCALKERNEL = ../arm/scal.c | |||
DSCALKERNEL = ../arm/scal.c | |||
CSCALKERNEL = ../arm/zscal.c | |||
ZSCALKERNEL = ../arm/zscal.c | |||
SSWAPKERNEL = ../arm/swap.c | |||
DSWAPKERNEL = ../arm/swap.c | |||
CSWAPKERNEL = ../arm/zswap.c | |||
ZSWAPKERNEL = ../arm/zswap.c | |||
# Matrix-vector kernels (non-transposed and transposed), also from ../arm.
SGEMVNKERNEL = ../arm/gemv_n.c | |||
DGEMVNKERNEL = ../arm/gemv_n.c | |||
CGEMVNKERNEL = ../arm/zgemv_n.c | |||
ZGEMVNKERNEL = ../arm/zgemv_n.c | |||
SGEMVTKERNEL = ../arm/gemv_t.c | |||
DGEMVTKERNEL = ../arm/gemv_t.c | |||
CGEMVTKERNEL = ../arm/zgemv_t.c | |||
ZGEMVTKERNEL = ../arm/zgemv_t.c | |||
# TRMM kernels: generic 2x2 sources from ../generic.
STRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
DTRMMKERNEL = ../generic/trmmkernel_2x2.c | |||
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c | |||
# GEMM kernels and their copy routines: generic 2x2 kernel with the
# matching *_ncopy_2 / *_tcopy_2 sources, plus the object names the copy
# routines are built as.
SGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
SGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
SGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
DGEMMKERNEL = ../generic/gemmkernel_2x2.c | |||
DGEMMONCOPY = ../generic/gemm_ncopy_2.c | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_2.c | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
CGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
ZGEMMKERNEL = ../generic/zgemmkernel_2x2.c | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
# TRSM kernels: all four precisions use the same generic trsm_kernel_*.c
# sources (LN/LT/RN/RT variants).
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
@@ -0,0 +1,2 @@ | |||
# Double-colon clean rule with no prerequisites and no recipe.
clean :: | |||
@@ -0,0 +1,33 @@ | |||
# Selects the LASWP / ZLASWP sources for this directory and then hands over
# to the generic laswp Makefile.  TOPDIR points three levels up, where
# Makefile.system is included from.
TOPDIR = ../../.. | |||
include ../../../Makefile.system | |||
# NOTE(review): CORE2, OPTERON and PRESCOTT look like x86 core names carried
# over from another architecture's Makefile; confirm whether these branches
# can ever match for an arm64 build.
ifeq ($(CORE), CORE2) | |||
LASWP = ../generic/laswp_k_2.c | |||
ZLASWP = ../generic/zlaswp_k_2.c | |||
endif | |||
ifeq ($(CORE), OPTERON) | |||
LASWP = ../generic/laswp_k_1.c | |||
ZLASWP = ../generic/zlaswp_k_1.c | |||
endif | |||
ifeq ($(CORE), PRESCOTT) | |||
LASWP = ../generic/laswp_k_1.c | |||
ZLASWP = ../generic/zlaswp_k_1.c | |||
endif | |||
# With DYNAMIC_ARCH=1 the *_k_4.c variants are used; this assignment comes
# last, so it overrides any CORE-specific choice above.
ifeq ($(DYNAMIC_ARCH), 1) | |||
LASWP = ../generic/laswp_k_4.c | |||
ZLASWP = ../generic/zlaswp_k_4.c | |||
endif | |||
# Defaults when nothing above selected a source.
ifndef LASWP | |||
LASWP = ../generic/laswp_k.c | |||
endif | |||
ifndef ZLASWP | |||
ZLASWP = ../generic/zlaswp_k.c | |||
endif | |||
include ../generic/Makefile | |||
@@ -1874,6 +1874,46 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define SYMV_P 16 | |||
#endif | |||
/* Tuning parameters used when ARMV8 is defined. */
#if defined(ARMV8) | |||
#define SNUMOPT 2 | |||
#define DNUMOPT 2 | |||
#define GEMM_DEFAULT_OFFSET_A 0 | |||
#define GEMM_DEFAULT_OFFSET_B 0 | |||
/* 0x03fff = 16 KiB - 1; presumably used as an alignment mask -- confirm. */
#define GEMM_DEFAULT_ALIGN 0x03fffUL | |||
/* Unroll factors are 2 x 2 for every precision, which matches the *_2x2.c
   kernels and *_ncopy_2 / *_tcopy_2 copy routines chosen for ARMV8 in the
   kernel list; presumably these must be kept in step -- confirm. */
#define SGEMM_DEFAULT_UNROLL_M 2 | |||
#define SGEMM_DEFAULT_UNROLL_N 2 | |||
#define DGEMM_DEFAULT_UNROLL_M 2 | |||
#define DGEMM_DEFAULT_UNROLL_N 2 | |||
#define CGEMM_DEFAULT_UNROLL_M 2 | |||
#define CGEMM_DEFAULT_UNROLL_N 2 | |||
#define ZGEMM_DEFAULT_UNROLL_M 2 | |||
#define ZGEMM_DEFAULT_UNROLL_N 2 | |||
/* Per-precision P / Q / R values (presumably GEMM blocking sizes; their
   consumers are not visible here). */
#define SGEMM_DEFAULT_P 128 | |||
#define DGEMM_DEFAULT_P 128 | |||
#define CGEMM_DEFAULT_P 96 | |||
#define ZGEMM_DEFAULT_P 64 | |||
#define SGEMM_DEFAULT_Q 240 | |||
#define DGEMM_DEFAULT_Q 120 | |||
#define CGEMM_DEFAULT_Q 120 | |||
#define ZGEMM_DEFAULT_Q 120 | |||
#define SGEMM_DEFAULT_R 12288 | |||
#define DGEMM_DEFAULT_R 8192 | |||
#define CGEMM_DEFAULT_R 4096 | |||
#define ZGEMM_DEFAULT_R 4096 | |||
#define SYMV_P 16 | |||
#endif | |||
#ifdef GENERIC | |||