POWER9 makefile. DGEMM kernel based on the POWER8 kernel, with the following changes … tags/v0.3.6^2
@@ -9,7 +9,15 @@ else | |||
USE_OPENMP = 1 | |||
endif | |||
# Compiler flags appended when CORE is POWER9.
# Both branches pass -mcpu=power9 -mtune=power9 -malign-power; the OpenMP
# branch additionally passes -DUSE_OPENMP and -fopenmp.
# COMMON_OPT carries -mvsx; FCOMMON_OPT does not.
# NOTE(review): -Ofast is followed by -fno-fast-math; presumably the later
# flag is meant to switch fast-math back off - confirm against the GCC docs.
ifeq ($(CORE), POWER9) | |||
ifeq ($(USE_OPENMP), 1) | |||
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | |||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -DUSE_OPENMP -fno-fast-math -fopenmp | |||
else | |||
COMMON_OPT += -Ofast -mcpu=power9 -mtune=power9 -mvsx -malign-power -fno-fast-math | |||
FCOMMON_OPT += -O2 -frecursive -mcpu=power9 -mtune=power9 -malign-power -fno-fast-math | |||
endif | |||
endif | |||
ifeq ($(CORE), POWER8) | |||
ifeq ($(USE_OPENMP), 1) | |||
@@ -48,6 +48,7 @@ POWER5 | |||
POWER6 | |||
POWER7 | |||
POWER8 | |||
POWER9 | |||
PPCG4 | |||
PPC970 | |||
PPC970MP | |||
@@ -348,6 +348,11 @@ typedef int blasint; | |||
#endif | |||
#endif | |||
/* POWER9: unless YIELDING is already defined, define it as an inline-asm
   statement that issues eight nop instructions. The macro expansion already
   ends with a semicolon. */
#ifdef POWER9 | |||
#ifndef YIELDING | |||
#define YIELDING __asm__ __volatile__ ("nop;nop;nop;nop;nop;nop;nop;nop;\n"); | |||
#endif | |||
#endif | |||
/* | |||
#ifdef PILEDRIVER | |||
@@ -39,7 +39,7 @@ | |||
#ifndef COMMON_POWER | |||
#define COMMON_POWER | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#define MB __asm__ __volatile__ ("eieio":::"memory") | |||
#define WMB __asm__ __volatile__ ("eieio":::"memory") | |||
#else | |||
@@ -241,7 +241,7 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||
#define HAVE_PREFETCH | |||
#endif | |||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || ( defined(PPC970) && defined(OS_DARWIN) ) | |||
#if defined(POWER3) || defined(POWER6) || defined(PPCG4) || defined(CELL) || defined(POWER8) || defined(POWER9) || ( defined(PPC970) && defined(OS_DARWIN) ) | |||
#define DCBT_ARG 0 | |||
#else | |||
#define DCBT_ARG 8 | |||
@@ -263,7 +263,7 @@ static inline int blas_quickdivide(blasint x, blasint y){ | |||
#define L1_PREFETCH dcbtst | |||
#endif | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#define L1_DUALFETCH | |||
#define L1_PREFETCHSIZE (16 + 128 * 100) | |||
#define L1_PREFETCH dcbtst | |||
@@ -812,7 +812,7 @@ Lmcount$lazy_ptr: | |||
#define BUFFER_SIZE ( 2 << 20) | |||
#elif defined(PPC440FP2) | |||
#define BUFFER_SIZE ( 16 << 20) | |||
#elif defined(POWER8) | |||
#elif defined(POWER8) || defined(POWER9) | |||
#define BUFFER_SIZE ( 64 << 20) | |||
#else | |||
#define BUFFER_SIZE ( 16 << 20) | |||
@@ -94,7 +94,7 @@ char *corename[] = { | |||
"CELL", | |||
"PPCG4", | |||
"POWER8", | |||
"POWER8" | |||
"POWER9" | |||
}; | |||
int detect(void){ | |||
@@ -124,7 +124,7 @@ int detect(void){ | |||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | |||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | |||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | |||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8; | |||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9; | |||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | |||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | |||
@@ -156,7 +156,7 @@ int detect(void){ | |||
if (!strncasecmp(p, "POWER6", 6)) return CPUTYPE_POWER6; | |||
if (!strncasecmp(p, "POWER7", 6)) return CPUTYPE_POWER6; | |||
if (!strncasecmp(p, "POWER8", 6)) return CPUTYPE_POWER8; | |||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER8; | |||
if (!strncasecmp(p, "POWER9", 6)) return CPUTYPE_POWER9; | |||
if (!strncasecmp(p, "Cell", 4)) return CPUTYPE_CELL; | |||
if (!strncasecmp(p, "7447", 4)) return CPUTYPE_PPCG4; | |||
return CPUTYPE_POWER5; | |||
@@ -180,7 +180,7 @@ int id; | |||
__asm __volatile("mfpvr %0" : "=r"(id)); | |||
switch ( id >> 16 ) { | |||
case 0x4e: // POWER9 | |||
return CPUTYPE_POWER8; | |||
return CPUTYPE_POWER9; | |||
break; | |||
case 0x4d: | |||
case 0x4b: // POWER8/8E | |||
@@ -637,6 +637,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define CORENAME "POWER8" | |||
#endif | |||
/* Configuration selected when FORCE_POWER9 is defined: sets FORCE, the
   architecture / sub-architecture / directory / library / core name strings,
   and ARCHCONFIG, a string of -D options that defines POWER9 together with
   L1/L2 cache sizes, line sizes, associativity and DTB parameters.
   NOTE(review): the cache and DTB figures are not derived anywhere in this
   hunk - confirm they match POWER9 hardware. */
#if defined(FORCE_POWER9) | |||
#define FORCE | |||
#define ARCHITECTURE "POWER" | |||
#define SUBARCHITECTURE "POWER9" | |||
#define SUBDIRNAME "power" | |||
#define ARCHCONFIG "-DPOWER9 " \ | |||
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=128 " \ | |||
"-DL2_SIZE=4194304 -DL2_LINESIZE=128 " \ | |||
"-DDTB_DEFAULT_ENTRIES=128 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=8 " | |||
#define LIBNAME "power9" | |||
#define CORENAME "POWER9" | |||
#endif | |||
#ifdef FORCE_PPCG4 | |||
#define FORCE | |||
@@ -44,6 +44,10 @@ ifeq ($(CORE), POWER8) | |||
USE_TRMM = 1 | |||
endif | |||
# Set USE_TRMM when building for a POWER9 core.
# NOTE(review): presumably this selects the dedicated *TRMMKERNEL sources
# instead of reusing the GEMM kernels - confirm where USE_TRMM is consumed.
ifeq ($(CORE), POWER9) | |||
USE_TRMM = 1 | |||
endif | |||
ifeq ($(ARCH), zarch) | |||
USE_TRMM = 1 | |||
endif | |||
@@ -0,0 +1,184 @@ | |||
# GEMM / TRMM kernel and packing-routine selection for POWER9.
# In this section only DGEMMKERNEL and DTRMMKERNEL name a POWER9-specific
# source (dgemm_kernel_power9.S); every other entry names a POWER8 assembly
# file or a generic C file.
# The *_BETA overrides below are commented out and therefore left unset here.
#SGEMM_BETA = ../generic/gemm_beta.c | |||
#DGEMM_BETA = ../generic/gemm_beta.c | |||
#CGEMM_BETA = ../generic/zgemm_beta.c | |||
#ZGEMM_BETA = ../generic/zgemm_beta.c | |||
# TRMM kernels (DTRMM shares its source file with DGEMM).
STRMMKERNEL = strmm_kernel_16x8_power8.S | |||
DTRMMKERNEL = dgemm_kernel_power9.S | |||
CTRMMKERNEL = ctrmm_kernel_8x4_power8.S | |||
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S | |||
# Single precision real: kernel, copy-routine sources and their object names.
SGEMMKERNEL = sgemm_kernel_16x8_power8.S | |||
SGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
SGEMMITCOPY = sgemm_tcopy_16_power8.S | |||
SGEMMONCOPY = ../generic/gemm_ncopy_8.c | |||
SGEMMOTCOPY = sgemm_tcopy_8_power8.S | |||
SGEMMINCOPYOBJ = sgemm_incopy.o | |||
SGEMMITCOPYOBJ = sgemm_itcopy.o | |||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||
# Double precision real: POWER9 kernel with POWER8 / generic copy routines.
DGEMMKERNEL = dgemm_kernel_power9.S | |||
DGEMMINCOPY = ../generic/gemm_ncopy_16.c | |||
DGEMMITCOPY = dgemm_tcopy_16_power8.S | |||
DGEMMONCOPY = dgemm_ncopy_4_power8.S | |||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||
DGEMMINCOPYOBJ = dgemm_incopy.o | |||
DGEMMITCOPYOBJ = dgemm_itcopy.o | |||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||
# Single precision complex.
CGEMMKERNEL = cgemm_kernel_8x4_power8.S | |||
CGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||
CGEMMITCOPY = cgemm_tcopy_8_power8.S | |||
CGEMMONCOPY = ../generic/zgemm_ncopy_4.c | |||
CGEMMOTCOPY = ../generic/zgemm_tcopy_4.c | |||
CGEMMONCOPYOBJ = cgemm_oncopy.o | |||
CGEMMOTCOPYOBJ = cgemm_otcopy.o | |||
CGEMMINCOPYOBJ = cgemm_incopy.o | |||
CGEMMITCOPYOBJ = cgemm_itcopy.o | |||
# Double precision complex.
ZGEMMKERNEL = zgemm_kernel_8x2_power8.S | |||
ZGEMMONCOPY = ../generic/zgemm_ncopy_2.c | |||
ZGEMMOTCOPY = ../generic/zgemm_tcopy_2.c | |||
ZGEMMINCOPY = ../generic/zgemm_ncopy_8.c | |||
ZGEMMITCOPY = zgemm_tcopy_8_power8.S | |||
ZGEMMONCOPYOBJ = zgemm_oncopy.o | |||
ZGEMMOTCOPYOBJ = zgemm_otcopy.o | |||
ZGEMMINCOPYOBJ = zgemm_incopy.o | |||
ZGEMMITCOPYOBJ = zgemm_itcopy.o | |||
# TRSM kernel selection: every variant uses the generic C implementation
# except DTRSMKERNEL_LT, which uses the POWER8 16x4 assembly kernel.
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
DTRSMKERNEL_LT = dtrsm_kernel_LT_16x4_power8.S | |||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||
ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||
ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||
ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
# Kernel source selection for the remaining routines (amax/amin indices, asum,
# axpy, copy, dot, nrm2, rot, scal, swap, gemv, plus lsame/cabs helpers).
# Lines starting with '#' followed by an assignment are disabled entries and
# leave the corresponding variable unset in this file.
#Todo: CGEMM3MKERNEL should be 4x4 blocksizes. | |||
#CGEMM3MKERNEL = zgemm3m_kernel_8x4_sse3.S | |||
#ZGEMM3MKERNEL = zgemm3m_kernel_4x4_sse3.S | |||
#Pure C for other kernels | |||
#SAMAXKERNEL = ../arm/amax.c | |||
#DAMAXKERNEL = ../arm/amax.c | |||
#CAMAXKERNEL = ../arm/zamax.c | |||
#ZAMAXKERNEL = ../arm/zamax.c | |||
# | |||
#SAMINKERNEL = ../arm/amin.c | |||
#DAMINKERNEL = ../arm/amin.c | |||
#CAMINKERNEL = ../arm/zamin.c | |||
#ZAMINKERNEL = ../arm/zamin.c | |||
# | |||
#SMAXKERNEL = ../arm/max.c | |||
#DMAXKERNEL = ../arm/max.c | |||
# | |||
#SMINKERNEL = ../arm/min.c | |||
#DMINKERNEL = ../arm/min.c | |||
# | |||
ISAMAXKERNEL = isamax.c | |||
IDAMAXKERNEL = idamax.c | |||
ICAMAXKERNEL = icamax.c | |||
IZAMAXKERNEL = izamax.c | |||
# | |||
ISAMINKERNEL = isamin.c | |||
IDAMINKERNEL = idamin.c | |||
ICAMINKERNEL = icamin.c | |||
IZAMINKERNEL = izamin.c | |||
# | |||
#ISMAXKERNEL = ../arm/imax.c | |||
#IDMAXKERNEL = ../arm/imax.c | |||
# | |||
#ISMINKERNEL = ../arm/imin.c | |||
#IDMINKERNEL = ../arm/imin.c | |||
# | |||
SASUMKERNEL = sasum.c | |||
DASUMKERNEL = dasum.c | |||
CASUMKERNEL = casum.c | |||
ZASUMKERNEL = zasum.c | |||
# | |||
SAXPYKERNEL = saxpy.c | |||
DAXPYKERNEL = daxpy.c | |||
CAXPYKERNEL = caxpy.c | |||
ZAXPYKERNEL = zaxpy.c | |||
# | |||
SCOPYKERNEL = scopy.c | |||
DCOPYKERNEL = dcopy.c | |||
CCOPYKERNEL = ccopy.c | |||
ZCOPYKERNEL = zcopy.c | |||
# | |||
# NOTE(review): DSDOT is mapped to the same source as SDOT (sdot.c) -
# presumably intentional; confirm.
SDOTKERNEL = sdot.c | |||
DDOTKERNEL = ddot.c | |||
DSDOTKERNEL = sdot.c | |||
CDOTKERNEL = cdot.c | |||
ZDOTKERNEL = zdot.c | |||
# | |||
# The NRM2 kernels are taken from the ../arm directory rather than this one.
SNRM2KERNEL = ../arm/nrm2.c | |||
DNRM2KERNEL = ../arm/nrm2.c | |||
CNRM2KERNEL = ../arm/znrm2.c | |||
ZNRM2KERNEL = ../arm/znrm2.c | |||
# | |||
SROTKERNEL = srot.c | |||
DROTKERNEL = drot.c | |||
CROTKERNEL = crot.c | |||
ZROTKERNEL = zrot.c | |||
# | |||
# NOTE(review): CSCAL is mapped to zscal.c, the same source as ZSCAL -
# presumably one file serves both precisions; confirm.
SSCALKERNEL = sscal.c | |||
DSCALKERNEL = dscal.c | |||
CSCALKERNEL = zscal.c | |||
ZSCALKERNEL = zscal.c | |||
# | |||
SSWAPKERNEL = sswap.c | |||
DSWAPKERNEL = dswap.c | |||
CSWAPKERNEL = cswap.c | |||
ZSWAPKERNEL = zswap.c | |||
# | |||
SGEMVNKERNEL = sgemv_n.c | |||
DGEMVNKERNEL = dgemv_n.c | |||
CGEMVNKERNEL = cgemv_n.c | |||
ZGEMVNKERNEL = zgemv_n_4.c | |||
# | |||
SGEMVTKERNEL = sgemv_t.c | |||
DGEMVTKERNEL = dgemv_t.c | |||
CGEMVTKERNEL = cgemv_t.c | |||
ZGEMVTKERNEL = zgemv_t_4.c | |||
#SSYMV_U_KERNEL = ../generic/symv_k.c | |||
#SSYMV_L_KERNEL = ../generic/symv_k.c | |||
#DSYMV_U_KERNEL = ../generic/symv_k.c | |||
#DSYMV_L_KERNEL = ../generic/symv_k.c | |||
#QSYMV_U_KERNEL = ../generic/symv_k.c | |||
#QSYMV_L_KERNEL = ../generic/symv_k.c | |||
#CSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
#CSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
#ZSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
#ZSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
#XSYMV_U_KERNEL = ../generic/zsymv_k.c | |||
#XSYMV_L_KERNEL = ../generic/zsymv_k.c | |||
#ZHEMV_U_KERNEL = ../generic/zhemv_k.c | |||
#ZHEMV_L_KERNEL = ../generic/zhemv_k.c | |||
LSAME_KERNEL = ../generic/lsame.c | |||
SCABS_KERNEL = ../generic/cabs.c | |||
DCABS_KERNEL = ../generic/cabs.c | |||
QCABS_KERNEL = ../generic/cabs.c | |||
#Dump kernel | |||
# Both 3M GEMM kernels point at the generic "dump" source.
CGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c | |||
ZGEMM3MKERNEL = ../generic/zgemm3mkernel_dump.c |
@@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "casum_microk_power8.c" | |||
#endif | |||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "ccopy_microk_power8.c" | |||
#endif | |||
@@ -27,7 +27,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
static void crot_kernel_8 (long n, float *x, float *y, float c, float s) | |||
{ | |||
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "cswap_microk_power8.c" | |||
#endif | |||
@@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "dasum_microk_power8.c" | |||
#endif | |||
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "daxpy_microk_power8.c" | |||
#endif | |||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "dcopy_microk_power8.c" | |||
#endif | |||
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "ddot_microk_power8.c" | |||
#endif | |||
@@ -0,0 +1,249 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2013-2019, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
/* ASSEMBLER is defined before the shared headers are included. */
#define ASSEMBLER | |||
#include "common.h" | |||
#include "def_vsx.h" | |||
#define LOAD ld | |||
/* Number of bytes by which the kernel prologue lowers SP. */
#define STACKSIZE (512 ) | |||
/* Frame slots at SP+488 and SP+496: the prologue stores f1 into ALPHA_SP and
   a zero word into FZERO. */
#define ALPHA_SP (296+192)(SP) | |||
#define FZERO (304+192)(SP) | |||
/* Register aliases for the kernel operands.
   NOTE(review): presumably these are the incoming argument registers for
   (M, N, K, alpha, A, B, C, LDC) with OFFSET only used for TRMM - confirm
   against the calling convention. */
#define M r3 | |||
#define N r4 | |||
#define K r5 | |||
#define A r7 | |||
#define B r8 | |||
#define C r9 | |||
#define LDC r10 | |||
#define OFFSET r6 | |||
/* VSX register that receives alpha (loaded with lxvdsx in the prologue). */
#define alpha_r vs18 | |||
#define o0 0 | |||
/* Working-register aliases (r11, r12 and r14-r31). The oNN registers are
   loaded with the constants 8/16/24/32/48 and PRE with 384 in the prologue;
   the others are presumably consumed by the included macro/logic files. */
#define T4 r12 | |||
#define T3 r11 | |||
#define C4 r14 | |||
#define o8 r15 | |||
#define o24 r16 | |||
#define C2 r17 | |||
#define L r18 | |||
#define T1 r19 | |||
#define C3 r20 | |||
#define TEMP_REG r21 | |||
#define I r22 | |||
#define J r23 | |||
#define AO r24 | |||
#define BO r25 | |||
#define CO r26 | |||
#define o16 r27 | |||
#define o32 r28 | |||
#define o48 r29 | |||
#define PRE r30 | |||
#define T2 r31 | |||
#include "dgemm_macros_power9.S" | |||
#ifndef NEEDPARAM | |||
/*
 * DGEMM kernel entry for POWER9.
 *
 * Prologue: lowers SP by STACKSIZE, saves f14-f31, r14-r31 and the vector
 * registers v20-v31 in the frame, spills alpha (f1) to ALPHA_SP, scales LDC
 * by BASE_SHIFT, and leaves early when M, N or K is <= 0.
 * Body: supplied by dgemm_logic_power9.S.
 * Epilogue (.L999): sets r3 to 0, restores every saved register, releases
 * the frame and returns.
 *
 * Frame layout (offsets from the lowered SP):
 *     0..143   f14-f31   (8 bytes each)
 *   144..287   r31..r14  (8 bytes each)
 *   288..479   v20-v31   (16 bytes each, accessed as vs52-vs63)
 *   488        ALPHA_SP
 *   496        FZERO
 */
PROLOGUE | |||
PROFCODE | |||
addi SP, SP, -STACKSIZE | |||
/* r0 = 0, stored into FZERO further down. */
li r0, 0 | |||
/* Save floating-point registers f14-f31. */
stfd f14, 0(SP) | |||
stfd f15, 8(SP) | |||
stfd f16, 16(SP) | |||
stfd f17, 24(SP) | |||
stfd f18, 32(SP) | |||
stfd f19, 40(SP) | |||
stfd f20, 48(SP) | |||
stfd f21, 56(SP) | |||
stfd f22, 64(SP) | |||
stfd f23, 72(SP) | |||
stfd f24, 80(SP) | |||
stfd f25, 88(SP) | |||
stfd f26, 96(SP) | |||
stfd f27, 104(SP) | |||
stfd f28, 112(SP) | |||
stfd f29, 120(SP) | |||
stfd f30, 128(SP) | |||
stfd f31, 136(SP) | |||
/* Save general-purpose registers r14-r31. */
std r31, 144(SP) | |||
std r30, 152(SP) | |||
std r29, 160(SP) | |||
std r28, 168(SP) | |||
std r27, 176(SP) | |||
std r26, 184(SP) | |||
std r25, 192(SP) | |||
std r24, 200(SP) | |||
std r23, 208(SP) | |||
std r22, 216(SP) | |||
std r21, 224(SP) | |||
std r20, 232(SP) | |||
std r19, 240(SP) | |||
std r18, 248(SP) | |||
std r17, 256(SP) | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
/* Save vector registers v20-v31. stxv takes a VSX register number, and
   v20-v31 are VSX registers 52-63; the bare numbers 20-31 would instead name
   the VSX registers that overlay f20-f31, which are already saved above.
   NOTE(review): assumes def_vsx.h provides vs52-vs63 like the vs18 used for
   alpha_r - confirm. */
stxv vs52, 288(SP) | |||
stxv vs53, 304(SP) | |||
stxv vs54, 320(SP) | |||
stxv vs55, 336(SP) | |||
stxv vs56, 352(SP) | |||
stxv vs57, 368(SP) | |||
stxv vs58, 384(SP) | |||
stxv vs59, 400(SP) | |||
stxv vs60, 416(SP) | |||
stxv vs61, 432(SP) | |||
stxv vs62, 448(SP) | |||
stxv vs63, 464(SP) | |||
/* Spill alpha to the frame and zero the FZERO word. */
stfd f1, ALPHA_SP | |||
stw r0, FZERO | |||
/* Scale LDC by BASE_SHIFT. */
slwi LDC, LDC, BASE_SHIFT | |||
#if defined(TRMMKERNEL) | |||
/* TRMM only: load OFFSET from the caller's frame (above this frame). */
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
#endif | |||
/* Nothing to do when any of M, N, K is <= 0. */
cmpwi cr0, M, 0 | |||
ble .L999_H1 | |||
cmpwi cr0, N, 0 | |||
ble .L999_H1 | |||
cmpwi cr0, K, 0 | |||
ble .L999_H1 | |||
/* T1 = address of the ALPHA_SP slot (SP + 488). */
addi T1, SP, 296+192 | |||
/* Constants held in registers for the included macro/logic code. */
li PRE, 384 | |||
li o8 , 8 | |||
li o16, 16 | |||
li o24, 24 | |||
li o32, 32 | |||
li o48, 48 | |||
/* Load alpha from the frame into alpha_r. */
lxvdsx alpha_r, 0, T1 | |||
#include "dgemm_logic_power9.S" | |||
.L999: | |||
/* r3 = 0 before returning. */
addi r3, 0, 0 | |||
/* Restore floating-point registers f14-f31. */
lfd f14, 0(SP) | |||
lfd f15, 8(SP) | |||
lfd f16, 16(SP) | |||
lfd f17, 24(SP) | |||
lfd f18, 32(SP) | |||
lfd f19, 40(SP) | |||
lfd f20, 48(SP) | |||
lfd f21, 56(SP) | |||
lfd f22, 64(SP) | |||
lfd f23, 72(SP) | |||
lfd f24, 80(SP) | |||
lfd f25, 88(SP) | |||
lfd f26, 96(SP) | |||
lfd f27, 104(SP) | |||
lfd f28, 112(SP) | |||
lfd f29, 120(SP) | |||
lfd f30, 128(SP) | |||
lfd f31, 136(SP) | |||
/* Restore general-purpose registers r14-r31. */
ld r31, 144(SP) | |||
ld r30, 152(SP) | |||
ld r29, 160(SP) | |||
ld r28, 168(SP) | |||
ld r27, 176(SP) | |||
ld r26, 184(SP) | |||
ld r25, 192(SP) | |||
ld r24, 200(SP) | |||
ld r23, 208(SP) | |||
ld r22, 216(SP) | |||
ld r21, 224(SP) | |||
ld r20, 232(SP) | |||
ld r19, 240(SP) | |||
ld r18, 248(SP) | |||
ld r17, 256(SP) | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
/* Restore vector registers v20-v31 (VSX 52-63), mirroring the saves above. */
lxv vs52, 288(SP) | |||
lxv vs53, 304(SP) | |||
lxv vs54, 320(SP) | |||
lxv vs55, 336(SP) | |||
lxv vs56, 352(SP) | |||
lxv vs57, 368(SP) | |||
lxv vs58, 384(SP) | |||
lxv vs59, 400(SP) | |||
lxv vs60, 416(SP) | |||
lxv vs61, 432(SP) | |||
lxv vs62, 448(SP) | |||
lxv vs63, 464(SP) | |||
/* Release the frame and return. */
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
#endif |
@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "dgemv_n_microk_power8.c" | |||
#endif | |||
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#pragma GCC optimize "O1" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "drot_microk_power8.c" | |||
#endif | |||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "dscal_microk_power8.c" | |||
#endif | |||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "dswap_microk_power8.c" | |||
#endif | |||
@@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "sasum_microk_power8.c" | |||
#endif | |||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "scopy_microk_power8.c" | |||
#endif | |||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "sdot_microk_power8.c" | |||
#endif | |||
@@ -39,7 +39,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#pragma GCC optimize "O1" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "srot_microk_power8.c" | |||
#endif | |||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "sscal_microk_power8.c" | |||
#endif | |||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "sswap_microk_power8.c" | |||
#endif | |||
@@ -46,7 +46,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "zasum_microk_power8.c" | |||
#endif | |||
@@ -36,19 +36,18 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "zaxpy_microk_power8.c" | |||
#endif | |||
#ifndef HAVE_KERNEL_4 | |||
static void zaxpy_kernel_4(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *alpha) | |||
static void zaxpy_kernel_4(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT da_r,FLOAT da_i) | |||
{ | |||
BLASLONG register i = 0; | |||
BLASLONG register ix = 0; | |||
FLOAT da_r = alpha[0]; | |||
FLOAT da_i = alpha[1]; | |||
while(i < n) | |||
@@ -35,7 +35,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "zcopy_microk_power8.c" | |||
#endif | |||
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "zdot_microk_power8.c" | |||
#endif | |||
@@ -38,7 +38,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#pragma GCC optimize "O1" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#if defined(DOUBLE) | |||
#include "zscal_microk_power8.c" | |||
#endif | |||
@@ -36,7 +36,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#include "common.h" | |||
#if defined(POWER8) | |||
#if defined(POWER8) || defined(POWER9) | |||
#include "zswap_microk_power8.c" | |||
#endif | |||
@@ -2230,6 +2230,37 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#endif | |||
/* Tuning parameters used when POWER9 is defined: SNUMOPT/DNUMOPT, GEMM
   buffer offsets and alignment mask, the per-precision GEMM unroll factors
   (M x N) and the P/Q defaults, plus SYMV_P.
   NOTE(review): the DGEMM unroll of 16x4 presumably has to match the tile
   shape of the selected DGEMM kernel and copy routines - confirm. */
#if defined(POWER9) | |||
#define SNUMOPT 16 | |||
#define DNUMOPT 8 | |||
#define GEMM_DEFAULT_OFFSET_A 0 | |||
#define GEMM_DEFAULT_OFFSET_B 65536 | |||
#define GEMM_DEFAULT_ALIGN 0x0ffffUL | |||
#define SGEMM_DEFAULT_UNROLL_M 16 | |||
#define SGEMM_DEFAULT_UNROLL_N 8 | |||
#define DGEMM_DEFAULT_UNROLL_M 16 | |||
#define DGEMM_DEFAULT_UNROLL_N 4 | |||
#define CGEMM_DEFAULT_UNROLL_M 8 | |||
#define CGEMM_DEFAULT_UNROLL_N 4 | |||
#define ZGEMM_DEFAULT_UNROLL_M 8 | |||
#define ZGEMM_DEFAULT_UNROLL_N 2 | |||
#define SGEMM_DEFAULT_P 1280 | |||
#define DGEMM_DEFAULT_P 128 | |||
#define CGEMM_DEFAULT_P 640 | |||
#define ZGEMM_DEFAULT_P 320 | |||
#define SGEMM_DEFAULT_Q 640 | |||
#define DGEMM_DEFAULT_Q 384 | |||
#define CGEMM_DEFAULT_Q 640 | |||
#define ZGEMM_DEFAULT_Q 640 | |||
#define SYMV_P 8 | |||
#endif | |||
#if defined(SPARC) && defined(V7) | |||