@@ -3,14 +3,18 @@ | |||
#CGEMM_BETA = ../generic/zgemm_beta.c
#ZGEMM_BETA = ../generic/zgemm_beta.c

# TRMM kernels. Each variable is assigned exactly once; the earlier
# gemm_kernel_power6.S assignment to STRMMKERNEL was overridden by the
# line below it and has been dropped.
STRMMKERNEL = strmm_kernel_16x8_power8.S
DTRMMKERNEL = dtrmm_kernel_16x4_power8.S
CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c
ZTRMMKERNEL = ztrmm_kernel_8x2_power8.S

# SGEMM: 16x8 kernel together with the 16-wide (IN/IT) and 8-wide (ON/OT)
# generic copy routines. The superseded power6 kernel and 4-wide copy
# assignments were dead (a later assignment wins in make) and are removed.
SGEMMKERNEL = sgemm_kernel_16x8_power8.S
SGEMMINCOPY = ../generic/gemm_ncopy_16.c
SGEMMITCOPY = ../generic/gemm_tcopy_16.c
SGEMMONCOPY = ../generic/gemm_ncopy_8.c
SGEMMOTCOPY = ../generic/gemm_tcopy_8.c
SGEMMINCOPYOBJ = sgemm_incopy.o
SGEMMITCOPYOBJ = sgemm_itcopy.o
SGEMMONCOPYOBJ = sgemm_oncopy.o
SGEMMOTCOPYOBJ = sgemm_otcopy.o
@@ -146,7 +150,7 @@ ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||
#SGEMVTKERNEL = ../arm/gemv_t.c | |||
#DGEMVTKERNEL = ../arm/gemv_t.c | |||
#CGEMVTKERNEL = ../arm/zgemv_t.c | |||
#ZGEMVTKERNEL = ../arm/zgemv_t.c | |||
# Only ZGEMVTKERNEL is set explicitly in this group; the S/D/C entries above
# (and the ../arm variant of this one) remain commented out.
ZGEMVTKERNEL = zgemv_t_4.c | |||
#SSYMV_U_KERNEL = ../generic/symv_k.c | |||
@@ -0,0 +1,354 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2013-2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
/************************************************************************************** | |||
* 2016/03/14 Werner Saar (wernsaar@googlemail.com) | |||
* BLASTEST : OK | |||
* CTEST : OK | |||
* TEST : OK | |||
**************************************************************************************/ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#define ASSEMBLER | |||
#include "common.h" | |||
#include "def_vsx.h" | |||
/* LOAD: integer load mnemonic chosen by word size (lwz for 32-bit builds,
   ld for 64-bit builds).
   NOTE(review): LOAD is not referenced in the visible part of this file. */
#ifndef __64BIT__ | |||
#define LOAD lwz | |||
#else | |||
#define LOAD ld | |||
#endif | |||
/* STACKSIZE: number of bytes the prologue subtracts from SP.
   ALPHA_SP / FZERO: SP-relative slots near the top of that frame.
   NOTE(review): the only visible uses of ALPHA_SP and FZERO are the
   commented-out stores in the prologue. */
#ifdef __64BIT__ | |||
#define STACKSIZE 320 | |||
#define ALPHA_SP 296(SP) | |||
#define FZERO 304(SP) | |||
#else | |||
#define STACKSIZE 240 | |||
#define ALPHA_SP 224(SP) | |||
#define FZERO 232(SP) | |||
#endif | |||
/* Argument registers. M, N and K use r3-r5 on every configuration below
   (presumably the three GEMM dimensions - confirm against the caller). */
#define M r3 | |||
#define N r4 | |||
#define K r5 | |||
/* Linux: registers for A, B, C, LDC and OFFSET; the mapping differs between
   32-bit and 64-bit builds. In the 64-bit TRMM case OFFSET (r6) is filled
   from the stack by the prologue. */
#ifdef linux | |||
#ifndef __64BIT__ | |||
#define A r6 | |||
#define B r7 | |||
#define C r8 | |||
#define LDC r9 | |||
#define OFFSET r10 | |||
#else | |||
#define A r7 | |||
#define B r8 | |||
#define C r9 | |||
#define LDC r10 | |||
#define OFFSET r6 | |||
#endif | |||
#endif | |||
/* AIX / Darwin: the 32-bit DOUBLE build moves A/B/C up by one register and
   keeps LDC in r7 (the prologue loads it from the stack); every other
   build uses the same mapping as 64-bit Linux. */
#if defined(_AIX) || defined(__APPLE__) | |||
#if !defined(__64BIT__) && defined(DOUBLE) | |||
#define A r8 | |||
#define B r9 | |||
#define C r10 | |||
#define LDC r7 | |||
#define OFFSET r6 | |||
#else | |||
#define A r7 | |||
#define B r8 | |||
#define C r9 | |||
#define LDC r10 | |||
#define OFFSET r6 | |||
#endif | |||
#endif | |||
/* VSX registers that receive alpha in the prologue (both are loaded from a
   16-byte stack buffer filled with four copies of f1). */
#define alpha_r vs30 | |||
#define alpha_vr vs31 | |||
/* o0 is the literal 0. o4/o8/o12 (and o16/o32/o48 further down) are GPRs
   that the prologue loads with the byte offsets 4, 8, 12, 16, 32 and 48. */
#define o0 0 | |||
#define o4 r15 | |||
#define o12 r16 | |||
#define o8 r17 | |||
/* Working registers. Only T1 is used by the visible code (address of the
   alpha buffer); the others are presumably consumed by the included
   macro/logic files - confirm there. */
#define L r18 | |||
#define T1 r19 | |||
#define KK r20 | |||
#define BB r21 | |||
#define I r22 | |||
#define J r23 | |||
#define AO r24 | |||
#define BO r25 | |||
#define CO r26 | |||
#define o16 r27 | |||
#define o32 r28 | |||
#define o48 r29 | |||
/* PRE is loaded with 384 in the prologue.
   NOTE(review): looks like a prefetch distance - confirm in the macros. */
#define PRE r30 | |||
#define T2 r31 | |||
#include "sgemm_macros_16x8_power8.S" | |||
#ifndef NEEDPARAM | |||
PROLOGUE | |||
PROFCODE | |||
addi SP, SP, -STACKSIZE | |||
li r0, 0 | |||
stfd f14, 0(SP) | |||
stfd f15, 8(SP) | |||
stfd f16, 16(SP) | |||
stfd f17, 24(SP) | |||
stfd f18, 32(SP) | |||
stfd f19, 40(SP) | |||
stfd f20, 48(SP) | |||
stfd f21, 56(SP) | |||
stfd f22, 64(SP) | |||
stfd f23, 72(SP) | |||
stfd f24, 80(SP) | |||
stfd f25, 88(SP) | |||
stfd f26, 96(SP) | |||
stfd f27, 104(SP) | |||
stfd f28, 112(SP) | |||
stfd f29, 120(SP) | |||
stfd f30, 128(SP) | |||
stfd f31, 136(SP) | |||
#ifdef __64BIT__ | |||
std r31, 144(SP) | |||
std r30, 152(SP) | |||
std r29, 160(SP) | |||
std r28, 168(SP) | |||
std r27, 176(SP) | |||
std r26, 184(SP) | |||
std r25, 192(SP) | |||
std r24, 200(SP) | |||
std r23, 208(SP) | |||
std r22, 216(SP) | |||
std r21, 224(SP) | |||
std r20, 232(SP) | |||
std r19, 240(SP) | |||
std r18, 248(SP) | |||
std r17, 256(SP) | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
stw r29, 152(SP) | |||
stw r28, 156(SP) | |||
stw r27, 160(SP) | |||
stw r26, 164(SP) | |||
stw r25, 168(SP) | |||
stw r24, 172(SP) | |||
stw r23, 176(SP) | |||
stw r22, 180(SP) | |||
stw r21, 184(SP) | |||
stw r20, 188(SP) | |||
stw r19, 192(SP) | |||
stw r18, 196(SP) | |||
stw r17, 200(SP) | |||
stw r16, 204(SP) | |||
stw r15, 208(SP) | |||
#endif | |||
// stfd f1, ALPHA_SP | |||
// stw r0, FZERO | |||
#if defined(_AIX) || defined(__APPLE__) | |||
#if !defined(__64BIT__) && defined(DOUBLE) | |||
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
#endif | |||
#endif | |||
slwi LDC, LDC, 2 | |||
#if defined(TRMMKERNEL) | |||
#if defined(linux) && defined(__64BIT__) | |||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
#endif | |||
#if defined(_AIX) || defined(__APPLE__) | |||
#ifdef __64BIT__ | |||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
#else | |||
#ifdef DOUBLE | |||
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
#else | |||
lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
#endif | |||
#endif | |||
#endif | |||
#endif | |||
cmpwi cr0, M, 0 | |||
ble .L999_H1 | |||
cmpwi cr0, N, 0 | |||
ble .L999_H1 | |||
cmpwi cr0, K, 0 | |||
ble .L999_H1 | |||
li PRE, 384 | |||
li o4 , 4 | |||
li o8 , 8 | |||
li o12, 12 | |||
li o16, 16 | |||
li o32, 32 | |||
li o48, 48 | |||
addi T1, SP, 300 | |||
stfs f1, 0(T1) | |||
stfs f1, 4(T1) | |||
stfs f1, 8(T1) | |||
stfs f1,12(T1) | |||
lxsspx vs28, 0, T1 | |||
xxspltw alpha_r, vs28 , 0 | |||
lxvw4x alpha_vr, 0, T1 | |||
#include "sgemm_logic_16x8_power8.S" | |||
.L999: | |||
addi r3, 0, 0 | |||
lfd f14, 0(SP) | |||
lfd f15, 8(SP) | |||
lfd f16, 16(SP) | |||
lfd f17, 24(SP) | |||
lfd f18, 32(SP) | |||
lfd f19, 40(SP) | |||
lfd f20, 48(SP) | |||
lfd f21, 56(SP) | |||
lfd f22, 64(SP) | |||
lfd f23, 72(SP) | |||
lfd f24, 80(SP) | |||
lfd f25, 88(SP) | |||
lfd f26, 96(SP) | |||
lfd f27, 104(SP) | |||
lfd f28, 112(SP) | |||
lfd f29, 120(SP) | |||
lfd f30, 128(SP) | |||
lfd f31, 136(SP) | |||
#ifdef __64BIT__ | |||
ld r31, 144(SP) | |||
ld r30, 152(SP) | |||
ld r29, 160(SP) | |||
ld r28, 168(SP) | |||
ld r27, 176(SP) | |||
ld r26, 184(SP) | |||
ld r25, 192(SP) | |||
ld r24, 200(SP) | |||
ld r23, 208(SP) | |||
ld r22, 216(SP) | |||
ld r21, 224(SP) | |||
ld r20, 232(SP) | |||
ld r19, 240(SP) | |||
ld r18, 248(SP) | |||
ld r17, 256(SP) | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
#else | |||
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
lwz r29, 152(SP) | |||
lwz r28, 156(SP) | |||
lwz r27, 160(SP) | |||
lwz r26, 164(SP) | |||
lwz r25, 168(SP) | |||
lwz r24, 172(SP) | |||
lwz r23, 176(SP) | |||
lwz r22, 180(SP) | |||
lwz r21, 184(SP) | |||
lwz r20, 188(SP) | |||
lwz r19, 192(SP) | |||
lwz r18, 196(SP) | |||
lwz r17, 200(SP) | |||
lwz r16, 204(SP) | |||
lwz r15, 208(SP) | |||
#endif | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
#endif |
@@ -0,0 +1,364 @@ | |||
/*************************************************************************** | |||
Copyright (c) 2013-2016, The OpenBLAS Project | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the OpenBLAS project nor the names of | |||
its contributors may be used to endorse or promote products | |||
derived from this software without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
*****************************************************************************/ | |||
/************************************************************************************** | |||
* 2016/03/14 Werner Saar (wernsaar@googlemail.com) | |||
* BLASTEST : OK | |||
* CTEST : OK | |||
* TEST : OK | |||
**************************************************************************************/ | |||
/*********************************************************************/ | |||
/* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
/* All rights reserved. */ | |||
/* */ | |||
/* Redistribution and use in source and binary forms, with or */ | |||
/* without modification, are permitted provided that the following */ | |||
/* conditions are met: */ | |||
/* */ | |||
/* 1. Redistributions of source code must retain the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer. */ | |||
/* */ | |||
/* 2. Redistributions in binary form must reproduce the above */ | |||
/* copyright notice, this list of conditions and the following */ | |||
/* disclaimer in the documentation and/or other materials */ | |||
/* provided with the distribution. */ | |||
/* */ | |||
/* THIS SOFTWARE IS PROVIDED BY THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, */ | |||
/* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF */ | |||
/* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE */ | |||
/* DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OF TEXAS AT */ | |||
/* AUSTIN OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, */ | |||
/* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES */ | |||
/* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE */ | |||
/* GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR */ | |||
/* BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF */ | |||
/* LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT */ | |||
/* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT */ | |||
/* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE */ | |||
/* POSSIBILITY OF SUCH DAMAGE. */ | |||
/* */ | |||
/* The views and conclusions contained in the software and */ | |||
/* documentation are those of the authors and should not be */ | |||
/* interpreted as representing official policies, either expressed */ | |||
/* or implied, of The University of Texas at Austin. */ | |||
/*********************************************************************/ | |||
#define ASSEMBLER | |||
#include "common.h" | |||
#include "def_vsx.h" | |||
/* LOAD: integer load mnemonic chosen by word size (lwz for 32-bit builds,
   ld for 64-bit builds).
   NOTE(review): LOAD is not referenced in the visible part of this file. */
#ifndef __64BIT__ | |||
#define LOAD lwz | |||
#else | |||
#define LOAD ld | |||
#endif | |||
/* STACKSIZE: number of bytes the prologue subtracts from SP.
   ALPHA_SP / FZERO: SP-relative slots near the top of that frame.
   NOTE(review): the only visible uses of ALPHA_SP and FZERO are the
   commented-out stores in the prologue. */
#ifdef __64BIT__ | |||
#define STACKSIZE 320 | |||
#define ALPHA_SP 296(SP) | |||
#define FZERO 304(SP) | |||
#else | |||
#define STACKSIZE 240 | |||
#define ALPHA_SP 224(SP) | |||
#define FZERO 232(SP) | |||
#endif | |||
/* Argument registers. M, N and K use r3-r5 on every configuration below
   (presumably the three matrix dimensions - confirm against the caller). */
#define M r3 | |||
#define N r4 | |||
#define K r5 | |||
/* Linux: registers for A, B, C, LDC and OFFSET; the mapping differs between
   32-bit and 64-bit builds. In the 64-bit TRMM case OFFSET (r6) is filled
   from the stack by the prologue. */
#ifdef linux | |||
#ifndef __64BIT__ | |||
#define A r6 | |||
#define B r7 | |||
#define C r8 | |||
#define LDC r9 | |||
#define OFFSET r10 | |||
#else | |||
#define A r7 | |||
#define B r8 | |||
#define C r9 | |||
#define LDC r10 | |||
#define OFFSET r6 | |||
#endif | |||
#endif | |||
/* AIX / Darwin: the 32-bit DOUBLE build moves A/B/C up by one register and
   keeps LDC in r7 (the prologue loads it from the stack); every other
   build uses the same mapping as 64-bit Linux. */
#if defined(_AIX) || defined(__APPLE__) | |||
#if !defined(__64BIT__) && defined(DOUBLE) | |||
#define A r8 | |||
#define B r9 | |||
#define C r10 | |||
#define LDC r7 | |||
#define OFFSET r6 | |||
#else | |||
#define A r7 | |||
#define B r8 | |||
#define C r9 | |||
#define LDC r10 | |||
#define OFFSET r6 | |||
#endif | |||
#endif | |||
/* VSX registers that receive alpha in the prologue (both are loaded from a
   16-byte stack buffer filled with four copies of f1). */
#define alpha_r vs30
#define alpha_vr vs31

/* o0 is the literal 0; o4/o8/o12/o16/o32/o48 are GPRs that the prologue
   loads with the matching byte offsets. */
#define o0 0
#define o12 r14
#define o4 r15
#define o8 r17
#define o16 r27
#define o32 r28
#define o48 r29

/* Working registers. KK is initialised from OFFSET in the prologue and T1
   holds the alpha buffer address; the rest are presumably consumed by the
   included macro/logic files - confirm there.
   KKK is spelled r21 like every other alias; the bare number 21 only worked
   where the assembler accepts numeric register operands. */
#define K1 r16
#define L r18
#define T1 r19
#define KK r20
#define KKK r21
#define I r22
#define J r23
#define AO r24
#define BO r25
#define CO r26
/* PRE is loaded with 256 in the prologue.
   NOTE(review): looks like a prefetch distance - confirm in the macros. */
#define PRE r30
#define T2 r31
#include "sgemm_macros_16x8_power8.S" | |||
#ifndef NEEDPARAM | |||
PROLOGUE | |||
PROFCODE | |||
addi SP, SP, -STACKSIZE | |||
li r0, 0 | |||
stfd f14, 0(SP) | |||
stfd f15, 8(SP) | |||
stfd f16, 16(SP) | |||
stfd f17, 24(SP) | |||
stfd f18, 32(SP) | |||
stfd f19, 40(SP) | |||
stfd f20, 48(SP) | |||
stfd f21, 56(SP) | |||
stfd f22, 64(SP) | |||
stfd f23, 72(SP) | |||
stfd f24, 80(SP) | |||
stfd f25, 88(SP) | |||
stfd f26, 96(SP) | |||
stfd f27, 104(SP) | |||
stfd f28, 112(SP) | |||
stfd f29, 120(SP) | |||
stfd f30, 128(SP) | |||
stfd f31, 136(SP) | |||
#ifdef __64BIT__ | |||
std r31, 144(SP) | |||
std r30, 152(SP) | |||
std r29, 160(SP) | |||
std r28, 168(SP) | |||
std r27, 176(SP) | |||
std r26, 184(SP) | |||
std r25, 192(SP) | |||
std r24, 200(SP) | |||
std r23, 208(SP) | |||
std r22, 216(SP) | |||
std r21, 224(SP) | |||
std r20, 232(SP) | |||
std r19, 240(SP) | |||
std r18, 248(SP) | |||
std r17, 256(SP) | |||
std r16, 264(SP) | |||
std r15, 272(SP) | |||
std r14, 280(SP) | |||
#else | |||
stw r31, 144(SP) | |||
stw r30, 148(SP) | |||
stw r29, 152(SP) | |||
stw r28, 156(SP) | |||
stw r27, 160(SP) | |||
stw r26, 164(SP) | |||
stw r25, 168(SP) | |||
stw r24, 172(SP) | |||
stw r23, 176(SP) | |||
stw r22, 180(SP) | |||
stw r21, 184(SP) | |||
stw r20, 188(SP) | |||
stw r19, 192(SP) | |||
stw r18, 196(SP) | |||
stw r17, 200(SP) | |||
stw r16, 204(SP) | |||
stw r15, 208(SP) | |||
stw r14, 212(SP) | |||
#endif | |||
// stfd f1, ALPHA_SP | |||
// stw r0, FZERO | |||
#if defined(_AIX) || defined(__APPLE__) | |||
#if !defined(__64BIT__) && defined(DOUBLE) | |||
lwz LDC, FRAMESLOT(0) + STACKSIZE(SP) | |||
#endif | |||
#endif | |||
slwi LDC, LDC, BASE_SHIFT | |||
#if defined(TRMMKERNEL) | |||
#if defined(linux) && defined(__64BIT__) | |||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
#endif | |||
#if defined(_AIX) || defined(__APPLE__) | |||
#ifdef __64BIT__ | |||
ld OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
#else | |||
#ifdef DOUBLE | |||
lwz OFFSET, FRAMESLOT(1) + STACKSIZE(SP) | |||
#else | |||
lwz OFFSET, FRAMESLOT(0) + STACKSIZE(SP) | |||
#endif | |||
#endif | |||
#endif | |||
#endif | |||
mr KK, OFFSET | |||
#if defined(TRMMKERNEL) && !defined(LEFT) | |||
neg KK, KK | |||
#endif | |||
cmpwi cr0, M, 0 | |||
ble .L999_H1 | |||
cmpwi cr0, N, 0 | |||
ble .L999_H1 | |||
cmpwi cr0, K, 0 | |||
ble .L999_H1 | |||
li PRE, 256 | |||
li o4 , 4 | |||
li o8 , 8 | |||
li o12, 12 | |||
li o16, 16 | |||
li o32, 32 | |||
li o48, 48 | |||
addi T1, SP, 300 | |||
stfs f1, 0(T1) | |||
stfs f1, 4(T1) | |||
stfs f1, 8(T1) | |||
stfs f1,12(T1) | |||
lxsspx vs28, 0, T1 | |||
xxspltw alpha_r, vs28 , 0 | |||
lxvw4x alpha_vr, 0, T1 | |||
#include "strmm_logic_16x8_power8.S" | |||
.L999: | |||
addi r3, 0, 0 | |||
lfd f14, 0(SP) | |||
lfd f15, 8(SP) | |||
lfd f16, 16(SP) | |||
lfd f17, 24(SP) | |||
lfd f18, 32(SP) | |||
lfd f19, 40(SP) | |||
lfd f20, 48(SP) | |||
lfd f21, 56(SP) | |||
lfd f22, 64(SP) | |||
lfd f23, 72(SP) | |||
lfd f24, 80(SP) | |||
lfd f25, 88(SP) | |||
lfd f26, 96(SP) | |||
lfd f27, 104(SP) | |||
lfd f28, 112(SP) | |||
lfd f29, 120(SP) | |||
lfd f30, 128(SP) | |||
lfd f31, 136(SP) | |||
#ifdef __64BIT__ | |||
ld r31, 144(SP) | |||
ld r30, 152(SP) | |||
ld r29, 160(SP) | |||
ld r28, 168(SP) | |||
ld r27, 176(SP) | |||
ld r26, 184(SP) | |||
ld r25, 192(SP) | |||
ld r24, 200(SP) | |||
ld r23, 208(SP) | |||
ld r22, 216(SP) | |||
ld r21, 224(SP) | |||
ld r20, 232(SP) | |||
ld r19, 240(SP) | |||
ld r18, 248(SP) | |||
ld r17, 256(SP) | |||
ld r16, 264(SP) | |||
ld r15, 272(SP) | |||
ld r14, 280(SP) | |||
#else | |||
lwz r31, 144(SP) | |||
lwz r30, 148(SP) | |||
lwz r29, 152(SP) | |||
lwz r28, 156(SP) | |||
lwz r27, 160(SP) | |||
lwz r26, 164(SP) | |||
lwz r25, 168(SP) | |||
lwz r24, 172(SP) | |||
lwz r23, 176(SP) | |||
lwz r22, 180(SP) | |||
lwz r21, 184(SP) | |||
lwz r20, 188(SP) | |||
lwz r19, 192(SP) | |||
lwz r18, 196(SP) | |||
lwz r17, 200(SP) | |||
lwz r16, 204(SP) | |||
lwz r15, 208(SP) | |||
lwz r14, 212(SP) | |||
#endif | |||
addi SP, SP, STACKSIZE | |||
blr | |||
EPILOGUE | |||
#endif |
@@ -1961,15 +1961,15 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#if defined(POWER8) | |||
/* Each macro is defined exactly once. The earlier SNUMOPT 4 and SGEMM 4x4
   unroll definitions conflicted with the ones kept here (an incompatible
   macro redefinition) and were overridden by them anyway. */
#define SNUMOPT 16
#define DNUMOPT 8

#define GEMM_DEFAULT_OFFSET_A 384
#define GEMM_DEFAULT_OFFSET_B 1024
#define GEMM_DEFAULT_ALIGN 0x03fffUL

/* Unroll factors; the SGEMM pair is 16x8, the same as the numbers in the
   sgemm_kernel_16x8_power8.S kernel name. */
#define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 8
#define DGEMM_DEFAULT_UNROLL_M 16
#define DGEMM_DEFAULT_UNROLL_N 4
#define CGEMM_DEFAULT_UNROLL_M 2
@@ -1977,12 +1977,12 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#define ZGEMM_DEFAULT_UNROLL_M 8
#define ZGEMM_DEFAULT_UNROLL_N 2

/* Per-precision P and Q defaults. Each macro is defined exactly once; the
   earlier SGEMM_DEFAULT_P 992 and SGEMM_DEFAULT_Q 504 definitions conflicted
   with the values kept here and were overridden by them anyway. */
#define SGEMM_DEFAULT_P 960
#define DGEMM_DEFAULT_P 480
#define CGEMM_DEFAULT_P 488
#define ZGEMM_DEFAULT_P 240

#define SGEMM_DEFAULT_Q 720
#define DGEMM_DEFAULT_Q 720
#define CGEMM_DEFAULT_Q 400
#define ZGEMM_DEFAULT_Q 360