/*************************************************************************** Copyright (c) 2013-2019, The OpenBLAS Project All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the OpenBLAS project nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *****************************************************************************/ #define unit_size 16 #define DISP32(ind,disp) (ind*unit_size*32+disp) #define DISP16(ind,disp) (ind*unit_size*16+disp) #define DISP8(ind,disp) (ind*unit_size*8+disp) #define DISP4(ind,disp) (ind*unit_size*4+disp) #define DISP2(ind,disp) (ind*unit_size*2+disp) #define DISP1(ind,disp) (ind*unit_size+disp) #define DISPX(disp) (disp) /* HELPERS FOR SAVE */ /* {r0,i0} and {r1,i1} into {r0,r1} {i0,i1} */ .macro LOAD_COUPLE_AS_RR_II VS_OUT1,VS_OUT2,VS_TEMP1,VS_TEMP2,REG,LOFFSET #ifndef TRMMKERNEL lxv \VS_TEMP1, DISPX(\LOFFSET)(\REG) lxv \VS_TEMP2, DISPX(\LOFFSET+16)(\REG) xxmrgld \VS_OUT1,\VS_TEMP1,\VS_TEMP2 xxmrghd \VS_OUT2,\VS_TEMP1,\VS_TEMP2 #endif .endm /*from 2 result {a0r*br,a0i*bi} and {a1r*br,a1i*bi} pack into {a0r*br,a1r*br} and {a0i*bi,a1i*bi}*/ .macro RESULT_INTO_REALREAL_IMAGEIMAGE VSIN1,VSIN2,VSOUT1,VSOUT2 xxmrgld \VSOUT1, \VSIN1,\VSIN2 /* real*real from 2 results*/ xxmrghd \VSOUT2, \VSIN1,\VSIN2 /* imag*imag from 2 results*/ .endm /*from 2 result {a0r*bi,a0i*br} and {a1r*bi,a1i*br} pack into {a0r*bi,a1r*bi} and {a0i*br,a1i*br}*/ .macro RESULT_INTO_REALIMAG_IMAGREAL VSIN1,VSIN2,VSOUT1,VSOUT2 xxmrgld \VSOUT1, \VSIN1,\VSIN2 /* real*imag */ xxmrghd \VSOUT2, \VSIN1,\VSIN2 /* imag*real*/ .endm /* {a0r*br op a0i*bi ,a1r*br op a1i*bi} ~ {r0,r1}; {a0r*bi op a0i*br ,a1r*bi op a1i*br} ~ {i0,i1}*/ .macro AGGREGATE_REALS_IMAGES VSINR_OUT1,VSINR,VSINI_OUT2,VSINI #if defined(NN) || defined(NT) || defined(TN) || defined(TT) xvsubdp \VSINR_OUT1,\VSINR_OUT1,\VSINR xvadddp \VSINI_OUT2,\VSINI_OUT2,\VSINI #elif defined(CN) || defined(CT) || defined(RN) || defined(RT) xvadddp \VSINR_OUT1,\VSINR_OUT1,\VSINR xvsubdp \VSINI_OUT2,\VSINI_OUT2,\VSINI #elif defined(NC) || defined(TC) || defined(NR) || defined(TR) xvadddp \VSINR_OUT1,\VSINR_OUT1,\VSINR xvsubdp \VSINI_OUT2,\VSINI,\VSINI_OUT2 #else // CC || CR || RC || RR /*we will assume {-alpha_r,-alpha_i} for this case */ /*i1i2-r1r2 so we will 
negate alpha_r instead to fix the sign */
    xvsubdp \VSINR_OUT1,\VSINR,\VSINR_OUT1
    /* we will negate alpha_i instead to fix the sign */
    xvadddp \VSINI_OUT2,\VSINI_OUT2,\VSINI
#endif
.endm

/* {i0,i1} * {alpha_i,alpha_i} - VSOUT1 ; VSOUT2 + {r0,r1} * {alpha_i,alpha_i} */
.macro MULT_APLHA_PART1 VSINRR,VSINII,VSOUT1,VSOUT2
#ifndef TRMMKERNEL
    xvmsubadp \VSOUT1,\VSINII, alpha_i
    xvmaddadp \VSOUT2,\VSINRR, alpha_i
#else
    xvmuldp \VSOUT1,\VSINII, alpha_i
    xvmuldp \VSOUT2,\VSINRR, alpha_i
#endif
.endm

/* {r0,r1} * {alpha_r,alpha_r} - VSOUT1 ; VSOUT2 + {i0,i1} * {alpha_r,alpha_r} */
.macro MULT_APLHA_PART2 VSINRR,VSINII,VSOUT1,VSOUT2
    xvmsubadp \VSOUT1,\VSINRR, alpha_r
    xvmaddadp \VSOUT2,\VSINII, alpha_r
.endm

/* unpack to store 2 {r,r} {i,i} pairs as {r,i} {r,i} (big endian because of stxv) */
.macro UNPACK_FOR_STORE VSIN1,VSIN2,VSOUT1,VSOUT2
    xxmrghd \VSOUT1,\VSIN2,\VSIN1
    xxmrgld \VSOUT2,\VSIN2,\VSIN1
.endm

.macro STORE_COUPLE REG,LOFFSET,VSIN1,VSIN2
    stxv \VSIN1, DISPX(\LOFFSET)(\REG)
    stxv \VSIN2, DISPX(\LOFFSET+16)(\REG)
.endm

.macro SAVE8 VSRes1,VSRes2,VSRes3,VSRes4,VSRes5,VSRes6,VSRes7,VSRes8,VSRes9,VSRes10,VSRes11,VSRes12,VSRes13,VSRes14,VSRes15,VSRes16,BASE_REG,LOFFSET
    RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes1,\VSRes3,vs2,vs3
    LOAD_COUPLE_AS_RR_II vs14,vs15,vs18,vs19,\BASE_REG,\LOFFSET
    RESULT_INTO_REALIMAG_IMAGREAL \VSRes2,\VSRes4,vs4,vs5
    LOAD_COUPLE_AS_RR_II vs16,vs17,vs20,vs21,\BASE_REG,(\LOFFSET+32)
    RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes5,\VSRes7,vs6,vs7
    LOAD_COUPLE_AS_RR_II vs24,vs25,vs18,vs19,\BASE_REG,(\LOFFSET+64)
    RESULT_INTO_REALIMAG_IMAGREAL \VSRes6,\VSRes8,vs8,vs9
    LOAD_COUPLE_AS_RR_II vs26,vs27,vs20,vs21,\BASE_REG,(\LOFFSET+96)
    RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes9,\VSRes11,vs10,vs11
    AGGREGATE_REALS_IMAGES vs2,vs3,vs4,vs5
    RESULT_INTO_REALIMAG_IMAGREAL \VSRes10,\VSRes12,vs12,vs13
    AGGREGATE_REALS_IMAGES vs6,vs7,vs8,vs9
    RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes13,\VSRes15,\VSRes1,\VSRes2
    MULT_APLHA_PART1 vs2,vs4,vs14,vs15
    RESULT_INTO_REALIMAG_IMAGREAL \VSRes14,\VSRes16,\VSRes3,\VSRes4
    MULT_APLHA_PART1 vs6,vs8,vs16,vs17
    MULT_APLHA_PART2 vs2,vs4,vs14,vs15
    AGGREGATE_REALS_IMAGES vs10,vs11,vs12,vs13
    MULT_APLHA_PART2 vs6,vs8,vs16,vs17
    AGGREGATE_REALS_IMAGES \VSRes1,\VSRes2,\VSRes3,\VSRes4
    UNPACK_FOR_STORE vs14,vs15,vs7,vs9
    MULT_APLHA_PART1 vs10,vs12,vs24,vs25
    UNPACK_FOR_STORE vs16,vs17,vs3,vs5
    MULT_APLHA_PART1 \VSRes1,\VSRes3,vs26,vs27
    STORE_COUPLE \BASE_REG,\LOFFSET,vs7,vs9
    MULT_APLHA_PART2 vs10,vs12,vs24,vs25
    STORE_COUPLE \BASE_REG,(\LOFFSET+32),vs3,vs5
    MULT_APLHA_PART2 \VSRes1,\VSRes3,vs26,vs27
    UNPACK_FOR_STORE vs24,vs25,vs10,vs12
    UNPACK_FOR_STORE vs26,vs27,\VSRes1,\VSRes3
    STORE_COUPLE \BASE_REG,(\LOFFSET+64),vs10,vs12
    STORE_COUPLE \BASE_REG,(\LOFFSET+96),\VSRes1,\VSRes3
.endm

.macro SAVE4 VSRes1,VSRes2,VSRes3,VSRes4,VSRes5,VSRes6,VSRes7,VSRes8,BASE_REG,LOFFSET
    RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes1,\VSRes3,vs2,vs3
    LOAD_COUPLE_AS_RR_II vs14,vs15,vs18,vs19,\BASE_REG,\LOFFSET
    RESULT_INTO_REALIMAG_IMAGREAL \VSRes2,\VSRes4,vs4,vs5
    LOAD_COUPLE_AS_RR_II vs16,vs17,vs20,vs21,\BASE_REG,(\LOFFSET+32)
    RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes5,\VSRes7,vs6,vs7
    RESULT_INTO_REALIMAG_IMAGREAL \VSRes6,\VSRes8,vs8,vs9
    AGGREGATE_REALS_IMAGES vs2,vs3,vs4,vs5
    AGGREGATE_REALS_IMAGES vs6,vs7,vs8,vs9
    MULT_APLHA_PART1 vs2,vs4,vs14,vs15
    MULT_APLHA_PART1 vs6,vs8,vs16,vs17
    MULT_APLHA_PART2 vs2,vs4,vs14,vs15
    MULT_APLHA_PART2 vs6,vs8,vs16,vs17
    UNPACK_FOR_STORE vs14,vs15,vs7,vs9
    UNPACK_FOR_STORE vs16,vs17,vs3,vs5
    STORE_COUPLE \BASE_REG,\LOFFSET,vs7,vs9
    STORE_COUPLE \BASE_REG,(\LOFFSET+32),vs3,vs5
.endm

.macro SAVE2
VSRes1,VSRes2,VSRes3,VSRes4,BASE_REG,LOFFSET RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes1,\VSRes3,vs2,vs3 LOAD_COUPLE_AS_RR_II vs14,vs15,vs18,vs19,\BASE_REG,\LOFFSET RESULT_INTO_REALIMAG_IMAGREAL \VSRes2,\VSRes4,vs4,vs5 AGGREGATE_REALS_IMAGES vs2,vs3,vs4,vs5 MULT_APLHA_PART1 vs2,vs4, vs14,vs15 MULT_APLHA_PART2 vs2,vs4, vs14,vs15 UNPACK_FOR_STORE vs14,vs15,vs7,vs9 STORE_COUPLE \BASE_REG,\LOFFSET,vs7,vs9 .endm .macro SAVE1 VSRes1,VSRes2,BASE_REG,LOFFSET RESULT_INTO_REALREAL_IMAGEIMAGE \VSRes1,\VSRes1,vs2,vs3 #ifndef TRMMKERNEL lxv vs18, (\LOFFSET)(\BASE_REG) xxmrgld vs14,vs18,vs18 xxmrghd vs15,vs18,vs18 #endif RESULT_INTO_REALIMAG_IMAGREAL \VSRes2,\VSRes2,vs4,vs5 AGGREGATE_REALS_IMAGES vs2,vs3,vs4,vs5 MULT_APLHA_PART1 vs2,vs4, vs14,vs15 MULT_APLHA_PART2 vs2,vs4, vs14,vs15 UNPACK_FOR_STORE vs14,vs15,vs7,vs9 xxmrghd vs7,vs15,vs14 stxv vs7, (\LOFFSET)(\BASE_REG) .endm /********************************************************************************************** * .macros for N=2 and M=8 **********************************************************************************************/ .macro Zero2x8 xxlxor vs32, vs32, vs32 xxlxor vs33, vs33, vs33 xxlxor vs34, vs34, vs34 xxlxor vs35, vs35, vs35 xxlxor vs36, vs36, vs36 xxlxor vs37, vs37, vs37 xxlxor vs38, vs38, vs38 xxlxor vs39, vs39, vs39 xxlxor vs40, vs40, vs40 xxlxor vs41, vs41, vs41 xxlxor vs42, vs42, vs42 xxlxor vs43, vs43, vs43 xxlxor vs44, vs44, vs44 xxlxor vs45, vs45, vs45 xxlxor vs46, vs46, vs46 xxlxor vs47, vs47, vs47 xxlxor vs48, vs48, vs48 xxlxor vs49, vs49, vs49 xxlxor vs50, vs50, vs50 xxlxor vs51, vs51, vs51 xxlxor vs52, vs52, vs52 xxlxor vs53, vs53, vs53 xxlxor vs54, vs54, vs54 xxlxor vs55, vs55, vs55 xxlxor vs56, vs56, vs56 xxlxor vs57, vs57, vs57 xxlxor vs58, vs58, vs58 xxlxor vs59, vs59, vs59 xxlxor vs60, vs60, vs60 xxlxor vs61, vs61, vs61 xxlxor vs62, vs62, vs62 xxlxor vs63, vs63, vs63 .endm .macro LOAD2x8 LOAD2x8O 0,0 .endm .macro LOAD2x8O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B lxv vs18, (\OffsetB+16)(BO) // load real,imag from B xxswapd vs17, vs16 xxswapd vs19, vs18 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A lxv vs1, (16+\OffsetA)(AO) // load real,imag from A lxv vs2, (32+\OffsetA)(AO) // load real,imag from A lxv vs3, (48+\OffsetA)(AO) // load real,imag from A lxv vs4, (64+\OffsetA)(AO) // load real,imag from A lxv vs5, (80+\OffsetA)(AO) // load real,imag from A lxv vs6, (96+\OffsetA)(AO) // load real,imag from A lxv vs7, (112+\OffsetA)(AO) // load real,imag from A .endm .macro END2x8_NORMAL END2x8 AO,BO,128,32 .endm .macro END2x8_WITHOUT_ADD END2x8 AO,BO,0,0 .endm .macro END2x8 AREG, BREG, OffsetA, OffsetB .if \OffsetB != 0 addi \BREG, \BREG, \OffsetB .endif .if \OffsetA != 0 addi \AREG, \AREG, \OffsetA .endif xvmaddadp vs32, vs0, vs16 xvmaddadp vs48, vs0, vs18 xvmaddadp vs33, vs0, vs17 xvmaddadp vs49, vs0, vs19 xvmaddadp vs34, vs1, vs16 xvmaddadp vs50, vs1, vs18 xvmaddadp vs35, vs1, vs17 xvmaddadp vs51, vs1, vs19 xvmaddadp vs36, vs2, vs16 xvmaddadp vs52, vs2, vs18 xvmaddadp vs37, vs2, vs17 xvmaddadp vs53, vs2, vs19 xvmaddadp vs38, vs3, vs16 xvmaddadp vs54, vs3, vs18 xvmaddadp vs39, vs3, vs17 xvmaddadp vs55, vs3, vs19 xvmaddadp vs40, vs4, vs16 xvmaddadp vs56, vs4, vs18 xvmaddadp vs41, vs4, vs17 xvmaddadp vs57, vs4, vs19 xvmaddadp vs42, vs5, vs16 xvmaddadp vs58, vs5, vs18 xvmaddadp vs43, vs5, vs17 xvmaddadp vs59, vs5, vs19 xvmaddadp vs44, vs6, vs16 xvmaddadp vs60, vs6, vs18 xvmaddadp vs45, vs6, vs17 xvmaddadp vs61, vs6, vs19 xvmaddadp vs46, vs7, vs16 xvmaddadp vs62, vs7, vs18 xvmaddadp vs47, vs7, 
vs17 xvmaddadp vs63, vs7, vs19 .endm .macro LOAD2x8_2 LOAD2x8_2O 0,0 .endm .macro LOAD2x8_2O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B lxv vs18, (\OffsetB+16)(BO) // load real,imag from B lxv vs20, (\OffsetB+32)(BO) // load real,imag from B lxv vs22, (\OffsetB+48)(BO) // load real,imag from B xxswapd vs17, vs16 xxswapd vs19, vs18 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A lxv vs1, (16+\OffsetA)(AO) // load real,imag from A lxv vs2, (32+\OffsetA)(AO) // load real,imag from A lxv vs3, (48+\OffsetA)(AO) // load real,imag from A lxv vs4, (64+\OffsetA)(AO) // load real,imag from A lxv vs5, (80+\OffsetA)(AO) // load real,imag from A lxv vs6, (96+\OffsetA)(AO) // load real,imag from A lxv vs7, (112+\OffsetA)(AO) // load real,imag from A lxv vs8, (128+0+\OffsetA)(AO) // load real,imag from A lxv vs9, (128+16+\OffsetA)(AO) // load real,imag from A lxv vs10, (128+32+\OffsetA)(AO) // load real,imag from A lxv vs11, (128+48+\OffsetA)(AO) // load real,imag from A lxv vs12, (128+64+\OffsetA)(AO) // load real,imag from A lxv vs13, (128+80+\OffsetA)(AO) // load real,imag from A lxv vs14, (128+96+\OffsetA)(AO) // load real,imag from A lxv vs15, (128+112+\OffsetA)(AO) // load real,imag from A .endm .macro END2x8_2 /*for load2 offset will be 256 and 64*/ KERNEL2x8_2 AO,BO, 256,64,0 ,1,1 .endm .macro KERNEL2x8_E2 OffsetA,OffsetB, Index,IsLast KERNEL2x8_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,1 .endm .macro KERNEL2x8_L2 OffsetA,OffsetB, Index,IsLast KERNEL2x8_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,0 .endm .macro KERNEL2x8_2 AREG,BREG, OffsetA,OffsetB, Index,IsLast ,Complete xvmaddadp vs32, vs0, vs16 xvmaddadp vs48, vs0, vs18 xvmaddadp vs33, vs0, vs17 xvmaddadp vs49, vs0, vs19 xxswapd vs21, vs20 xxswapd vs23, vs22 xvmaddadp vs34, vs1, vs16 xvmaddadp vs50, vs1, vs18 xvmaddadp vs35, vs1, vs17 xvmaddadp vs51, vs1, vs19 .if \Complete==0 lxv vs0, DISP16(\Index, 0 + \OffsetA)(\AREG) // load real,imag from A lxv vs1, DISP16(\Index,16 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs36, vs2, vs16 xvmaddadp vs52, vs2, vs18 xvmaddadp vs37, vs2, vs17 xvmaddadp vs53, vs2, vs19 xvmaddadp vs38, vs3, vs16 xvmaddadp vs54, vs3, vs18 xvmaddadp vs39, vs3, vs17 xvmaddadp vs55, vs3, vs19 .if \Complete==0 lxv vs2, DISP16(\Index,32 + \OffsetA)(\AREG) // load real,imag from A lxv vs3, DISP16(\Index,48 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs40, vs4, vs16 xvmaddadp vs56, vs4, vs18 xvmaddadp vs41, vs4, vs17 xvmaddadp vs57, vs4, vs19 xvmaddadp vs42, vs5, vs16 xvmaddadp vs58, vs5, vs18 xvmaddadp vs43, vs5, vs17 xvmaddadp vs59, vs5, vs19 .if \Complete==0 lxv vs4, DISP16(\Index,64+ \OffsetA)(\AREG) // load real,imag from A lxv vs5, DISP16(\Index,64+16 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs44, vs6, vs16 xvmaddadp vs60, vs6, vs18 xvmaddadp vs45, vs6, vs17 xvmaddadp vs61, vs6, vs19 xvmaddadp vs46, vs7, vs16 xvmaddadp vs62, vs7, vs18 xvmaddadp vs47, vs7, vs17 xvmaddadp vs63, vs7, vs19 .if \Complete==0 lxv vs16, DISP4(\Index, 0+\OffsetB)(\BREG) // load real imag from B lxv vs18, DISP4(\Index, 16+\OffsetB)(\BREG) // load real,imag from B .endif xvmaddadp vs32, vs8, vs20 xvmaddadp vs48, vs8, vs22 .if \Complete==0 lxv vs6, DISP16(\Index,64+32 + \OffsetA)(\AREG) // load real,imag from A lxv vs7, DISP16(\Index,64+48 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs33, vs8, vs21 xvmaddadp vs49, vs8, vs23 .if \Complete==0 xxswapd vs17, vs16 xxswapd vs19, vs18 .endif xvmaddadp vs34, vs9, vs20 xvmaddadp vs50, vs9, vs22 xvmaddadp vs35, 
vs9, vs21 xvmaddadp vs51, vs9, vs23 .if \Complete==0 lxv vs8, DISP16(\Index,128+ + \OffsetA)(\AREG) // load real,imag from A lxv vs9, DISP16(\Index,128+16 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs36, vs10, vs20 xvmaddadp vs52, vs10, vs22 xvmaddadp vs37, vs10, vs21 xvmaddadp vs53, vs10, vs23 xvmaddadp vs38, vs11, vs20 xvmaddadp vs54, vs11, vs22 xvmaddadp vs39, vs11, vs21 xvmaddadp vs55, vs11, vs23 .if \Complete==0 lxv vs10, DISP16(\Index,128+32 + \OffsetA)(\AREG) // load real,imag from A lxv vs11, DISP16(\Index,128+48 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs40, vs12, vs20 xvmaddadp vs56, vs12, vs22 xvmaddadp vs41, vs12, vs21 xvmaddadp vs57, vs12, vs23 xvmaddadp vs42, vs13, vs20 xvmaddadp vs58, vs13, vs22 xvmaddadp vs43, vs13, vs21 xvmaddadp vs59, vs13, vs23 .if \Complete==0 lxv vs12, DISP16(\Index, 192 + \OffsetA)(\AREG) // load real,imag from A lxv vs13, DISP16(\Index,192 +16 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs44, vs14, vs20 xvmaddadp vs60, vs14, vs22 xvmaddadp vs45, vs14, vs21 xvmaddadp vs61, vs14, vs23 xvmaddadp vs46, vs15, vs20 xvmaddadp vs62, vs15, vs22 xvmaddadp vs47, vs15, vs21 xvmaddadp vs63, vs15, vs23 .if \Complete==0 lxv vs14, DISP16(\Index,192 +32 + \OffsetA)(\AREG) // load real,imag from A lxv vs15, DISP16(\Index,192 +48 + \OffsetA)(\AREG) // load real,imag from A lxv vs20, DISP4(\Index, 32+\OffsetB)(\BREG) // load real,imag from B lxv vs22, DISP4(\Index, 48+\OffsetB)(\BREG) // load real,imag from B .endif .if \IsLast==1 .if \Complete==1 addi \AREG, \AREG, DISP16(\Index,\OffsetA) addi \BREG, \BREG, DISP4(\Index,\OffsetB) .else addi \AREG, \AREG, DISP16(\Index,256) addi \BREG, \BREG, DISP4(\Index,64) .endif .endif .endm .macro KERNEL2x8 LOAD2x8 END2x8 AO, BO, 128,32 .endm .macro SAVE2x8 add T1, CO ,LDC SAVE8 vs32,vs33,vs34,vs35,vs36,vs37,vs38,vs39,vs40,vs41,vs42,vs43,vs44,vs45,vs46,vs47,CO,0 SAVE8 vs48,vs49,vs50,vs51,vs52,vs53,vs54,vs55,vs56,vs57,vs58,vs59,vs60,vs61,vs62,vs63,T1,0 addi CO, CO, 128 .endm /********************************************************************************************** * .macros for N=2 and M=4 **********************************************************************************************/ .macro Zero2x4 xxlxor vs32, vs32, vs32 xxlxor vs33, vs33, vs33 xxlxor vs34, vs34, vs34 xxlxor vs35, vs35, vs35 xxlxor vs36, vs36, vs36 xxlxor vs37, vs37, vs37 xxlxor vs38, vs38, vs38 xxlxor vs39, vs39, vs39 xxlxor vs40, vs40, vs40 xxlxor vs41, vs41, vs41 xxlxor vs42, vs42, vs42 xxlxor vs43, vs43, vs43 xxlxor vs44, vs44, vs44 xxlxor vs45, vs45, vs45 xxlxor vs46, vs46, vs46 xxlxor vs47, vs47, vs47 .endm .macro LOAD2x4 LOAD2x4O 0,0 .endm .macro LOAD2x4O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B lxv vs18, (\OffsetB+16)(BO) // load real,imag from B xxswapd vs17, vs16 xxswapd vs19, vs18 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A lxv vs1, (16+\OffsetA)(AO) // load real,imag from A lxv vs2, (32+\OffsetA)(AO) // load real,imag from A lxv vs3, (48+\OffsetA)(AO) // load real,imag from A .endm .macro END2x4_NORMAL END2x4 AO,BO,64,32 .endm .macro END2x4_WITHOUT_ADD END2x4 AO,BO,0,0 .endm .macro END2x4 AREG, BREG, OffsetA, OffsetB .if \OffsetB != 0 addi \BREG, \BREG, \OffsetB .endif .if \OffsetA != 0 addi \AREG, \AREG, \OffsetA .endif xvmaddadp vs32, vs0, vs16 xvmaddadp vs40, vs0, vs18 xvmaddadp vs33, vs0, vs17 xvmaddadp vs41, vs0, vs19 xvmaddadp vs34, vs1, vs16 xvmaddadp vs42, vs1, vs18 xvmaddadp vs35, vs1, vs17 xvmaddadp vs43, vs1, vs19 xvmaddadp vs36, vs2, vs16 xvmaddadp 
vs44, vs2, vs18 xvmaddadp vs37, vs2, vs17 xvmaddadp vs45, vs2, vs19 xvmaddadp vs38, vs3, vs16 xvmaddadp vs46, vs3, vs18 xvmaddadp vs39, vs3, vs17 xvmaddadp vs47, vs3, vs19 .endm .macro LOAD2x4_2 LOAD2x4_2O 0,0 .endm .macro LOAD2x4_2O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B lxv vs18, (\OffsetB+16)(BO) // load real,imag from B lxv vs20, (\OffsetB+32)(BO) // load real,imag from B lxv vs22, (\OffsetB+48)(BO) // load real,imag from B xxswapd vs17, vs16 xxswapd vs19, vs18 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A lxv vs1, (16+\OffsetA)(AO) // load real,imag from A lxv vs2, (32+\OffsetA)(AO) // load real,imag from A lxv vs3, (48+\OffsetA)(AO) // load real,imag from A lxv vs8, (64+\OffsetA)(AO) // load real,imag from A lxv vs9, (80+\OffsetA)(AO) // load real,imag from A lxv vs10, (96+\OffsetA)(AO) // load real,imag from A lxv vs11, (112+\OffsetA)(AO) // load real,imag from A .endm .macro END2x4_2 /*for load2 offset will be 128 and 64*/ KERNEL2x4_2 AO,BO, 128,64,0 ,1,1 .endm .macro KERNEL2x4_E2 OffsetA,OffsetB, Index,IsLast KERNEL2x4_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,1 .endm .macro KERNEL2x4_L2 OffsetA,OffsetB, Index,IsLast KERNEL2x4_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,0 .endm .macro KERNEL2x4_2 AREG,BREG, OffsetA,OffsetB, Index,IsLast ,Complete xvmaddadp vs32, vs0, vs16 xvmaddadp vs40, vs0, vs18 xvmaddadp vs33, vs0, vs17 xvmaddadp vs41, vs0, vs19 xxswapd vs21, vs20 xxswapd vs23, vs22 xvmaddadp vs34, vs1, vs16 xvmaddadp vs42, vs1, vs18 xvmaddadp vs35, vs1, vs17 xvmaddadp vs43, vs1, vs19 .if \Complete==0 lxv vs0, DISP8(\Index, 0 + \OffsetA)(\AREG) // load real,imag from A lxv vs1, DISP8(\Index,16 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs36, vs2, vs16 xvmaddadp vs44, vs2, vs18 xvmaddadp vs37, vs2, vs17 xvmaddadp vs45, vs2, vs19 xvmaddadp vs38, vs3, vs16 xvmaddadp vs46, vs3, vs18 xvmaddadp vs39, vs3, vs17 xvmaddadp vs47, vs3, vs19 .if \Complete==0 lxv vs2, DISP8(\Index,32 + \OffsetA)(\AREG) // load real,imag from A lxv vs3, DISP8(\Index,48 + \OffsetA)(\AREG) // load real,imag from A .endif .if \Complete==0 lxv vs16, DISP4(\Index, 0+\OffsetB)(\BREG) // load real imag from B lxv vs18, DISP4(\Index, 16+\OffsetB)(\BREG) // load real,imag from B .endif xvmaddadp vs32, vs8, vs20 xvmaddadp vs40, vs8, vs22 xvmaddadp vs33, vs8, vs21 xvmaddadp vs41, vs8, vs23 .if \Complete==0 xxswapd vs17, vs16 xxswapd vs19, vs18 .endif xvmaddadp vs34, vs9, vs20 xvmaddadp vs42, vs9, vs22 xvmaddadp vs35, vs9, vs21 xvmaddadp vs43, vs9, vs23 .if \Complete==0 lxv vs8, DISP8(\Index,64+0+ \OffsetA)(\AREG) // load real,imag from A lxv vs9, DISP8(\Index,64+16 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs36, vs10, vs20 xvmaddadp vs44, vs10, vs22 xvmaddadp vs37, vs10, vs21 xvmaddadp vs45, vs10, vs23 xvmaddadp vs38, vs11, vs20 xvmaddadp vs46, vs11, vs22 xvmaddadp vs39, vs11, vs21 xvmaddadp vs47, vs11, vs23 .if \Complete==0 lxv vs10, DISP8(\Index,64+32 + \OffsetA)(\AREG) // load real,imag from A lxv vs11, DISP8(\Index,64+48 + \OffsetA)(\AREG) // load real,imag from A .endif .if \Complete==0 lxv vs20, DISP4(\Index, 32+\OffsetB)(\BREG) // load real,imag from B lxv vs22, DISP4(\Index, 48+\OffsetB)(\BREG) // load real,imag from B .endif .if \IsLast==1 .if \Complete==1 addi \AREG, \AREG, DISP8(\Index,\OffsetA) addi \BREG, \BREG, DISP4(\Index,\OffsetB) .else addi \AREG, \AREG, DISP8(\Index,128) addi \BREG, \BREG, DISP4(\Index,64) .endif .endif .endm .macro KERNEL2x4 LOAD2x4 END2x4 AO, BO, 64,32 .endm .macro SAVE2x4 add T1, CO ,LDC SAVE4 
vs32,vs33,vs34,vs35,vs36,vs37,vs38,vs39,CO,0 SAVE4 vs40,vs41,vs42,vs43,vs44,vs45,vs46,vs47,T1,0 addi CO, CO, 64 .endm /********************************************************************************************** * .macros for N=2 and M=2 **********************************************************************************************/ .macro Zero2x2 xxlxor vs32, vs32, vs32 xxlxor vs33, vs33, vs33 xxlxor vs34, vs34, vs34 xxlxor vs35, vs35, vs35 xxlxor vs36, vs36, vs36 xxlxor vs37, vs37, vs37 xxlxor vs38, vs38, vs38 xxlxor vs39, vs39, vs39 .endm .macro LOAD2x2 LOAD2x2O 0,0 .endm .macro LOAD2x2O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B lxv vs18, (\OffsetB+16)(BO) // load real,imag from B xxswapd vs17, vs16 xxswapd vs19, vs18 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A lxv vs1, (16+\OffsetA)(AO) // load real,imag from A .endm .macro END2x2_NORMAL END2x2 AO,BO,32,32 .endm .macro END2x2_WITHOUT_ADD END2x2 AO,BO,0,0 .endm .macro END2x2 AREG, BREG, OffsetA, OffsetB .if \OffsetB != 0 addi \BREG, \BREG, \OffsetB .endif .if \OffsetA != 0 addi \AREG, \AREG, \OffsetA .endif xvmaddadp vs32, vs0, vs16 xvmaddadp vs36, vs0, vs18 xvmaddadp vs33, vs0, vs17 xvmaddadp vs37, vs0, vs19 xvmaddadp vs34, vs1, vs16 xvmaddadp vs38, vs1, vs18 xvmaddadp vs35, vs1, vs17 xvmaddadp vs39, vs1, vs19 .endm .macro LOAD2x2_2 LOAD2x2_2O 0,0 .endm .macro LOAD2x2_2O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B lxv vs18, (\OffsetB+16)(BO) // load real,imag from B lxv vs20, (\OffsetB+32)(BO) // load real,imag from B lxv vs22, (\OffsetB+48)(BO) // load real,imag from B xxswapd vs17, vs16 xxswapd vs19, vs18 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A lxv vs1, (16+\OffsetA)(AO) // load real,imag from A lxv vs8, (32+\OffsetA)(AO) // load real,imag from A lxv vs9, (48+\OffsetA)(AO) // load real,imag from A .endm .macro END2x2_2 /*for load2 offset will be 64 and 64*/ KERNEL2x2_2 AO,BO, 64,64,0 ,1,1 .endm .macro KERNEL2x2_E2 OffsetA,OffsetB, Index,IsLast KERNEL2x2_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,1 .endm .macro KERNEL2x2_L2 OffsetA,OffsetB, Index,IsLast KERNEL2x2_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,0 .endm .macro KERNEL2x2_2 AREG,BREG, OffsetA,OffsetB, Index,IsLast ,Complete xvmaddadp vs32, vs0, vs16 xvmaddadp vs36, vs0, vs18 xvmaddadp vs33, vs0, vs17 xvmaddadp vs37, vs0, vs19 xxswapd vs21, vs20 xxswapd vs23, vs22 xvmaddadp vs34, vs1, vs16 xvmaddadp vs38, vs1, vs18 xvmaddadp vs35, vs1, vs17 xvmaddadp vs39, vs1, vs19 .if \Complete==0 lxv vs0, DISP4(\Index, 0 + \OffsetA)(\AREG) // load real,imag from A lxv vs1, DISP4(\Index,16 + \OffsetA)(\AREG) // load real,imag from A .endif .if \Complete==0 lxv vs16, DISP4(\Index, 0+\OffsetB)(\BREG) // load real imag from B lxv vs18, DISP4(\Index, 16+\OffsetB)(\BREG) // load real,imag from B .endif xvmaddadp vs32, vs8, vs20 xvmaddadp vs36, vs8, vs22 xvmaddadp vs33, vs8, vs21 xvmaddadp vs37, vs8, vs23 .if \Complete==0 xxswapd vs17, vs16 xxswapd vs19, vs18 .endif xvmaddadp vs34, vs9, vs20 xvmaddadp vs38, vs9, vs22 xvmaddadp vs35, vs9, vs21 xvmaddadp vs39, vs9, vs23 .if \Complete==0 lxv vs20, DISP4(\Index, 32+\OffsetB)(\BREG) // load real,imag from B lxv vs22, DISP4(\Index, 48+\OffsetB)(\BREG) // load real,imag from B .endif .if \Complete==0 lxv vs8, DISP4(\Index,32+0+ \OffsetA)(\AREG) // load real,imag from A lxv vs9, DISP4(\Index,32+16 + \OffsetA)(\AREG) // load real,imag from A .endif .if \IsLast==1 .if \Complete==1 addi \AREG, \AREG, DISP4(\Index,\OffsetA) addi \BREG, \BREG, DISP4(\Index,\OffsetB) .else addi \AREG, \AREG, 
DISP4(\Index,64) addi \BREG, \BREG, DISP4(\Index,64) .endif .endif .endm .macro KERNEL2x2 LOAD2x2 END2x2 AO, BO, 32,32 .endm .macro SAVE2x2 add T1, CO ,LDC SAVE2 vs32,vs33,vs34,vs35,CO,0 SAVE2 vs36,vs37,vs38,vs39,T1,0 addi CO, CO, 32 .endm /********************************************************************************************** * .macros for N=2 and M=1 **********************************************************************************************/ .macro Zero2x1 xxlxor vs32, vs32, vs32 xxlxor vs33, vs33, vs33 xxlxor vs34, vs34, vs34 xxlxor vs35, vs35, vs35 .endm .macro LOAD2x1 LOAD2x1O 0,0 .endm .macro LOAD2x1O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B lxv vs18, (\OffsetB+16)(BO) // load real,imag from B xxswapd vs17, vs16 xxswapd vs19, vs18 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A .endm .macro END2x1_NORMAL END2x1 AO,BO,16,32 .endm .macro END2x1_WITHOUT_ADD END2x1 AO,BO,0,0 .endm .macro END2x1 AREG, BREG, OffsetA, OffsetB .if \OffsetB != 0 addi \BREG, \BREG, \OffsetB .endif .if \OffsetA != 0 addi \AREG, \AREG, \OffsetA .endif xvmaddadp vs32, vs0, vs16 xvmaddadp vs34, vs0, vs18 xvmaddadp vs33, vs0, vs17 xvmaddadp vs35, vs0, vs19 .endm .macro LOAD2x1_2 LOAD2x1_2O 0,0 .endm .macro LOAD2x1_2O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B lxv vs18, (\OffsetB+16)(BO) // load real,imag from B lxv vs20, (\OffsetB+32)(BO) // load real,imag from B lxv vs22, (\OffsetB+48)(BO) // load real,imag from B xxswapd vs17, vs16 xxswapd vs19, vs18 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A lxv vs8, (16+\OffsetA)(AO) // load real,imag from A .endm .macro END2x1_2 /*for load2 offset will be 32 and 64*/ KERNEL2x1_2 AO,BO, 32,64,0 ,1,1 .endm .macro KERNEL2x1_E2 OffsetA,OffsetB, Index,IsLast KERNEL2x1_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,1 .endm .macro KERNEL2x1_L2 OffsetA,OffsetB, Index,IsLast KERNEL2x1_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,0 .endm .macro KERNEL2x1_2 AREG,BREG, OffsetA,OffsetB, Index,IsLast ,Complete xxswapd vs21, vs20 xxswapd vs23, vs22 xvmaddadp vs32, vs0, vs16 xvmaddadp vs34, vs0, vs18 xvmaddadp vs33, vs0, vs17 xvmaddadp vs35, vs0, vs19 .if \Complete==0 lxv vs0, DISP2(\Index, 0 + \OffsetA)(\AREG) // load real,imag from A .endif .if \Complete==0 lxv vs16, DISP4(\Index, 0+\OffsetB)(\BREG) // load real imag from B lxv vs18, DISP4(\Index, 16+\OffsetB)(\BREG) // load real,imag from B .endif .if \Complete==0 xxswapd vs17, vs16 xxswapd vs19, vs18 .endif xvmaddadp vs32, vs8, vs20 xvmaddadp vs34, vs8, vs22 xvmaddadp vs33, vs8, vs21 xvmaddadp vs35, vs8, vs23 .if \Complete==0 lxv vs8, DISP2(\Index,16+0+ \OffsetA)(\AREG) // load real,imag from A .endif .if \Complete==0 lxv vs20, DISP4(\Index, 32+\OffsetB)(\BREG) // load real,imag from B lxv vs22, DISP4(\Index, 48+\OffsetB)(\BREG) // load real,imag from B .endif .if \IsLast==1 .if \Complete==1 addi \AREG, \AREG, DISP2(\Index,\OffsetA) addi \BREG, \BREG, DISP4(\Index,\OffsetB) .else addi \AREG, \AREG, DISP2(\Index,32) addi \BREG, \BREG, DISP4(\Index,64) .endif .endif .endm .macro KERNEL2x1 LOAD2x1 END2x1 AO, BO, 16,32 .endm .macro SAVE2x1 add T1, CO ,LDC SAVE1 vs32,vs33,CO,0 SAVE1 vs34,vs35,T1,0 addi CO, CO, 16 .endm /********************************************************************************************** * .macros for N=1 and M=8 **********************************************************************************************/ .macro Zero1x8 xxlxor vs32, vs32, vs32 xxlxor vs33, vs33, vs33 xxlxor vs34, vs34, vs34 xxlxor vs35, vs35, vs35 xxlxor vs36, vs36, vs36 xxlxor 
vs37, vs37, vs37 xxlxor vs38, vs38, vs38 xxlxor vs39, vs39, vs39 xxlxor vs40, vs40, vs40 xxlxor vs41, vs41, vs41 xxlxor vs42, vs42, vs42 xxlxor vs43, vs43, vs43 xxlxor vs44, vs44, vs44 xxlxor vs45, vs45, vs45 xxlxor vs46, vs46, vs46 xxlxor vs47, vs47, vs47 xxlxor vs48, vs48, vs48 .endm .macro LOAD1x8 LOAD1x8O 0,0 .endm .macro LOAD1x8O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B xxswapd vs17, vs16 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A lxv vs1, (16+\OffsetA)(AO) // load real,imag from A lxv vs2, (32+\OffsetA)(AO) // load real,imag from A lxv vs3, (48+\OffsetA)(AO) // load real,imag from A lxv vs4, (64+\OffsetA)(AO) // load real,imag from A lxv vs5, (80+\OffsetA)(AO) // load real,imag from A lxv vs6, (96+\OffsetA)(AO) // load real,imag from A lxv vs7, (112+\OffsetA)(AO) // load real,imag from A .endm .macro END1x8_NORMAL END1x8 AO,BO,128,16 .endm .macro END1x8_WITHOUT_ADD END1x8 AO,BO,0,0 .endm .macro END1x8 AREG, BREG, OffsetA, OffsetB .if \OffsetB != 0 addi \BREG, \BREG, \OffsetB .endif .if \OffsetA != 0 addi \AREG, \AREG, \OffsetA .endif xvmaddadp vs32, vs0, vs16 xvmaddadp vs33, vs0, vs17 xvmaddadp vs34, vs1, vs16 xvmaddadp vs35, vs1, vs17 xvmaddadp vs36, vs2, vs16 xvmaddadp vs37, vs2, vs17 xvmaddadp vs38, vs3, vs16 xvmaddadp vs39, vs3, vs17 xvmaddadp vs40, vs4, vs16 xvmaddadp vs41, vs4, vs17 xvmaddadp vs42, vs5, vs16 xvmaddadp vs43, vs5, vs17 xvmaddadp vs44, vs6, vs16 xvmaddadp vs45, vs6, vs17 xvmaddadp vs46, vs7, vs16 xvmaddadp vs47, vs7, vs17 .endm .macro LOAD1x8_2 LOAD1x8_2O 0,0 .endm .macro LOAD1x8_2O OffsetA,OffsetB lxv vs16,(\OffsetB+ 0)(BO) // load real imag from B lxv vs20, (\OffsetB+16)(BO) // load real,imag from B xxswapd vs17, vs16 lxv vs0, (0+\OffsetA)(AO) // load real,imag from A lxv vs1, (16+\OffsetA)(AO) // load real,imag from A lxv vs2, (32+\OffsetA)(AO) // load real,imag from A lxv vs3, (48+\OffsetA)(AO) // load real,imag from A lxv vs4, (64+\OffsetA)(AO) // load real,imag from A lxv vs5, (80+\OffsetA)(AO) // load real,imag from A lxv vs6, (96+\OffsetA)(AO) // load real,imag from A lxv vs7, (112+\OffsetA)(AO) // load real,imag from A lxv vs8, (128+0+\OffsetA)(AO) // load real,imag from A lxv vs9, (128+16+\OffsetA)(AO) // load real,imag from A lxv vs10, (128+32+\OffsetA)(AO) // load real,imag from A lxv vs11, (128+48+\OffsetA)(AO) // load real,imag from A lxv vs12, (128+64+\OffsetA)(AO) // load real,imag from A lxv vs13, (128+80+\OffsetA)(AO) // load real,imag from A lxv vs14, (128+96+\OffsetA)(AO) // load real,imag from A lxv vs15, (128+112+\OffsetA)(AO) // load real,imag from A .endm .macro END1x8_2 /*for load2 offset will be 256 and 32*/ KERNEL1x8_2 AO,BO, 256,32,0 ,1,1 .endm .macro KERNEL1x8_E2 OffsetA,OffsetB, Index,IsLast KERNEL1x8_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,1 .endm .macro KERNEL1x8_L2 OffsetA,OffsetB, Index,IsLast KERNEL1x8_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,0 .endm .macro KERNEL1x8_2 AREG,BREG, OffsetA,OffsetB, Index,IsLast ,Complete xvmaddadp vs32, vs0, vs16 xvmaddadp vs33, vs0, vs17 xxswapd vs21, vs20 xvmaddadp vs34, vs1, vs16 xvmaddadp vs35, vs1, vs17 .if \Complete==0 lxv vs0, DISP16(\Index, 0 + \OffsetA)(\AREG) // load real,imag from A lxv vs1, DISP16(\Index,16 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs36, vs2, vs16 xvmaddadp vs37, vs2, vs17 xvmaddadp vs38, vs3, vs16 xvmaddadp vs39, vs3, vs17 .if \Complete==0 lxv vs2, DISP16(\Index,32 + \OffsetA)(\AREG) // load real,imag from A lxv vs3, DISP16(\Index,48 + \OffsetA)(\AREG) // load real,imag from A .endif xvmaddadp vs40, vs4, vs16 
    xvmaddadp vs41, vs4, vs17
    xvmaddadp vs42, vs5, vs16
    xvmaddadp vs43, vs5, vs17
.if \Complete==0
    lxv vs4, DISP16(\Index,64 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs5, DISP16(\Index,64+16 + \OffsetA)(\AREG) // load real,imag from A
.endif
    xvmaddadp vs44, vs6, vs16
    xvmaddadp vs45, vs6, vs17
    xvmaddadp vs46, vs7, vs16
    xvmaddadp vs47, vs7, vs17
.if \Complete==0
    lxv vs16, DISP2(\Index, 0+\OffsetB)(\BREG) // load real,imag from B
.endif
.if \Complete==0
    xxswapd vs17, vs16
.endif
    xvmaddadp vs32, vs8, vs20
    xvmaddadp vs33, vs8, vs21
.if \Complete==0
    lxv vs6, DISP16(\Index,64+32 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs7, DISP16(\Index,64+48 + \OffsetA)(\AREG) // load real,imag from A
.endif
    xvmaddadp vs34, vs9, vs20
    xvmaddadp vs35, vs9, vs21
.if \Complete==0
    lxv vs8, DISP16(\Index,128 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs9, DISP16(\Index,128+16 + \OffsetA)(\AREG) // load real,imag from A
.endif
    xvmaddadp vs36, vs10, vs20
    xvmaddadp vs37, vs10, vs21
    xvmaddadp vs38, vs11, vs20
    xvmaddadp vs39, vs11, vs21
.if \Complete==0
    lxv vs10, DISP16(\Index,128+32 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs11, DISP16(\Index,128+48 + \OffsetA)(\AREG) // load real,imag from A
.endif
    xvmaddadp vs40, vs12, vs20
    xvmaddadp vs41, vs12, vs21
    xvmaddadp vs42, vs13, vs20
    xvmaddadp vs43, vs13, vs21
.if \Complete==0
    lxv vs12, DISP16(\Index,192 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs13, DISP16(\Index,192+16 + \OffsetA)(\AREG) // load real,imag from A
.endif
    xvmaddadp vs44, vs14, vs20
    xvmaddadp vs45, vs14, vs21
    xvmaddadp vs46, vs15, vs20
    xvmaddadp vs47, vs15, vs21
.if \Complete==0
    lxv vs14, DISP16(\Index,192+32 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs15, DISP16(\Index,192+48 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs20, DISP2(\Index, 16+\OffsetB)(\BREG) // load real,imag from B
.endif
.if \IsLast==1
.if \Complete==1
    addi \AREG, \AREG, DISP16(\Index,\OffsetA)
    addi \BREG, \BREG, DISP2(\Index,\OffsetB)
.else
    addi \AREG, \AREG, DISP16(\Index,256)
    addi \BREG, \BREG, DISP2(\Index,32)
.endif
.endif
.endm

.macro KERNEL1x8
    LOAD1x8
    END1x8 AO, BO, 128,16
.endm

.macro SAVE1x8
    SAVE8 vs32,vs33,vs34,vs35,vs36,vs37,vs38,vs39,vs40,vs41,vs42,vs43,vs44,vs45,vs46,vs47,CO,0
    addi CO, CO, 128
.endm

/**********************************************************************************************
* .macros for N=1 and M=4
**********************************************************************************************/

.macro Zero1x4
    xxlxor vs32, vs32, vs32
    xxlxor vs33, vs33, vs33
    xxlxor vs34, vs34, vs34
    xxlxor vs35, vs35, vs35
    xxlxor vs36, vs36, vs36
    xxlxor vs37, vs37, vs37
    xxlxor vs38, vs38, vs38
    xxlxor vs39, vs39, vs39
.endm

.macro LOAD1x4
    LOAD1x4O 0,0
.endm

.macro LOAD1x4O OffsetA,OffsetB
    lxv vs16, (\OffsetB+0)(BO) // load real,imag from B
    xxswapd vs17, vs16
    lxv vs0, (0+\OffsetA)(AO) // load real,imag from A
    lxv vs1, (16+\OffsetA)(AO) // load real,imag from A
    lxv vs2, (32+\OffsetA)(AO) // load real,imag from A
    lxv vs3, (48+\OffsetA)(AO) // load real,imag from A
.endm

.macro END1x4_NORMAL
    END1x4 AO,BO,64,16
.endm

.macro END1x4_WITHOUT_ADD
    END1x4 AO,BO,0,0
.endm

.macro END1x4 AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
    addi \BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
    addi \AREG, \AREG, \OffsetA
.endif
    xvmaddadp vs32, vs0, vs16
    xvmaddadp vs33, vs0, vs17
    xvmaddadp vs34, vs1, vs16
    xvmaddadp vs35, vs1, vs17
    xvmaddadp vs36, vs2, vs16
    xvmaddadp vs37, vs2, vs17
    xvmaddadp vs38, vs3, vs16
    xvmaddadp vs39, vs3, vs17
.endm

.macro LOAD1x4_2
    LOAD1x4_2O 0,0
.endm

.macro LOAD1x4_2O OffsetA,OffsetB
    lxv vs16, (\OffsetB+0)(BO) // load real,imag from B
    lxv vs20, (\OffsetB+16)(BO) // load real,imag from B
    xxswapd vs17, vs16
    lxv vs0, (0+\OffsetA)(AO) // load real,imag from A
    lxv vs1, (16+\OffsetA)(AO) // load real,imag from A
    lxv vs2, (32+\OffsetA)(AO) // load real,imag from A
    lxv vs3, (48+\OffsetA)(AO) // load real,imag from A
    lxv vs8, (64+\OffsetA)(AO) // load real,imag from A
    lxv vs9, (80+\OffsetA)(AO) // load real,imag from A
    lxv vs10, (96+\OffsetA)(AO) // load real,imag from A
    lxv vs11, (112+\OffsetA)(AO) // load real,imag from A
.endm

.macro END1x4_2
    /* for LOAD1x4_2 the offsets are 128 and 32 */
    KERNEL1x4_2 AO,BO, 128,32,0 ,1,1
.endm

.macro KERNEL1x4_E2 OffsetA,OffsetB, Index,IsLast
    KERNEL1x4_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

.macro KERNEL1x4_L2 OffsetA,OffsetB, Index,IsLast
    KERNEL1x4_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,0
.endm

.macro KERNEL1x4_2 AREG,BREG, OffsetA,OffsetB, Index,IsLast ,Complete
    xvmaddadp vs32, vs0, vs16
    xvmaddadp vs33, vs0, vs17
    xxswapd vs21, vs20
    xvmaddadp vs34, vs1, vs16
    xvmaddadp vs35, vs1, vs17
.if \Complete==0
    lxv vs0, DISP8(\Index, 0 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs1, DISP8(\Index,16 + \OffsetA)(\AREG) // load real,imag from A
.endif
    xvmaddadp vs36, vs2, vs16
    xvmaddadp vs37, vs2, vs17
    xvmaddadp vs38, vs3, vs16
    xvmaddadp vs39, vs3, vs17
.if \Complete==0
    lxv vs2, DISP8(\Index,32 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs3, DISP8(\Index,48 + \OffsetA)(\AREG) // load real,imag from A
.endif
.if \Complete==0
    lxv vs16, DISP2(\Index, 0+\OffsetB)(\BREG) // load real,imag from B
.endif
    xvmaddadp vs32, vs8, vs20
    xvmaddadp vs33, vs8, vs21
.if \Complete==0
    xxswapd vs17, vs16
.endif
    xvmaddadp vs34, vs9, vs20
    xvmaddadp vs35, vs9, vs21
.if \Complete==0
    lxv vs8, DISP8(\Index,64+0 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs9, DISP8(\Index,64+16 + \OffsetA)(\AREG) // load real,imag from A
.endif
    xvmaddadp vs36, vs10, vs20
    xvmaddadp vs37, vs10, vs21
    xvmaddadp vs38, vs11, vs20
    xvmaddadp vs39, vs11, vs21
.if \Complete==0
    lxv vs10, DISP8(\Index,64+32 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs11, DISP8(\Index,64+48 + \OffsetA)(\AREG) // load real,imag from A
.endif
.if \Complete==0
    lxv vs20, DISP2(\Index, 16+\OffsetB)(\BREG) // load real,imag from B
.endif
.if \IsLast==1
.if \Complete==1
    addi \AREG, \AREG, DISP8(\Index,\OffsetA)
    addi \BREG, \BREG, DISP2(\Index,\OffsetB)
.else
    addi \AREG, \AREG, DISP8(\Index,128)
    addi \BREG, \BREG, DISP2(\Index,32)
.endif
.endif
.endm

.macro KERNEL1x4
    LOAD1x4
    END1x4 AO, BO, 64,16
.endm

.macro SAVE1x4
    SAVE4 vs32,vs33,vs34,vs35,vs36,vs37,vs38,vs39,CO,0
    addi CO, CO, 64
.endm

/**********************************************************************************************
* .macros for N=1 and M=2
**********************************************************************************************/

.macro Zero1x2
    xxlxor vs32, vs32, vs32
    xxlxor vs33, vs33, vs33
    xxlxor vs34, vs34, vs34
    xxlxor vs35, vs35, vs35
.endm

.macro LOAD1x2
    LOAD1x2O 0,0
.endm

.macro LOAD1x2O OffsetA,OffsetB
    lxv vs16, (\OffsetB+0)(BO) // load real,imag from B
    xxswapd vs17, vs16
    lxv vs0, (0+\OffsetA)(AO) // load real,imag from A
    lxv vs1, (16+\OffsetA)(AO) // load real,imag from A
.endm

.macro END1x2_NORMAL
    END1x2 AO,BO,32,16
.endm

.macro END1x2_WITHOUT_ADD
    END1x2 AO,BO,0,0
.endm

.macro END1x2 AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
    addi \BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
    addi \AREG, \AREG, \OffsetA
.endif
    xvmaddadp vs32, vs0, vs16
    xvmaddadp vs33, vs0, vs17
    xvmaddadp vs34, vs1, vs16
    xvmaddadp vs35, vs1, vs17
.endm
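
/* Recap of the complex FMA scheme shared by the KERNELnxm macros in this
   file (a worked example; the value names ar,ai,br,bi are illustrative
   only). Each lxv brings one double-complex {real,imag} pair into a VSX
   register, so an element of A gives a = {ar,ai}, an element of B gives
   vs16 = {br,bi}, and vs17 = xxswapd(vs16) = {bi,br}. Two accumulators
   are kept per result:

       acc_rr_ii += a * vs16   ->   {ar*br, ai*bi}
       acc_ri_ir += a * vs17   ->   {ar*bi, ai*br}

   At save time, RESULT_INTO_REALREAL_IMAGEIMAGE / RESULT_INTO_REALIMAG_IMAGREAL
   regroup two such results lane-wise, AGGREGATE_REALS_IMAGES applies the
   signs for the conjugation variant (e.g. for NN:
   real = ar*br - ai*bi, imag = ar*bi + ai*br), and MULT_APLHA_PART1/2
   scale by {alpha_r,alpha_i} before UNPACK_FOR_STORE / STORE_COUPLE
   write back to C. */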
.macro LOAD1x2_2
    LOAD1x2_2O 0,0
.endm

.macro LOAD1x2_2O OffsetA,OffsetB
    lxv vs16, (\OffsetB+0)(BO) // load real,imag from B
    lxv vs20, (\OffsetB+16)(BO) // load real,imag from B
    xxswapd vs17, vs16
    lxv vs0, (0+\OffsetA)(AO) // load real,imag from A
    lxv vs1, (16+\OffsetA)(AO) // load real,imag from A
    lxv vs8, (32+\OffsetA)(AO) // load real,imag from A
    lxv vs9, (48+\OffsetA)(AO) // load real,imag from A
.endm

.macro END1x2_2
    /* for LOAD1x2_2 the offsets are 64 and 32 */
    KERNEL1x2_2 AO,BO, 64,32,0 ,1,1
.endm

.macro KERNEL1x2_E2 OffsetA,OffsetB, Index,IsLast
    KERNEL1x2_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

.macro KERNEL1x2_L2 OffsetA,OffsetB, Index,IsLast
    KERNEL1x2_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,0
.endm

.macro KERNEL1x2_2 AREG,BREG, OffsetA,OffsetB, Index,IsLast ,Complete
    xvmaddadp vs32, vs0, vs16
    xvmaddadp vs33, vs0, vs17
    xxswapd vs21, vs20
    xvmaddadp vs34, vs1, vs16
    xvmaddadp vs35, vs1, vs17
.if \Complete==0
    lxv vs0, DISP4(\Index, 0 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs1, DISP4(\Index,16 + \OffsetA)(\AREG) // load real,imag from A
.endif
.if \Complete==0
    lxv vs16, DISP2(\Index, 0+\OffsetB)(\BREG) // load real,imag from B
.endif
    xvmaddadp vs32, vs8, vs20
    xvmaddadp vs33, vs8, vs21
.if \Complete==0
    xxswapd vs17, vs16
.endif
    xvmaddadp vs34, vs9, vs20
    xvmaddadp vs35, vs9, vs21
.if \Complete==0
    lxv vs20, DISP2(\Index, 16+\OffsetB)(\BREG) // load real,imag from B
.endif
.if \Complete==0
    lxv vs8, DISP4(\Index,32+0 + \OffsetA)(\AREG) // load real,imag from A
    lxv vs9, DISP4(\Index,32+16 + \OffsetA)(\AREG) // load real,imag from A
.endif
.if \IsLast==1
.if \Complete==1
    addi \AREG, \AREG, DISP4(\Index,\OffsetA)
    addi \BREG, \BREG, DISP2(\Index,\OffsetB)
.else
    addi \AREG, \AREG, DISP4(\Index,64)
    addi \BREG, \BREG, DISP2(\Index,32)
.endif
.endif
.endm

.macro KERNEL1x2
    LOAD1x2
    END1x2 AO, BO, 32,16
.endm

.macro SAVE1x2
    SAVE2 vs32,vs33,vs34,vs35,CO,0
    addi CO, CO, 32
.endm

/**********************************************************************************************
* .macros for N=1 and M=1
**********************************************************************************************/

.macro Zero1x1
    xxlxor vs32, vs32, vs32
    xxlxor vs33, vs33, vs33
.endm

.macro LOAD1x1
    LOAD1x1O 0,0
.endm

.macro LOAD1x1O OffsetA,OffsetB
    lxv vs16, (\OffsetB+0)(BO) // load real,imag from B
    lxv vs0, (0+\OffsetA)(AO) // load real,imag from A
    xxswapd vs17, vs16
.endm

.macro END1x1_NORMAL
    END1x1 AO,BO,16,16
.endm

.macro END1x1_WITHOUT_ADD
    END1x1 AO,BO,0,0
.endm

.macro END1x1 AREG, BREG, OffsetA, OffsetB
.if \OffsetB != 0
    addi \BREG, \BREG, \OffsetB
.endif
.if \OffsetA != 0
    addi \AREG, \AREG, \OffsetA
.endif
    xvmaddadp vs32, vs0, vs16
    xvmaddadp vs33, vs0, vs17
.endm

.macro LOAD1x1_2
    LOAD1x1_2O 0,0
.endm

.macro LOAD1x1_2O OffsetA,OffsetB
    lxv vs16, (\OffsetB+0)(BO) // load real,imag from B
    lxv vs20, (\OffsetB+16)(BO) // load real,imag from B
    xxswapd vs17, vs16
    lxv vs0, (0+\OffsetA)(AO) // load real,imag from A
    lxv vs8, (16+\OffsetA)(AO) // load real,imag from A
.endm

.macro END1x1_2
    /* for LOAD1x1_2 the offsets are 32 and 32 */
    KERNEL1x1_2 AO,BO, 32,32,0 ,1,1
.endm

.macro KERNEL1x1_E2 OffsetA,OffsetB, Index,IsLast
    KERNEL1x1_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,1
.endm

.macro KERNEL1x1_L2 OffsetA,OffsetB, Index,IsLast
    KERNEL1x1_2 AO,BO, \OffsetA,\OffsetB, \Index,\IsLast ,0
.endm

.macro KERNEL1x1_2 AREG,BREG, OffsetA,OffsetB, Index,IsLast ,Complete
    xxswapd vs21, vs20
    xvmaddadp vs32, vs0, vs16
    xvmaddadp vs33, vs0, vs17
.if \Complete==0
    lxv vs0, DISP2(\Index, 0 + \OffsetA)(\AREG) // load real,imag from A
.endif
.if \Complete==0
    lxv vs16, DISP2(\Index, 0+\OffsetB)(\BREG) // load real,imag from B
.endif
.if \Complete==0
    xxswapd vs17, vs16
.endif
    xvmaddadp vs32, vs8, vs20
    xvmaddadp vs33, vs8, vs21
.if \Complete==0
    lxv vs8, DISP2(\Index,16+0 + \OffsetA)(\AREG) // load real,imag from A
.endif
.if \Complete==0
    lxv vs20, DISP2(\Index, 16+\OffsetB)(\BREG) // load real,imag from B
.endif
.if \IsLast==1
.if \Complete==1
    addi \AREG, \AREG, DISP2(\Index,\OffsetA)
    addi \BREG, \BREG, DISP2(\Index,\OffsetB)
.else
    addi \AREG, \AREG, DISP2(\Index,32)
    addi \BREG, \BREG, DISP2(\Index,32)
.endif
.endif
.endm

.macro KERNEL1x1
    LOAD1x1
    END1x1 AO, BO, 16,16
.endm

.macro SAVE1x1
    SAVE1 vs32,vs33,CO,0
    addi CO, CO, 16
.endm

/**************************** TRMM POINTER REFRESH MACROS ****************************/

.macro SHIFT_REG REG1,REG2,SHIFT_VAL
    /* REG1 = REG2 * \SHIFT_VAL * unit_size (16 bytes), i.e. an element count
       turned into a byte offset via a left shift */
.if \SHIFT_VAL==16
    slwi \REG1, \REG2, 8
.elseif \SHIFT_VAL==8
    slwi \REG1, \REG2, 7
.elseif \SHIFT_VAL==4
    slwi \REG1, \REG2, 6
.elseif \SHIFT_VAL==2
    slwi \REG1, \REG2, 5
.elseif \SHIFT_VAL==1
    slwi \REG1, \REG2, 4
.endif
.endm

/*
// #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
//     ptrbb = bb;
// #else
//     ptrba += off*16;
//     ptrbb = bb + off*2;
// #endif
*/
.macro REFRESH_POINTERS PTR_A,PTR_B,OFF_VAL,B_VAL,C_A,C_B
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
    /* ptrbb = bb; */
    mr \PTR_B, \B_VAL /* refresh BPOINT */
#else
    /*
    // ptrba = ptrba + off*C_A;
    // ptrbb = bb + off*C_B;
    */
    SHIFT_REG T4, \OFF_VAL, \C_B /* byte offset of the B values skipped */
    SHIFT_REG T2, \OFF_VAL, \C_A /* byte offset of the A values skipped */
    add \PTR_B, \B_VAL, T4 /* advance BO */
    add \PTR_A, \PTR_A, T2 /* advance AO */
#endif
.endm

/*
// #if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
//     temp = bk - off;
// #elif defined(LEFT)
//     temp = off + 16; // number of values in A
// #else
//     temp = off + 2;  // number of values in B
// #endif
*/
.macro REFRESH_TEMP_BK TEMP_BK,BK_VAL,OFF_VAL,INCR_A,INCR_B
#if (defined(LEFT) && !defined(TRANSA)) || (!defined(LEFT) && defined(TRANSA))
    /* temp = bk - off; */
    sub \TEMP_BK, \BK_VAL, \OFF_VAL
#elif defined(LEFT)
    /* temp = off + INCR_A; // number of values in A */
    addi \TEMP_BK, \OFF_VAL, \INCR_A
#else
    /* temp = off + INCR_B; // number of values in B */
    addi \TEMP_BK, \OFF_VAL, \INCR_B
#endif
.endm

/*
// #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
//     temp = bk - off;
// #ifdef LEFT
//     temp -= 16; // number of values in A
// #else
//     temp -= 2;  // number of values in B
// #endif
//     ptrba += temp*16;
//     ptrbb += temp*2;
// #endif
// #ifdef LEFT
//     off += 16; // number of values in A
// #endif
*/
.macro REFRESH_AFTER_SAVE TEMP_BK,BK_VAL,OFF_VAL,PTR_B,PTR_A,C_A,C_B
#if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA))
    /* temp = bk - off; */
    sub \TEMP_BK, \BK_VAL, \OFF_VAL
#ifdef LEFT
    /* temp -= C_A; // number of values in A */
    addi \TEMP_BK, \TEMP_BK, -\C_A
#else
    /* temp -= C_B; // number of values in B */
    addi \TEMP_BK, \TEMP_BK, -\C_B
#endif
    /* ptrba += temp*C_A; ptrbb += temp*C_B; */
    SHIFT_REG T4, \TEMP_BK, \C_A
    SHIFT_REG T2, \TEMP_BK, \C_B
    add \PTR_A, \PTR_A, T4 /* ptrba += temp*C_A */
    add \PTR_B, \PTR_B, T2 /* ptrbb += temp*C_B */
#endif
#ifdef LEFT
    /* off += C_A; // number of values in A */
    addi \OFF_VAL, \OFF_VAL, \C_A
#endif
.endm
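
/* For reference, a rough sketch (not assembled here) of how the N=2, M=8
   path above is typically driven. The ZGEMM_L2x8_LOOP label and the T8
   counter are hypothetical; the driver file that includes these macros is
   assumed to have set up AO, BO, CO, LDC, T1, K and the alpha_r/alpha_i
   registers that the SAVE path reads, and to handle the odd-K tail and
   preload accounting that this sketch omits:

       Zero2x8                         // clear accumulators vs32..vs63
       LOAD2x8_2                       // preload A/B for two k-iterations
       srawi. T8, K, 1                 // hypothetical trip count: K/2
       mtctr T8
   ZGEMM_L2x8_LOOP:
       KERNEL2x8_L2 256,64,0,0         // FMAs + reload, pointers untouched
       KERNEL2x8_L2 256,64,1,1         // second pair; IsLast=1 bumps AO/BO
       bdnz ZGEMM_L2x8_LOOP
       KERNEL2x8_E2 256,64,0,1         // drain the preloaded data, then bump
       SAVE2x8                         // apply alpha, update C, CO += 128
*/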