Added license information. tags/v0.2.16^2
| @@ -1,3 +1,38 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/05 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| @@ -218,11 +253,11 @@ | |||
| cmpwi cr0, M, 0 | |||
| ble L999_H1 | |||
| ble .L999_H1 | |||
| cmpwi cr0, N, 0 | |||
| ble L999_H1 | |||
| ble .L999_H1 | |||
| cmpwi cr0, K, 0 | |||
| ble L999_H1 | |||
| ble .L999_H1 | |||
| #ifdef __64BIT__ | |||
| addi ALPHA, SP, 296 | |||
| @@ -241,7 +276,7 @@ | |||
| #include "dgemm_logic_16x4_power8.S" | |||
| L999: | |||
| .L999: | |||
| addi r3, 0, 0 | |||
| lfd f14, 0(SP) | |||
| @@ -1,3 +1,38 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/05 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| /********************************************************************* | |||
| * Macros for N=4, M=16 * | |||
| *********************************************************************/ | |||
| @@ -1,3 +1,38 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/05 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| @@ -228,11 +263,11 @@ | |||
| #endif | |||
| cmpwi cr0, M, 0 | |||
| ble L999_H1 | |||
| ble .L999_H1 | |||
| cmpwi cr0, N, 0 | |||
| ble L999_H1 | |||
| ble .L999_H1 | |||
| cmpwi cr0, K, 0 | |||
| ble L999_H1 | |||
| ble .L999_H1 | |||
| #ifdef __64BIT__ | |||
| addi ALPHA, SP, 296 | |||
| @@ -251,7 +286,7 @@ | |||
| #include "dtrmm_logic_16x4_power8.S" | |||
| L999: | |||
| .L999: | |||
| addi r3, 0, 0 | |||
| lfd f14, 0(SP) | |||
| @@ -1,3 +1,38 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/05 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| @@ -233,11 +268,11 @@ | |||
| #include "zgemm_macros_8x2_power8.S" | |||
| cmpwi cr0, M, 0 | |||
| ble L999 | |||
| ble .L999 | |||
| cmpwi cr0, N, 0 | |||
| ble L999 | |||
| ble .L999 | |||
| cmpwi cr0, K, 0 | |||
| ble L999 | |||
| ble .L999 | |||
| slwi LDC, LDC, ZBASE_SHIFT | |||
| li PRE, 256 | |||
| @@ -260,7 +295,7 @@ | |||
| #include "zgemm_logic_8x2_power8.S" | |||
| L999: | |||
| .L999: | |||
| addi r3, 0, 0 | |||
| lfd f14, 0(SP) | |||
| @@ -1,25 +1,25 @@ | |||
| srawi. J, N, 1 | |||
| ble ZGEMM_L2_END | |||
| ble .LZGEMM_L2_END | |||
| ZGEMM_L2_BEGIN: | |||
| .LZGEMM_L2_BEGIN: | |||
| mr CO, C | |||
| mr AO, A | |||
| slwi T1, LDC , 1 | |||
| add C, C, T1 | |||
| srawi. I, M, 3 | |||
| ble ZGEMM_L2x8_END | |||
| ble .LZGEMM_L2x8_END | |||
| ZGEMM_L2x8_BEGIN: | |||
| .LZGEMM_L2x8_BEGIN: | |||
| mr BO, B | |||
| srawi. L, K, 3 | |||
| ble ZGEMM_L2x8_SUB0 | |||
| ble .LZGEMM_L2x8_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZGEMM_L2x8_SUB4 | |||
| ble .LZGEMM_L2x8_SUB4 | |||
| ZGEMM_L2x8_LOOP_START: | |||
| .LZGEMM_L2x8_LOOP_START: | |||
| dcbt AO, PRE | |||
| LOAD2x8_1 | |||
| @@ -42,11 +42,11 @@ ZGEMM_L2x8_LOOP_START: | |||
| KERNEL2x8_2 | |||
| addic. L, L, -2 | |||
| ble ZGEMM_L2x8_LOOP_END | |||
| ble .LZGEMM_L2x8_LOOP_END | |||
| .align 5 | |||
| ZGEMM_L2x8_LOOP: | |||
| .LZGEMM_L2x8_LOOP: | |||
| dcbt AO, PRE | |||
| KERNEL2x8_1 | |||
| @@ -67,9 +67,9 @@ ZGEMM_L2x8_LOOP: | |||
| KERNEL2x8_2 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L2x8_LOOP | |||
| bgt .LZGEMM_L2x8_LOOP | |||
| ZGEMM_L2x8_LOOP_END: | |||
| .LZGEMM_L2x8_LOOP_END: | |||
| dcbt AO, PRE | |||
| KERNEL2x8_1 | |||
| @@ -88,9 +88,9 @@ ZGEMM_L2x8_LOOP_END: | |||
| KERNEL2x8_1 | |||
| KERNEL2x8_E2 | |||
| b ZGEMM_L2x8_SUB1 | |||
| b .LZGEMM_L2x8_SUB1 | |||
| ZGEMM_L2x8_SUB4: | |||
| .LZGEMM_L2x8_SUB4: | |||
| dcbt AO, PRE | |||
| KERNEL2x8_SUBI1 | |||
| @@ -106,53 +106,53 @@ ZGEMM_L2x8_SUB4: | |||
| KERNEL2x8_SUB1 | |||
| KERNEL2x8_SUB1 | |||
| b ZGEMM_L2x8_SUB1 | |||
| b .LZGEMM_L2x8_SUB1 | |||
| ZGEMM_L2x8_SUB0: | |||
| .LZGEMM_L2x8_SUB0: | |||
| andi. L, K, 7 | |||
| KERNEL2x8_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZGEMM_L2x8_SAVE | |||
| b ZGEMM_L2x8_SUB2 | |||
| ble .LZGEMM_L2x8_SAVE | |||
| b .LZGEMM_L2x8_SUB2 | |||
| ZGEMM_L2x8_SUB1: | |||
| .LZGEMM_L2x8_SUB1: | |||
| andi. L, K, 7 | |||
| ble ZGEMM_L2x8_SAVE | |||
| ble .LZGEMM_L2x8_SAVE | |||
| ZGEMM_L2x8_SUB2: | |||
| .LZGEMM_L2x8_SUB2: | |||
| KERNEL2x8_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L2x8_SUB2 | |||
| bgt .LZGEMM_L2x8_SUB2 | |||
| ZGEMM_L2x8_SAVE: | |||
| .LZGEMM_L2x8_SAVE: | |||
| SAVE2x8 | |||
| addic. I, I, -1 | |||
| bgt ZGEMM_L2x8_BEGIN | |||
| bgt .LZGEMM_L2x8_BEGIN | |||
| ZGEMM_L2x8_END: | |||
| .LZGEMM_L2x8_END: | |||
| ZGEMM_L2x4_BEGIN: | |||
| .LZGEMM_L2x4_BEGIN: | |||
| andi. T2, M, 7 | |||
| ble ZGEMM_L2x1_END | |||
| ble .LZGEMM_L2x1_END | |||
| andi. T1, M, 4 | |||
| ble ZGEMM_L2x4_END | |||
| ble .LZGEMM_L2x4_END | |||
| mr BO, B | |||
| srawi. L, K, 3 | |||
| ble ZGEMM_L2x4_SUB0 | |||
| ble .LZGEMM_L2x4_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZGEMM_L2x4_SUB4 | |||
| ble .LZGEMM_L2x4_SUB4 | |||
| ZGEMM_L2x4_LOOP_START: | |||
| .LZGEMM_L2x4_LOOP_START: | |||
| LOAD2x4_1 | |||
| KERNEL2x4_I1 | |||
| @@ -166,11 +166,11 @@ ZGEMM_L2x4_LOOP_START: | |||
| KERNEL2x4_2 | |||
| addic. L, L, -2 | |||
| ble ZGEMM_L2x4_LOOP_END | |||
| ble .LZGEMM_L2x4_LOOP_END | |||
| .align 5 | |||
| ZGEMM_L2x4_LOOP: | |||
| .LZGEMM_L2x4_LOOP: | |||
| KERNEL2x4_1 | |||
| KERNEL2x4_2 | |||
| @@ -183,9 +183,9 @@ ZGEMM_L2x4_LOOP: | |||
| KERNEL2x4_2 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L2x4_LOOP | |||
| bgt .LZGEMM_L2x4_LOOP | |||
| ZGEMM_L2x4_LOOP_END: | |||
| .LZGEMM_L2x4_LOOP_END: | |||
| KERNEL2x4_1 | |||
| KERNEL2x4_2 | |||
| @@ -197,9 +197,9 @@ ZGEMM_L2x4_LOOP_END: | |||
| KERNEL2x4_1 | |||
| KERNEL2x4_E2 | |||
| b ZGEMM_L2x4_SUB1 | |||
| b .LZGEMM_L2x4_SUB1 | |||
| ZGEMM_L2x4_SUB4: | |||
| .LZGEMM_L2x4_SUB4: | |||
| KERNEL2x4_SUBI1 | |||
| KERNEL2x4_SUB1 | |||
| @@ -211,48 +211,48 @@ ZGEMM_L2x4_SUB4: | |||
| KERNEL2x4_SUB1 | |||
| KERNEL2x4_SUB1 | |||
| b ZGEMM_L2x4_SUB1 | |||
| b .LZGEMM_L2x4_SUB1 | |||
| ZGEMM_L2x4_SUB0: | |||
| .LZGEMM_L2x4_SUB0: | |||
| andi. L, K, 7 | |||
| KERNEL2x4_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZGEMM_L2x4_SAVE | |||
| b ZGEMM_L2x4_SUB2 | |||
| ble .LZGEMM_L2x4_SAVE | |||
| b .LZGEMM_L2x4_SUB2 | |||
| ZGEMM_L2x4_SUB1: | |||
| .LZGEMM_L2x4_SUB1: | |||
| andi. L, K, 7 | |||
| ble ZGEMM_L2x4_SAVE | |||
| ble .LZGEMM_L2x4_SAVE | |||
| ZGEMM_L2x4_SUB2: | |||
| .LZGEMM_L2x4_SUB2: | |||
| KERNEL2x4_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L2x4_SUB2 | |||
| bgt .LZGEMM_L2x4_SUB2 | |||
| ZGEMM_L2x4_SAVE: | |||
| .LZGEMM_L2x4_SAVE: | |||
| SAVE2x4 | |||
| ZGEMM_L2x4_END: | |||
| .LZGEMM_L2x4_END: | |||
| ZGEMM_L2x2_BEGIN: | |||
| .LZGEMM_L2x2_BEGIN: | |||
| andi. T1, M, 2 | |||
| ble ZGEMM_L2x2_END | |||
| ble .LZGEMM_L2x2_END | |||
| mr BO, B | |||
| srawi. L, K, 3 | |||
| ble ZGEMM_L2x2_SUB0 | |||
| ble .LZGEMM_L2x2_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZGEMM_L2x2_SUB4 | |||
| ble .LZGEMM_L2x2_SUB4 | |||
| ZGEMM_L2x2_LOOP_START: | |||
| .LZGEMM_L2x2_LOOP_START: | |||
| LOAD2x2_1 | |||
| KERNEL2x2_I1 | |||
| @@ -266,11 +266,11 @@ ZGEMM_L2x2_LOOP_START: | |||
| KERNEL2x2_2 | |||
| addic. L, L, -2 | |||
| ble ZGEMM_L2x2_LOOP_END | |||
| ble .LZGEMM_L2x2_LOOP_END | |||
| .align 5 | |||
| ZGEMM_L2x2_LOOP: | |||
| .LZGEMM_L2x2_LOOP: | |||
| KERNEL2x2_1 | |||
| KERNEL2x2_2 | |||
| @@ -283,9 +283,9 @@ ZGEMM_L2x2_LOOP: | |||
| KERNEL2x2_2 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L2x2_LOOP | |||
| bgt .LZGEMM_L2x2_LOOP | |||
| ZGEMM_L2x2_LOOP_END: | |||
| .LZGEMM_L2x2_LOOP_END: | |||
| KERNEL2x2_1 | |||
| KERNEL2x2_2 | |||
| @@ -297,9 +297,9 @@ ZGEMM_L2x2_LOOP_END: | |||
| KERNEL2x2_1 | |||
| KERNEL2x2_E2 | |||
| b ZGEMM_L2x2_SUB1 | |||
| b .LZGEMM_L2x2_SUB1 | |||
| ZGEMM_L2x2_SUB4: | |||
| .LZGEMM_L2x2_SUB4: | |||
| KERNEL2x2_SUBI1 | |||
| KERNEL2x2_SUB1 | |||
| @@ -311,48 +311,48 @@ ZGEMM_L2x2_SUB4: | |||
| KERNEL2x2_SUB1 | |||
| KERNEL2x2_SUB1 | |||
| b ZGEMM_L2x2_SUB1 | |||
| b .LZGEMM_L2x2_SUB1 | |||
| ZGEMM_L2x2_SUB0: | |||
| .LZGEMM_L2x2_SUB0: | |||
| andi. L, K, 7 | |||
| KERNEL2x2_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZGEMM_L2x2_SAVE | |||
| b ZGEMM_L2x2_SUB2 | |||
| ble .LZGEMM_L2x2_SAVE | |||
| b .LZGEMM_L2x2_SUB2 | |||
| ZGEMM_L2x2_SUB1: | |||
| .LZGEMM_L2x2_SUB1: | |||
| andi. L, K, 7 | |||
| ble ZGEMM_L2x2_SAVE | |||
| ble .LZGEMM_L2x2_SAVE | |||
| ZGEMM_L2x2_SUB2: | |||
| .LZGEMM_L2x2_SUB2: | |||
| KERNEL2x2_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L2x2_SUB2 | |||
| bgt .LZGEMM_L2x2_SUB2 | |||
| ZGEMM_L2x2_SAVE: | |||
| .LZGEMM_L2x2_SAVE: | |||
| SAVE2x2 | |||
| ZGEMM_L2x2_END: | |||
| .LZGEMM_L2x2_END: | |||
| ZGEMM_L2x1_BEGIN: | |||
| .LZGEMM_L2x1_BEGIN: | |||
| andi. T1, M, 1 | |||
| ble ZGEMM_L2x1_END | |||
| ble .LZGEMM_L2x1_END | |||
| mr BO, B | |||
| srawi. L, K, 3 | |||
| ble ZGEMM_L2x1_SUB0 | |||
| ble .LZGEMM_L2x1_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZGEMM_L2x1_SUB4 | |||
| ble .LZGEMM_L2x1_SUB4 | |||
| ZGEMM_L2x1_LOOP_START: | |||
| .LZGEMM_L2x1_LOOP_START: | |||
| LOAD2x1_1 | |||
| KERNEL2x1_I1 | |||
| @@ -366,11 +366,11 @@ ZGEMM_L2x1_LOOP_START: | |||
| KERNEL2x1_2 | |||
| addic. L, L, -2 | |||
| ble ZGEMM_L2x1_LOOP_END | |||
| ble .LZGEMM_L2x1_LOOP_END | |||
| .align 5 | |||
| ZGEMM_L2x1_LOOP: | |||
| .LZGEMM_L2x1_LOOP: | |||
| KERNEL2x1_1 | |||
| KERNEL2x1_2 | |||
| @@ -383,9 +383,9 @@ ZGEMM_L2x1_LOOP: | |||
| KERNEL2x1_2 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L2x1_LOOP | |||
| bgt .LZGEMM_L2x1_LOOP | |||
| ZGEMM_L2x1_LOOP_END: | |||
| .LZGEMM_L2x1_LOOP_END: | |||
| KERNEL2x1_1 | |||
| KERNEL2x1_2 | |||
| @@ -397,9 +397,9 @@ ZGEMM_L2x1_LOOP_END: | |||
| KERNEL2x1_1 | |||
| KERNEL2x1_E2 | |||
| b ZGEMM_L2x1_SUB1 | |||
| b .LZGEMM_L2x1_SUB1 | |||
| ZGEMM_L2x1_SUB4: | |||
| .LZGEMM_L2x1_SUB4: | |||
| KERNEL2x1_SUBI1 | |||
| KERNEL2x1_SUB1 | |||
| @@ -411,72 +411,72 @@ ZGEMM_L2x1_SUB4: | |||
| KERNEL2x1_SUB1 | |||
| KERNEL2x1_SUB1 | |||
| b ZGEMM_L2x1_SUB1 | |||
| b .LZGEMM_L2x1_SUB1 | |||
| ZGEMM_L2x1_SUB0: | |||
| .LZGEMM_L2x1_SUB0: | |||
| andi. L, K, 7 | |||
| KERNEL2x1_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZGEMM_L2x1_SAVE | |||
| b ZGEMM_L2x1_SUB2 | |||
| ble .LZGEMM_L2x1_SAVE | |||
| b .LZGEMM_L2x1_SUB2 | |||
| ZGEMM_L2x1_SUB1: | |||
| .LZGEMM_L2x1_SUB1: | |||
| andi. L, K, 7 | |||
| ble ZGEMM_L2x1_SAVE | |||
| ble .LZGEMM_L2x1_SAVE | |||
| ZGEMM_L2x1_SUB2: | |||
| .LZGEMM_L2x1_SUB2: | |||
| KERNEL2x1_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L2x1_SUB2 | |||
| bgt .LZGEMM_L2x1_SUB2 | |||
| ZGEMM_L2x1_SAVE: | |||
| .LZGEMM_L2x1_SAVE: | |||
| SAVE2x1 | |||
| ZGEMM_L2x1_END: | |||
| .LZGEMM_L2x1_END: | |||
| slwi T1, K, 5 | |||
| add B, B, T1 | |||
| addic. J, J, -1 | |||
| bgt ZGEMM_L2_BEGIN | |||
| bgt .LZGEMM_L2_BEGIN | |||
| andi. T2, N, 1 | |||
| ble L999 | |||
| ble .L999 | |||
| ZGEMM_L2_END: | |||
| .LZGEMM_L2_END: | |||
| b ZGEMM_L1_BEGIN | |||
| b .LZGEMM_L1_BEGIN | |||
| L999_H1: | |||
| .L999_H1: | |||
| b L999 | |||
| b .L999 | |||
| ZGEMM_L1_BEGIN: | |||
| .LZGEMM_L1_BEGIN: | |||
| andi. T1, N, 1 | |||
| ble ZGEMM_L1_END | |||
| ble .LZGEMM_L1_END | |||
| mr CO, C | |||
| mr AO, A | |||
| srawi. I, M, 3 | |||
| ble ZGEMM_L1x8_END | |||
| ble .LZGEMM_L1x8_END | |||
| ZGEMM_L1x8_BEGIN: | |||
| .LZGEMM_L1x8_BEGIN: | |||
| mr BO, B | |||
| srawi. L, K, 3 | |||
| ble ZGEMM_L1x8_SUB0 | |||
| ble .LZGEMM_L1x8_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZGEMM_L1x8_SUB4 | |||
| ble .LZGEMM_L1x8_SUB4 | |||
| ZGEMM_L1x8_LOOP_START: | |||
| .LZGEMM_L1x8_LOOP_START: | |||
| dcbt AO, PRE | |||
| LOAD1x8_1 | |||
| @@ -499,11 +499,11 @@ ZGEMM_L1x8_LOOP_START: | |||
| KERNEL1x8_2 | |||
| addic. L, L, -2 | |||
| ble ZGEMM_L1x8_LOOP_END | |||
| ble .LZGEMM_L1x8_LOOP_END | |||
| .align 5 | |||
| ZGEMM_L1x8_LOOP: | |||
| .LZGEMM_L1x8_LOOP: | |||
| dcbt AO, PRE | |||
| KERNEL1x8_1 | |||
| @@ -524,9 +524,9 @@ ZGEMM_L1x8_LOOP: | |||
| KERNEL1x8_2 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L1x8_LOOP | |||
| bgt .LZGEMM_L1x8_LOOP | |||
| ZGEMM_L1x8_LOOP_END: | |||
| .LZGEMM_L1x8_LOOP_END: | |||
| dcbt AO, PRE | |||
| KERNEL1x8_1 | |||
| @@ -545,9 +545,9 @@ ZGEMM_L1x8_LOOP_END: | |||
| KERNEL1x8_1 | |||
| KERNEL1x8_E2 | |||
| b ZGEMM_L1x8_SUB1 | |||
| b .LZGEMM_L1x8_SUB1 | |||
| ZGEMM_L1x8_SUB4: | |||
| .LZGEMM_L1x8_SUB4: | |||
| dcbt AO, PRE | |||
| KERNEL1x8_SUBI1 | |||
| @@ -563,53 +563,53 @@ ZGEMM_L1x8_SUB4: | |||
| KERNEL1x8_SUB1 | |||
| KERNEL1x8_SUB1 | |||
| b ZGEMM_L1x8_SUB1 | |||
| b .LZGEMM_L1x8_SUB1 | |||
| ZGEMM_L1x8_SUB0: | |||
| .LZGEMM_L1x8_SUB0: | |||
| andi. L, K, 7 | |||
| KERNEL1x8_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZGEMM_L1x8_SAVE | |||
| b ZGEMM_L1x8_SUB2 | |||
| ble .LZGEMM_L1x8_SAVE | |||
| b .LZGEMM_L1x8_SUB2 | |||
| ZGEMM_L1x8_SUB1: | |||
| .LZGEMM_L1x8_SUB1: | |||
| andi. L, K, 7 | |||
| ble ZGEMM_L1x8_SAVE | |||
| ble .LZGEMM_L1x8_SAVE | |||
| ZGEMM_L1x8_SUB2: | |||
| .LZGEMM_L1x8_SUB2: | |||
| KERNEL1x8_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L1x8_SUB2 | |||
| bgt .LZGEMM_L1x8_SUB2 | |||
| ZGEMM_L1x8_SAVE: | |||
| .LZGEMM_L1x8_SAVE: | |||
| SAVE1x8 | |||
| addic. I, I, -1 | |||
| bgt ZGEMM_L1x8_BEGIN | |||
| bgt .LZGEMM_L1x8_BEGIN | |||
| ZGEMM_L1x8_END: | |||
| .LZGEMM_L1x8_END: | |||
| ZGEMM_L1x4_BEGIN: | |||
| .LZGEMM_L1x4_BEGIN: | |||
| andi. T2, M, 7 | |||
| ble ZGEMM_L1x1_END | |||
| ble .LZGEMM_L1x1_END | |||
| andi. T1, M, 4 | |||
| ble ZGEMM_L1x4_END | |||
| ble .LZGEMM_L1x4_END | |||
| mr BO, B | |||
| srawi. L, K, 3 | |||
| ble ZGEMM_L1x4_SUB0 | |||
| ble .LZGEMM_L1x4_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZGEMM_L1x4_SUB4 | |||
| ble .LZGEMM_L1x4_SUB4 | |||
| ZGEMM_L1x4_LOOP_START: | |||
| .LZGEMM_L1x4_LOOP_START: | |||
| LOAD1x4_1 | |||
| KERNEL1x4_I1 | |||
| @@ -623,11 +623,11 @@ ZGEMM_L1x4_LOOP_START: | |||
| KERNEL1x4_2 | |||
| addic. L, L, -2 | |||
| ble ZGEMM_L1x4_LOOP_END | |||
| ble .LZGEMM_L1x4_LOOP_END | |||
| .align 5 | |||
| ZGEMM_L1x4_LOOP: | |||
| .LZGEMM_L1x4_LOOP: | |||
| KERNEL1x4_1 | |||
| KERNEL1x4_2 | |||
| @@ -640,9 +640,9 @@ ZGEMM_L1x4_LOOP: | |||
| KERNEL1x4_2 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L1x4_LOOP | |||
| bgt .LZGEMM_L1x4_LOOP | |||
| ZGEMM_L1x4_LOOP_END: | |||
| .LZGEMM_L1x4_LOOP_END: | |||
| KERNEL1x4_1 | |||
| KERNEL1x4_2 | |||
| @@ -654,9 +654,9 @@ ZGEMM_L1x4_LOOP_END: | |||
| KERNEL1x4_1 | |||
| KERNEL1x4_E2 | |||
| b ZGEMM_L1x4_SUB1 | |||
| b .LZGEMM_L1x4_SUB1 | |||
| ZGEMM_L1x4_SUB4: | |||
| .LZGEMM_L1x4_SUB4: | |||
| KERNEL1x4_SUBI1 | |||
| KERNEL1x4_SUB1 | |||
| @@ -668,48 +668,48 @@ ZGEMM_L1x4_SUB4: | |||
| KERNEL1x4_SUB1 | |||
| KERNEL1x4_SUB1 | |||
| b ZGEMM_L1x4_SUB1 | |||
| b .LZGEMM_L1x4_SUB1 | |||
| ZGEMM_L1x4_SUB0: | |||
| .LZGEMM_L1x4_SUB0: | |||
| andi. L, K, 7 | |||
| KERNEL1x4_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZGEMM_L1x4_SAVE | |||
| b ZGEMM_L1x4_SUB2 | |||
| ble .LZGEMM_L1x4_SAVE | |||
| b .LZGEMM_L1x4_SUB2 | |||
| ZGEMM_L1x4_SUB1: | |||
| .LZGEMM_L1x4_SUB1: | |||
| andi. L, K, 7 | |||
| ble ZGEMM_L1x4_SAVE | |||
| ble .LZGEMM_L1x4_SAVE | |||
| ZGEMM_L1x4_SUB2: | |||
| .LZGEMM_L1x4_SUB2: | |||
| KERNEL1x4_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L1x4_SUB2 | |||
| bgt .LZGEMM_L1x4_SUB2 | |||
| ZGEMM_L1x4_SAVE: | |||
| .LZGEMM_L1x4_SAVE: | |||
| SAVE1x4 | |||
| ZGEMM_L1x4_END: | |||
| .LZGEMM_L1x4_END: | |||
| ZGEMM_L1x2_BEGIN: | |||
| .LZGEMM_L1x2_BEGIN: | |||
| andi. T1, M, 2 | |||
| ble ZGEMM_L1x2_END | |||
| ble .LZGEMM_L1x2_END | |||
| mr BO, B | |||
| srawi. L, K, 3 | |||
| ble ZGEMM_L1x2_SUB0 | |||
| ble .LZGEMM_L1x2_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZGEMM_L1x2_SUB4 | |||
| ble .LZGEMM_L1x2_SUB4 | |||
| ZGEMM_L1x2_LOOP_START: | |||
| .LZGEMM_L1x2_LOOP_START: | |||
| LOAD1x2_1 | |||
| KERNEL1x2_I1 | |||
| @@ -723,11 +723,11 @@ ZGEMM_L1x2_LOOP_START: | |||
| KERNEL1x2_2 | |||
| addic. L, L, -2 | |||
| ble ZGEMM_L1x2_LOOP_END | |||
| ble .LZGEMM_L1x2_LOOP_END | |||
| .align 5 | |||
| ZGEMM_L1x2_LOOP: | |||
| .LZGEMM_L1x2_LOOP: | |||
| KERNEL1x2_1 | |||
| KERNEL1x2_2 | |||
| @@ -740,9 +740,9 @@ ZGEMM_L1x2_LOOP: | |||
| KERNEL1x2_2 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L1x2_LOOP | |||
| bgt .LZGEMM_L1x2_LOOP | |||
| ZGEMM_L1x2_LOOP_END: | |||
| .LZGEMM_L1x2_LOOP_END: | |||
| KERNEL1x2_1 | |||
| KERNEL1x2_2 | |||
| @@ -754,9 +754,9 @@ ZGEMM_L1x2_LOOP_END: | |||
| KERNEL1x2_1 | |||
| KERNEL1x2_E2 | |||
| b ZGEMM_L1x2_SUB1 | |||
| b .LZGEMM_L1x2_SUB1 | |||
| ZGEMM_L1x2_SUB4: | |||
| .LZGEMM_L1x2_SUB4: | |||
| KERNEL1x2_SUBI1 | |||
| KERNEL1x2_SUB1 | |||
| @@ -768,48 +768,48 @@ ZGEMM_L1x2_SUB4: | |||
| KERNEL1x2_SUB1 | |||
| KERNEL1x2_SUB1 | |||
| b ZGEMM_L1x2_SUB1 | |||
| b .LZGEMM_L1x2_SUB1 | |||
| ZGEMM_L1x2_SUB0: | |||
| .LZGEMM_L1x2_SUB0: | |||
| andi. L, K, 7 | |||
| KERNEL1x2_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZGEMM_L1x2_SAVE | |||
| b ZGEMM_L1x2_SUB2 | |||
| ble .LZGEMM_L1x2_SAVE | |||
| b .LZGEMM_L1x2_SUB2 | |||
| ZGEMM_L1x2_SUB1: | |||
| .LZGEMM_L1x2_SUB1: | |||
| andi. L, K, 7 | |||
| ble ZGEMM_L1x2_SAVE | |||
| ble .LZGEMM_L1x2_SAVE | |||
| ZGEMM_L1x2_SUB2: | |||
| .LZGEMM_L1x2_SUB2: | |||
| KERNEL1x2_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L1x2_SUB2 | |||
| bgt .LZGEMM_L1x2_SUB2 | |||
| ZGEMM_L1x2_SAVE: | |||
| .LZGEMM_L1x2_SAVE: | |||
| SAVE1x2 | |||
| ZGEMM_L1x2_END: | |||
| .LZGEMM_L1x2_END: | |||
| ZGEMM_L1x1_BEGIN: | |||
| .LZGEMM_L1x1_BEGIN: | |||
| andi. T1, M, 1 | |||
| ble ZGEMM_L1x1_END | |||
| ble .LZGEMM_L1x1_END | |||
| mr BO, B | |||
| srawi. L, K, 3 | |||
| ble ZGEMM_L1x1_SUB0 | |||
| ble .LZGEMM_L1x1_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZGEMM_L1x1_SUB4 | |||
| ble .LZGEMM_L1x1_SUB4 | |||
| ZGEMM_L1x1_LOOP_START: | |||
| .LZGEMM_L1x1_LOOP_START: | |||
| LOAD1x1_1 | |||
| KERNEL1x1_I1 | |||
| @@ -823,11 +823,11 @@ ZGEMM_L1x1_LOOP_START: | |||
| KERNEL1x1_2 | |||
| addic. L, L, -2 | |||
| ble ZGEMM_L1x1_LOOP_END | |||
| ble .LZGEMM_L1x1_LOOP_END | |||
| .align 5 | |||
| ZGEMM_L1x1_LOOP: | |||
| .LZGEMM_L1x1_LOOP: | |||
| KERNEL1x1_1 | |||
| KERNEL1x1_2 | |||
| @@ -840,9 +840,9 @@ ZGEMM_L1x1_LOOP: | |||
| KERNEL1x1_2 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L1x1_LOOP | |||
| bgt .LZGEMM_L1x1_LOOP | |||
| ZGEMM_L1x1_LOOP_END: | |||
| .LZGEMM_L1x1_LOOP_END: | |||
| KERNEL1x1_1 | |||
| KERNEL1x1_2 | |||
| @@ -854,9 +854,9 @@ ZGEMM_L1x1_LOOP_END: | |||
| KERNEL1x1_1 | |||
| KERNEL1x1_E2 | |||
| b ZGEMM_L1x1_SUB1 | |||
| b .LZGEMM_L1x1_SUB1 | |||
| ZGEMM_L1x1_SUB4: | |||
| .LZGEMM_L1x1_SUB4: | |||
| KERNEL1x1_SUBI1 | |||
| KERNEL1x1_SUB1 | |||
| @@ -868,34 +868,34 @@ ZGEMM_L1x1_SUB4: | |||
| KERNEL1x1_SUB1 | |||
| KERNEL1x1_SUB1 | |||
| b ZGEMM_L1x1_SUB1 | |||
| b .LZGEMM_L1x1_SUB1 | |||
| ZGEMM_L1x1_SUB0: | |||
| .LZGEMM_L1x1_SUB0: | |||
| andi. L, K, 7 | |||
| KERNEL1x1_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZGEMM_L1x1_SAVE | |||
| b ZGEMM_L1x1_SUB2 | |||
| ble .LZGEMM_L1x1_SAVE | |||
| b .LZGEMM_L1x1_SUB2 | |||
| ZGEMM_L1x1_SUB1: | |||
| .LZGEMM_L1x1_SUB1: | |||
| andi. L, K, 7 | |||
| ble ZGEMM_L1x1_SAVE | |||
| ble .LZGEMM_L1x1_SAVE | |||
| ZGEMM_L1x1_SUB2: | |||
| .LZGEMM_L1x1_SUB2: | |||
| KERNEL1x1_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZGEMM_L1x1_SUB2 | |||
| bgt .LZGEMM_L1x1_SUB2 | |||
| ZGEMM_L1x1_SAVE: | |||
| .LZGEMM_L1x1_SAVE: | |||
| SAVE1x1 | |||
| ZGEMM_L1x1_END: | |||
| .LZGEMM_L1x1_END: | |||
| ZGEMM_L1_END: | |||
| .LZGEMM_L1_END: | |||
| @@ -1,3 +1,39 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/05 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| #if defined(NN) || defined(NT) || defined(TN) || defined(TT) | |||
| #define XSFADD_R1 xsadddp | |||
| @@ -1,3 +1,38 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/05 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| /*********************************************************************/ | |||
| /* Copyright 2009, 2010 The University of Texas at Austin. */ | |||
| /* All rights reserved. */ | |||
| @@ -239,11 +274,11 @@ | |||
| #include "zgemm_macros_8x2_power8.S" | |||
| cmpwi cr0, M, 0 | |||
| ble L999 | |||
| ble .L999 | |||
| cmpwi cr0, N, 0 | |||
| ble L999 | |||
| ble .L999 | |||
| cmpwi cr0, K, 0 | |||
| ble L999 | |||
| ble .L999 | |||
| slwi LDC, LDC, ZBASE_SHIFT | |||
| li PRE, 256 | |||
| @@ -266,7 +301,7 @@ | |||
| #include "ztrmm_logic_8x2_power8.S" | |||
| L999: | |||
| .L999: | |||
| addi r3, 0, 0 | |||
| lfd f14, 0(SP) | |||
| @@ -1,7 +1,43 @@ | |||
| /*************************************************************************** | |||
| Copyright (c) 2013-2016, The OpenBLAS Project | |||
| All rights reserved. | |||
| Redistribution and use in source and binary forms, with or without | |||
| modification, are permitted provided that the following conditions are | |||
| met: | |||
| 1. Redistributions of source code must retain the above copyright | |||
| notice, this list of conditions and the following disclaimer. | |||
| 2. Redistributions in binary form must reproduce the above copyright | |||
| notice, this list of conditions and the following disclaimer in | |||
| the documentation and/or other materials provided with the | |||
| distribution. | |||
| 3. Neither the name of the OpenBLAS project nor the names of | |||
| its contributors may be used to endorse or promote products | |||
| derived from this software without specific prior written permission. | |||
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
| AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
| IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
| ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||
| LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
| DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
| SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
| CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
| OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
| USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| *****************************************************************************/ | |||
| /************************************************************************************** | |||
| * 2016/03/05 Werner Saar (wernsaar@googlemail.com) | |||
| * BLASTEST : OK | |||
| * CTEST : OK | |||
| * TEST : OK | |||
| * LAPACK-TEST : OK | |||
| **************************************************************************************/ | |||
| srawi. J, N, 1 | |||
| ble ZTRMM_L2_END | |||
| ble .LZTRMM_L2_END | |||
| ZTRMM_L2_BEGIN: | |||
| .LZTRMM_L2_BEGIN: | |||
| mr CO, C | |||
| mr AO, A | |||
| @@ -13,9 +49,9 @@ ZTRMM_L2_BEGIN: | |||
| #endif | |||
| srawi. I, M, 3 | |||
| ble ZTRMM_L2x8_END | |||
| ble .LZTRMM_L2x8_END | |||
| ZTRMM_L2x8_BEGIN: | |||
| .LZTRMM_L2x8_BEGIN: | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| @@ -42,11 +78,11 @@ ZTRMM_L2x8_BEGIN: | |||
| mr KKK, T1 | |||
| mr K1, T1 | |||
| srawi. L, K1, 3 // KTEMP / 8 -> L | |||
| ble ZTRMM_L2x8_SUB0 | |||
| ble .LZTRMM_L2x8_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZTRMM_L2x8_SUB4 | |||
| ble .LZTRMM_L2x8_SUB4 | |||
| ZTRMM_L2x8_LOOP_START: | |||
| .LZTRMM_L2x8_LOOP_START: | |||
| dcbt AO, PRE | |||
| LOAD2x8_1 | |||
| @@ -69,11 +105,11 @@ ZTRMM_L2x8_LOOP_START: | |||
| KERNEL2x8_2 | |||
| addic. L, L, -2 | |||
| ble ZTRMM_L2x8_LOOP_END | |||
| ble .LZTRMM_L2x8_LOOP_END | |||
| .align 5 | |||
| ZTRMM_L2x8_LOOP: | |||
| .LZTRMM_L2x8_LOOP: | |||
| dcbt AO, PRE | |||
| KERNEL2x8_1 | |||
| @@ -94,9 +130,9 @@ ZTRMM_L2x8_LOOP: | |||
| KERNEL2x8_2 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L2x8_LOOP | |||
| bgt .LZTRMM_L2x8_LOOP | |||
| ZTRMM_L2x8_LOOP_END: | |||
| .LZTRMM_L2x8_LOOP_END: | |||
| dcbt AO, PRE | |||
| KERNEL2x8_1 | |||
| @@ -115,9 +151,9 @@ ZTRMM_L2x8_LOOP_END: | |||
| KERNEL2x8_1 | |||
| KERNEL2x8_E2 | |||
| b ZTRMM_L2x8_SUB1 | |||
| b .LZTRMM_L2x8_SUB1 | |||
| ZTRMM_L2x8_SUB4: | |||
| .LZTRMM_L2x8_SUB4: | |||
| dcbt AO, PRE | |||
| KERNEL2x8_SUBI1 | |||
| @@ -133,31 +169,31 @@ ZTRMM_L2x8_SUB4: | |||
| KERNEL2x8_SUB1 | |||
| KERNEL2x8_SUB1 | |||
| b ZTRMM_L2x8_SUB1 | |||
| b .LZTRMM_L2x8_SUB1 | |||
| ZTRMM_L2x8_SUB0: | |||
| .LZTRMM_L2x8_SUB0: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| KERNEL2x8_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZTRMM_L2x8_SAVE | |||
| b ZTRMM_L2x8_SUB2 | |||
| ble .LZTRMM_L2x8_SAVE | |||
| b .LZTRMM_L2x8_SUB2 | |||
| ZTRMM_L2x8_SUB1: | |||
| .LZTRMM_L2x8_SUB1: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| ble ZTRMM_L2x8_SAVE | |||
| ble .LZTRMM_L2x8_SAVE | |||
| ZTRMM_L2x8_SUB2: | |||
| .LZTRMM_L2x8_SUB2: | |||
| KERNEL2x8_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L2x8_SUB2 | |||
| bgt .LZTRMM_L2x8_SUB2 | |||
| ZTRMM_L2x8_SAVE: | |||
| .LZTRMM_L2x8_SAVE: | |||
| SAVE2x8 | |||
| @@ -175,16 +211,16 @@ ZTRMM_L2x8_SAVE: | |||
| addic. I, I, -1 | |||
| bgt ZTRMM_L2x8_BEGIN | |||
| bgt .LZTRMM_L2x8_BEGIN | |||
| ZTRMM_L2x8_END: | |||
| .LZTRMM_L2x8_END: | |||
| ZTRMM_L2x4_BEGIN: | |||
| .LZTRMM_L2x4_BEGIN: | |||
| andi. T2, M, 7 | |||
| ble ZTRMM_L2x1_END | |||
| ble .LZTRMM_L2x1_END | |||
| andi. T1, M, 4 | |||
| ble ZTRMM_L2x4_END | |||
| ble .LZTRMM_L2x4_END | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| mr BO, B // B -> BO | |||
| @@ -210,11 +246,11 @@ ZTRMM_L2x4_BEGIN: | |||
| mr KKK, T1 | |||
| mr K1, T1 | |||
| srawi. L, K1, 3 // KTEMP / 8 -> L | |||
| ble ZTRMM_L2x4_SUB0 | |||
| ble .LZTRMM_L2x4_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZTRMM_L2x4_SUB4 | |||
| ble .LZTRMM_L2x4_SUB4 | |||
| ZTRMM_L2x4_LOOP_START: | |||
| .LZTRMM_L2x4_LOOP_START: | |||
| LOAD2x4_1 | |||
| KERNEL2x4_I1 | |||
| @@ -228,11 +264,11 @@ ZTRMM_L2x4_LOOP_START: | |||
| KERNEL2x4_2 | |||
| addic. L, L, -2 | |||
| ble ZTRMM_L2x4_LOOP_END | |||
| ble .LZTRMM_L2x4_LOOP_END | |||
| .align 5 | |||
| ZTRMM_L2x4_LOOP: | |||
| .LZTRMM_L2x4_LOOP: | |||
| KERNEL2x4_1 | |||
| KERNEL2x4_2 | |||
| @@ -245,9 +281,9 @@ ZTRMM_L2x4_LOOP: | |||
| KERNEL2x4_2 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L2x4_LOOP | |||
| bgt .LZTRMM_L2x4_LOOP | |||
| ZTRMM_L2x4_LOOP_END: | |||
| .LZTRMM_L2x4_LOOP_END: | |||
| KERNEL2x4_1 | |||
| KERNEL2x4_2 | |||
| @@ -259,9 +295,9 @@ ZTRMM_L2x4_LOOP_END: | |||
| KERNEL2x4_1 | |||
| KERNEL2x4_E2 | |||
| b ZTRMM_L2x4_SUB1 | |||
| b .LZTRMM_L2x4_SUB1 | |||
| ZTRMM_L2x4_SUB4: | |||
| .LZTRMM_L2x4_SUB4: | |||
| KERNEL2x4_SUBI1 | |||
| KERNEL2x4_SUB1 | |||
| @@ -273,31 +309,31 @@ ZTRMM_L2x4_SUB4: | |||
| KERNEL2x4_SUB1 | |||
| KERNEL2x4_SUB1 | |||
| b ZTRMM_L2x4_SUB1 | |||
| b .LZTRMM_L2x4_SUB1 | |||
| ZTRMM_L2x4_SUB0: | |||
| .LZTRMM_L2x4_SUB0: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| KERNEL2x4_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZTRMM_L2x4_SAVE | |||
| b ZTRMM_L2x4_SUB2 | |||
| ble .LZTRMM_L2x4_SAVE | |||
| b .LZTRMM_L2x4_SUB2 | |||
| ZTRMM_L2x4_SUB1: | |||
| .LZTRMM_L2x4_SUB1: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| ble ZTRMM_L2x4_SAVE | |||
| ble .LZTRMM_L2x4_SAVE | |||
| ZTRMM_L2x4_SUB2: | |||
| .LZTRMM_L2x4_SUB2: | |||
| KERNEL2x4_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L2x4_SUB2 | |||
| bgt .LZTRMM_L2x4_SUB2 | |||
| ZTRMM_L2x4_SAVE: | |||
| .LZTRMM_L2x4_SAVE: | |||
| SAVE2x4 | |||
| @@ -314,12 +350,12 @@ ZTRMM_L2x4_SAVE: | |||
| #endif | |||
| ZTRMM_L2x4_END: | |||
| .LZTRMM_L2x4_END: | |||
| ZTRMM_L2x2_BEGIN: | |||
| .LZTRMM_L2x2_BEGIN: | |||
| andi. T1, M, 2 | |||
| ble ZTRMM_L2x2_END | |||
| ble .LZTRMM_L2x2_END | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| mr BO, B // B -> BO | |||
| @@ -345,11 +381,11 @@ ZTRMM_L2x2_BEGIN: | |||
| mr KKK, T1 | |||
| mr K1, T1 | |||
| srawi. L, K1, 3 // KTEMP / 8 -> L | |||
| ble ZTRMM_L2x2_SUB0 | |||
| ble .LZTRMM_L2x2_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZTRMM_L2x2_SUB4 | |||
| ble .LZTRMM_L2x2_SUB4 | |||
| ZTRMM_L2x2_LOOP_START: | |||
| .LZTRMM_L2x2_LOOP_START: | |||
| LOAD2x2_1 | |||
| KERNEL2x2_I1 | |||
| @@ -363,11 +399,11 @@ ZTRMM_L2x2_LOOP_START: | |||
| KERNEL2x2_2 | |||
| addic. L, L, -2 | |||
| ble ZTRMM_L2x2_LOOP_END | |||
| ble .LZTRMM_L2x2_LOOP_END | |||
| .align 5 | |||
| ZTRMM_L2x2_LOOP: | |||
| .LZTRMM_L2x2_LOOP: | |||
| KERNEL2x2_1 | |||
| KERNEL2x2_2 | |||
| @@ -380,9 +416,9 @@ ZTRMM_L2x2_LOOP: | |||
| KERNEL2x2_2 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L2x2_LOOP | |||
| bgt .LZTRMM_L2x2_LOOP | |||
| ZTRMM_L2x2_LOOP_END: | |||
| .LZTRMM_L2x2_LOOP_END: | |||
| KERNEL2x2_1 | |||
| KERNEL2x2_2 | |||
| @@ -394,9 +430,9 @@ ZTRMM_L2x2_LOOP_END: | |||
| KERNEL2x2_1 | |||
| KERNEL2x2_E2 | |||
| b ZTRMM_L2x2_SUB1 | |||
| b .LZTRMM_L2x2_SUB1 | |||
| ZTRMM_L2x2_SUB4: | |||
| .LZTRMM_L2x2_SUB4: | |||
| KERNEL2x2_SUBI1 | |||
| KERNEL2x2_SUB1 | |||
| @@ -408,31 +444,31 @@ ZTRMM_L2x2_SUB4: | |||
| KERNEL2x2_SUB1 | |||
| KERNEL2x2_SUB1 | |||
| b ZTRMM_L2x2_SUB1 | |||
| b .LZTRMM_L2x2_SUB1 | |||
| ZTRMM_L2x2_SUB0: | |||
| .LZTRMM_L2x2_SUB0: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| KERNEL2x2_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZTRMM_L2x2_SAVE | |||
| b ZTRMM_L2x2_SUB2 | |||
| ble .LZTRMM_L2x2_SAVE | |||
| b .LZTRMM_L2x2_SUB2 | |||
| ZTRMM_L2x2_SUB1: | |||
| .LZTRMM_L2x2_SUB1: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| ble ZTRMM_L2x2_SAVE | |||
| ble .LZTRMM_L2x2_SAVE | |||
| ZTRMM_L2x2_SUB2: | |||
| .LZTRMM_L2x2_SUB2: | |||
| KERNEL2x2_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L2x2_SUB2 | |||
| bgt .LZTRMM_L2x2_SUB2 | |||
| ZTRMM_L2x2_SAVE: | |||
| .LZTRMM_L2x2_SAVE: | |||
| SAVE2x2 | |||
| @@ -449,12 +485,12 @@ ZTRMM_L2x2_SAVE: | |||
| #endif | |||
| ZTRMM_L2x2_END: | |||
| .LZTRMM_L2x2_END: | |||
| ZTRMM_L2x1_BEGIN: | |||
| .LZTRMM_L2x1_BEGIN: | |||
| andi. T1, M, 1 | |||
| ble ZTRMM_L2x1_END | |||
| ble .LZTRMM_L2x1_END | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| mr BO, B // B -> BO | |||
| @@ -480,11 +516,11 @@ ZTRMM_L2x1_BEGIN: | |||
| mr KKK, T1 | |||
| mr K1, T1 | |||
| srawi. L, K1, 3 // KTEMP / 8 -> L | |||
| ble ZTRMM_L2x1_SUB0 | |||
| ble .LZTRMM_L2x1_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZTRMM_L2x1_SUB4 | |||
| ble .LZTRMM_L2x1_SUB4 | |||
| ZTRMM_L2x1_LOOP_START: | |||
| .LZTRMM_L2x1_LOOP_START: | |||
| LOAD2x1_1 | |||
| KERNEL2x1_I1 | |||
| @@ -498,11 +534,11 @@ ZTRMM_L2x1_LOOP_START: | |||
| KERNEL2x1_2 | |||
| addic. L, L, -2 | |||
| ble ZTRMM_L2x1_LOOP_END | |||
| ble .LZTRMM_L2x1_LOOP_END | |||
| .align 5 | |||
| ZTRMM_L2x1_LOOP: | |||
| .LZTRMM_L2x1_LOOP: | |||
| KERNEL2x1_1 | |||
| KERNEL2x1_2 | |||
| @@ -515,9 +551,9 @@ ZTRMM_L2x1_LOOP: | |||
| KERNEL2x1_2 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L2x1_LOOP | |||
| bgt .LZTRMM_L2x1_LOOP | |||
| ZTRMM_L2x1_LOOP_END: | |||
| .LZTRMM_L2x1_LOOP_END: | |||
| KERNEL2x1_1 | |||
| KERNEL2x1_2 | |||
| @@ -529,9 +565,9 @@ ZTRMM_L2x1_LOOP_END: | |||
| KERNEL2x1_1 | |||
| KERNEL2x1_E2 | |||
| b ZTRMM_L2x1_SUB1 | |||
| b .LZTRMM_L2x1_SUB1 | |||
| ZTRMM_L2x1_SUB4: | |||
| .LZTRMM_L2x1_SUB4: | |||
| KERNEL2x1_SUBI1 | |||
| KERNEL2x1_SUB1 | |||
| @@ -543,31 +579,31 @@ ZTRMM_L2x1_SUB4: | |||
| KERNEL2x1_SUB1 | |||
| KERNEL2x1_SUB1 | |||
| b ZTRMM_L2x1_SUB1 | |||
| b .LZTRMM_L2x1_SUB1 | |||
| ZTRMM_L2x1_SUB0: | |||
| .LZTRMM_L2x1_SUB0: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| KERNEL2x1_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZTRMM_L2x1_SAVE | |||
| b ZTRMM_L2x1_SUB2 | |||
| ble .LZTRMM_L2x1_SAVE | |||
| b .LZTRMM_L2x1_SUB2 | |||
| ZTRMM_L2x1_SUB1: | |||
| .LZTRMM_L2x1_SUB1: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| ble ZTRMM_L2x1_SAVE | |||
| ble .LZTRMM_L2x1_SAVE | |||
| ZTRMM_L2x1_SUB2: | |||
| .LZTRMM_L2x1_SUB2: | |||
| KERNEL2x1_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L2x1_SUB2 | |||
| bgt .LZTRMM_L2x1_SUB2 | |||
| ZTRMM_L2x1_SAVE: | |||
| .LZTRMM_L2x1_SAVE: | |||
| SAVE2x1 | |||
| @@ -584,7 +620,7 @@ ZTRMM_L2x1_SAVE: | |||
| #endif | |||
| ZTRMM_L2x1_END: | |||
| .LZTRMM_L2x1_END: | |||
| slwi T1, K, 5 | |||
| add B, B, T1 | |||
| @@ -595,23 +631,23 @@ ZTRMM_L2x1_END: | |||
| addic. J, J, -1 | |||
| bgt ZTRMM_L2_BEGIN | |||
| bgt .LZTRMM_L2_BEGIN | |||
| andi. T2, N, 1 | |||
| ble L999 | |||
| ble .L999 | |||
| ZTRMM_L2_END: | |||
| .LZTRMM_L2_END: | |||
| b ZTRMM_L1_BEGIN | |||
| b .LZTRMM_L1_BEGIN | |||
| L999_H1: | |||
| .L999_H1: | |||
| b L999 | |||
| b .L999 | |||
| ZTRMM_L1_BEGIN: | |||
| .LZTRMM_L1_BEGIN: | |||
| andi. T1, N, 1 | |||
| ble ZTRMM_L1_END | |||
| ble .LZTRMM_L1_END | |||
| mr CO, C | |||
| mr AO, A | |||
| @@ -620,9 +656,9 @@ ZTRMM_L1_BEGIN: | |||
| #endif | |||
| srawi. I, M, 3 | |||
| ble ZTRMM_L1x8_END | |||
| ble .LZTRMM_L1x8_END | |||
| ZTRMM_L1x8_BEGIN: | |||
| .LZTRMM_L1x8_BEGIN: | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| @@ -649,11 +685,11 @@ ZTRMM_L1x8_BEGIN: | |||
| mr KKK, T1 | |||
| mr K1, T1 | |||
| srawi. L, K1, 3 // KTEMP / 8 -> L | |||
| ble ZTRMM_L1x8_SUB0 | |||
| ble .LZTRMM_L1x8_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZTRMM_L1x8_SUB4 | |||
| ble .LZTRMM_L1x8_SUB4 | |||
| ZTRMM_L1x8_LOOP_START: | |||
| .LZTRMM_L1x8_LOOP_START: | |||
| dcbt AO, PRE | |||
| LOAD1x8_1 | |||
| @@ -676,11 +712,11 @@ ZTRMM_L1x8_LOOP_START: | |||
| KERNEL1x8_2 | |||
| addic. L, L, -2 | |||
| ble ZTRMM_L1x8_LOOP_END | |||
| ble .LZTRMM_L1x8_LOOP_END | |||
| .align 5 | |||
| ZTRMM_L1x8_LOOP: | |||
| .LZTRMM_L1x8_LOOP: | |||
| dcbt AO, PRE | |||
| KERNEL1x8_1 | |||
| @@ -701,9 +737,9 @@ ZTRMM_L1x8_LOOP: | |||
| KERNEL1x8_2 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L1x8_LOOP | |||
| bgt .LZTRMM_L1x8_LOOP | |||
| ZTRMM_L1x8_LOOP_END: | |||
| .LZTRMM_L1x8_LOOP_END: | |||
| dcbt AO, PRE | |||
| KERNEL1x8_1 | |||
| @@ -722,9 +758,9 @@ ZTRMM_L1x8_LOOP_END: | |||
| KERNEL1x8_1 | |||
| KERNEL1x8_E2 | |||
| b ZTRMM_L1x8_SUB1 | |||
| b .LZTRMM_L1x8_SUB1 | |||
| ZTRMM_L1x8_SUB4: | |||
| .LZTRMM_L1x8_SUB4: | |||
| dcbt AO, PRE | |||
| KERNEL1x8_SUBI1 | |||
| @@ -740,31 +776,31 @@ ZTRMM_L1x8_SUB4: | |||
| KERNEL1x8_SUB1 | |||
| KERNEL1x8_SUB1 | |||
| b ZTRMM_L1x8_SUB1 | |||
| b .LZTRMM_L1x8_SUB1 | |||
| ZTRMM_L1x8_SUB0: | |||
| .LZTRMM_L1x8_SUB0: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| KERNEL1x8_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZTRMM_L1x8_SAVE | |||
| b ZTRMM_L1x8_SUB2 | |||
| ble .LZTRMM_L1x8_SAVE | |||
| b .LZTRMM_L1x8_SUB2 | |||
| ZTRMM_L1x8_SUB1: | |||
| .LZTRMM_L1x8_SUB1: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| ble ZTRMM_L1x8_SAVE | |||
| ble .LZTRMM_L1x8_SAVE | |||
| ZTRMM_L1x8_SUB2: | |||
| .LZTRMM_L1x8_SUB2: | |||
| KERNEL1x8_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L1x8_SUB2 | |||
| bgt .LZTRMM_L1x8_SUB2 | |||
| ZTRMM_L1x8_SAVE: | |||
| .LZTRMM_L1x8_SAVE: | |||
| SAVE1x8 | |||
| @@ -782,16 +818,16 @@ ZTRMM_L1x8_SAVE: | |||
| addic. I, I, -1 | |||
| bgt ZTRMM_L1x8_BEGIN | |||
| bgt .LZTRMM_L1x8_BEGIN | |||
| ZTRMM_L1x8_END: | |||
| .LZTRMM_L1x8_END: | |||
| ZTRMM_L1x4_BEGIN: | |||
| .LZTRMM_L1x4_BEGIN: | |||
| andi. T2, M, 7 | |||
| ble ZTRMM_L1x1_END | |||
| ble .LZTRMM_L1x1_END | |||
| andi. T1, M, 4 | |||
| ble ZTRMM_L1x4_END | |||
| ble .LZTRMM_L1x4_END | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| mr BO, B // B -> BO | |||
| @@ -817,11 +853,11 @@ ZTRMM_L1x4_BEGIN: | |||
| mr KKK, T1 | |||
| mr K1, T1 | |||
| srawi. L, K1, 3 // KTEMP / 8 -> L | |||
| ble ZTRMM_L1x4_SUB0 | |||
| ble .LZTRMM_L1x4_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZTRMM_L1x4_SUB4 | |||
| ble .LZTRMM_L1x4_SUB4 | |||
| ZTRMM_L1x4_LOOP_START: | |||
| .LZTRMM_L1x4_LOOP_START: | |||
| LOAD1x4_1 | |||
| KERNEL1x4_I1 | |||
| @@ -835,11 +871,11 @@ ZTRMM_L1x4_LOOP_START: | |||
| KERNEL1x4_2 | |||
| addic. L, L, -2 | |||
| ble ZTRMM_L1x4_LOOP_END | |||
| ble .LZTRMM_L1x4_LOOP_END | |||
| .align 5 | |||
| ZTRMM_L1x4_LOOP: | |||
| .LZTRMM_L1x4_LOOP: | |||
| KERNEL1x4_1 | |||
| KERNEL1x4_2 | |||
| @@ -852,9 +888,9 @@ ZTRMM_L1x4_LOOP: | |||
| KERNEL1x4_2 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L1x4_LOOP | |||
| bgt .LZTRMM_L1x4_LOOP | |||
| ZTRMM_L1x4_LOOP_END: | |||
| .LZTRMM_L1x4_LOOP_END: | |||
| KERNEL1x4_1 | |||
| KERNEL1x4_2 | |||
| @@ -866,9 +902,9 @@ ZTRMM_L1x4_LOOP_END: | |||
| KERNEL1x4_1 | |||
| KERNEL1x4_E2 | |||
| b ZTRMM_L1x4_SUB1 | |||
| b .LZTRMM_L1x4_SUB1 | |||
| ZTRMM_L1x4_SUB4: | |||
| .LZTRMM_L1x4_SUB4: | |||
| KERNEL1x4_SUBI1 | |||
| KERNEL1x4_SUB1 | |||
| @@ -880,31 +916,31 @@ ZTRMM_L1x4_SUB4: | |||
| KERNEL1x4_SUB1 | |||
| KERNEL1x4_SUB1 | |||
| b ZTRMM_L1x4_SUB1 | |||
| b .LZTRMM_L1x4_SUB1 | |||
| ZTRMM_L1x4_SUB0: | |||
| .LZTRMM_L1x4_SUB0: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| KERNEL1x4_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZTRMM_L1x4_SAVE | |||
| b ZTRMM_L1x4_SUB2 | |||
| ble .LZTRMM_L1x4_SAVE | |||
| b .LZTRMM_L1x4_SUB2 | |||
| ZTRMM_L1x4_SUB1: | |||
| .LZTRMM_L1x4_SUB1: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| ble ZTRMM_L1x4_SAVE | |||
| ble .LZTRMM_L1x4_SAVE | |||
| ZTRMM_L1x4_SUB2: | |||
| .LZTRMM_L1x4_SUB2: | |||
| KERNEL1x4_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L1x4_SUB2 | |||
| bgt .LZTRMM_L1x4_SUB2 | |||
| ZTRMM_L1x4_SAVE: | |||
| .LZTRMM_L1x4_SAVE: | |||
| SAVE1x4 | |||
| @@ -921,12 +957,12 @@ ZTRMM_L1x4_SAVE: | |||
| #endif | |||
| ZTRMM_L1x4_END: | |||
| .LZTRMM_L1x4_END: | |||
| ZTRMM_L1x2_BEGIN: | |||
| .LZTRMM_L1x2_BEGIN: | |||
| andi. T1, M, 2 | |||
| ble ZTRMM_L1x2_END | |||
| ble .LZTRMM_L1x2_END | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| mr BO, B // B -> BO | |||
| @@ -952,11 +988,11 @@ ZTRMM_L1x2_BEGIN: | |||
| mr KKK, T1 | |||
| mr K1, T1 | |||
| srawi. L, K1, 3 // KTEMP / 8 -> L | |||
| ble ZTRMM_L1x2_SUB0 | |||
| ble .LZTRMM_L1x2_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZTRMM_L1x2_SUB4 | |||
| ble .LZTRMM_L1x2_SUB4 | |||
| ZTRMM_L1x2_LOOP_START: | |||
| .LZTRMM_L1x2_LOOP_START: | |||
| LOAD1x2_1 | |||
| KERNEL1x2_I1 | |||
| @@ -970,11 +1006,11 @@ ZTRMM_L1x2_LOOP_START: | |||
| KERNEL1x2_2 | |||
| addic. L, L, -2 | |||
| ble ZTRMM_L1x2_LOOP_END | |||
| ble .LZTRMM_L1x2_LOOP_END | |||
| .align 5 | |||
| ZTRMM_L1x2_LOOP: | |||
| .LZTRMM_L1x2_LOOP: | |||
| KERNEL1x2_1 | |||
| KERNEL1x2_2 | |||
| @@ -987,9 +1023,9 @@ ZTRMM_L1x2_LOOP: | |||
| KERNEL1x2_2 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L1x2_LOOP | |||
| bgt .LZTRMM_L1x2_LOOP | |||
| ZTRMM_L1x2_LOOP_END: | |||
| .LZTRMM_L1x2_LOOP_END: | |||
| KERNEL1x2_1 | |||
| KERNEL1x2_2 | |||
| @@ -1001,9 +1037,9 @@ ZTRMM_L1x2_LOOP_END: | |||
| KERNEL1x2_1 | |||
| KERNEL1x2_E2 | |||
| b ZTRMM_L1x2_SUB1 | |||
| b .LZTRMM_L1x2_SUB1 | |||
| ZTRMM_L1x2_SUB4: | |||
| .LZTRMM_L1x2_SUB4: | |||
| KERNEL1x2_SUBI1 | |||
| KERNEL1x2_SUB1 | |||
| @@ -1015,31 +1051,31 @@ ZTRMM_L1x2_SUB4: | |||
| KERNEL1x2_SUB1 | |||
| KERNEL1x2_SUB1 | |||
| b ZTRMM_L1x2_SUB1 | |||
| b .LZTRMM_L1x2_SUB1 | |||
| ZTRMM_L1x2_SUB0: | |||
| .LZTRMM_L1x2_SUB0: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| KERNEL1x2_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZTRMM_L1x2_SAVE | |||
| b ZTRMM_L1x2_SUB2 | |||
| ble .LZTRMM_L1x2_SAVE | |||
| b .LZTRMM_L1x2_SUB2 | |||
| ZTRMM_L1x2_SUB1: | |||
| .LZTRMM_L1x2_SUB1: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| ble ZTRMM_L1x2_SAVE | |||
| ble .LZTRMM_L1x2_SAVE | |||
| ZTRMM_L1x2_SUB2: | |||
| .LZTRMM_L1x2_SUB2: | |||
| KERNEL1x2_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L1x2_SUB2 | |||
| bgt .LZTRMM_L1x2_SUB2 | |||
| ZTRMM_L1x2_SAVE: | |||
| .LZTRMM_L1x2_SAVE: | |||
| SAVE1x2 | |||
| @@ -1056,12 +1092,12 @@ ZTRMM_L1x2_SAVE: | |||
| #endif | |||
| ZTRMM_L1x2_END: | |||
| .LZTRMM_L1x2_END: | |||
| ZTRMM_L1x1_BEGIN: | |||
| .LZTRMM_L1x1_BEGIN: | |||
| andi. T1, M, 1 | |||
| ble ZTRMM_L1x1_END | |||
| ble .LZTRMM_L1x1_END | |||
| #if (defined(LEFT) && defined(TRANSA)) || (!defined(LEFT) && !defined(TRANSA)) | |||
| mr BO, B // B -> BO | |||
| @@ -1087,11 +1123,11 @@ ZTRMM_L1x1_BEGIN: | |||
| mr KKK, T1 | |||
| mr K1, T1 | |||
| srawi. L, K1, 3 // KTEMP / 8 -> L | |||
| ble ZTRMM_L1x1_SUB0 | |||
| ble .LZTRMM_L1x1_SUB0 | |||
| cmpwi cr0, L, 1 | |||
| ble ZTRMM_L1x1_SUB4 | |||
| ble .LZTRMM_L1x1_SUB4 | |||
| ZTRMM_L1x1_LOOP_START: | |||
| .LZTRMM_L1x1_LOOP_START: | |||
| LOAD1x1_1 | |||
| KERNEL1x1_I1 | |||
| @@ -1105,11 +1141,11 @@ ZTRMM_L1x1_LOOP_START: | |||
| KERNEL1x1_2 | |||
| addic. L, L, -2 | |||
| ble ZTRMM_L1x1_LOOP_END | |||
| ble .LZTRMM_L1x1_LOOP_END | |||
| .align 5 | |||
| ZTRMM_L1x1_LOOP: | |||
| .LZTRMM_L1x1_LOOP: | |||
| KERNEL1x1_1 | |||
| KERNEL1x1_2 | |||
| @@ -1122,9 +1158,9 @@ ZTRMM_L1x1_LOOP: | |||
| KERNEL1x1_2 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L1x1_LOOP | |||
| bgt .LZTRMM_L1x1_LOOP | |||
| ZTRMM_L1x1_LOOP_END: | |||
| .LZTRMM_L1x1_LOOP_END: | |||
| KERNEL1x1_1 | |||
| KERNEL1x1_2 | |||
| @@ -1136,9 +1172,9 @@ ZTRMM_L1x1_LOOP_END: | |||
| KERNEL1x1_1 | |||
| KERNEL1x1_E2 | |||
| b ZTRMM_L1x1_SUB1 | |||
| b .LZTRMM_L1x1_SUB1 | |||
| ZTRMM_L1x1_SUB4: | |||
| .LZTRMM_L1x1_SUB4: | |||
| KERNEL1x1_SUBI1 | |||
| KERNEL1x1_SUB1 | |||
| @@ -1150,31 +1186,31 @@ ZTRMM_L1x1_SUB4: | |||
| KERNEL1x1_SUB1 | |||
| KERNEL1x1_SUB1 | |||
| b ZTRMM_L1x1_SUB1 | |||
| b .LZTRMM_L1x1_SUB1 | |||
| ZTRMM_L1x1_SUB0: | |||
| .LZTRMM_L1x1_SUB0: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| KERNEL1x1_SUBI1 | |||
| addic. L, L, -1 | |||
| ble ZTRMM_L1x1_SAVE | |||
| b ZTRMM_L1x1_SUB2 | |||
| ble .LZTRMM_L1x1_SAVE | |||
| b .LZTRMM_L1x1_SUB2 | |||
| ZTRMM_L1x1_SUB1: | |||
| .LZTRMM_L1x1_SUB1: | |||
| andi. L, K1, 7 // K1 & 7 -> L | |||
| ble ZTRMM_L1x1_SAVE | |||
| ble .LZTRMM_L1x1_SAVE | |||
| ZTRMM_L1x1_SUB2: | |||
| .LZTRMM_L1x1_SUB2: | |||
| KERNEL1x1_SUB1 | |||
| addic. L, L, -1 | |||
| bgt ZTRMM_L1x1_SUB2 | |||
| bgt .LZTRMM_L1x1_SUB2 | |||
| ZTRMM_L1x1_SAVE: | |||
| .LZTRMM_L1x1_SAVE: | |||
| SAVE1x1 | |||
| @@ -1191,11 +1227,11 @@ ZTRMM_L1x1_SAVE: | |||
| #endif | |||
| ZTRMM_L1x1_END: | |||
| .LZTRMM_L1x1_END: | |||
| #if !defined(LEFT) | |||
| addi KK, KK, 1 // KK += Number of values in B | |||
| #endif | |||
| ZTRMM_L1_END: | |||
| .LZTRMM_L1_END: | |||