Browse Source

Rewrite to use FMA with Householder reflectors

tags/v0.3.22^2
Martin Kroeker GitHub 2 years ago
parent
commit
2a83ec1f79
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 74 additions and 66 deletions
  1. +37
    -33
      lapack-netlib/SRC/dhgeqz.f
  2. +37
    -33
      lapack-netlib/SRC/shgeqz.f

+ 37
- 33
lapack-netlib/SRC/dhgeqz.f View File

@@ -337,9 +337,9 @@
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
$ WR2
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
$ WABS, WI, WR, WR2
* .. * ..
* .. Local Arrays .. * .. Local Arrays ..
DOUBLE PRECISION V( 3 ) DOUBLE PRECISION V( 3 )
@@ -1127,25 +1127,27 @@
H( J+2, J-1 ) = ZERO H( J+2, J-1 ) = ZERO
END IF END IF
* *
T2 = TAU*V( 2 )
T3 = TAU*V( 3 )
DO 230 JC = J, ILASTM DO 230 JC = J, ILASTM
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC ) )
H( J, JC ) = H( J, JC ) - TEMP
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC ) )
T( J, JC ) = T( J, JC ) - TEMP2
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC )
H( J, JC ) = H( J, JC ) - TEMP*TAU
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC )
T( J, JC ) = T( J, JC ) - TEMP2*TAU
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
230 CONTINUE 230 CONTINUE
IF( ILQ ) THEN IF( ILQ ) THEN
DO 240 JR = 1, N DO 240 JR = 1, N
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 ) )
Q( JR, J ) = Q( JR, J ) - TEMP
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 )
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
240 CONTINUE 240 CONTINUE
END IF END IF
* *
@@ -1233,27 +1235,29 @@
* *
* Apply transformations from the right. * Apply transformations from the right.
* *
T2 = TAU*V(2)
T3 = TAU*V(3)
DO 260 JR = IFRSTM, MIN( J+3, ILAST ) DO 260 JR = IFRSTM, MIN( J+3, ILAST )
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 ) )
H( JR, J ) = H( JR, J ) - TEMP
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 )
H( JR, J ) = H( JR, J ) - TEMP*TAU
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
260 CONTINUE 260 CONTINUE
DO 270 JR = IFRSTM, J + 2 DO 270 JR = IFRSTM, J + 2
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 ) )
T( JR, J ) = T( JR, J ) - TEMP
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 )
T( JR, J ) = T( JR, J ) - TEMP*TAU
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
270 CONTINUE 270 CONTINUE
IF( ILZ ) THEN IF( ILZ ) THEN
DO 280 JR = 1, N DO 280 JR = 1, N
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 ) )
Z( JR, J ) = Z( JR, J ) - TEMP
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 )
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
280 CONTINUE 280 CONTINUE
END IF END IF
T( J+1, J ) = ZERO T( J+1, J ) = ZERO


+ 37
- 33
lapack-netlib/SRC/shgeqz.f View File

@@ -337,9 +337,9 @@
$ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL, $ BTOL, C, C11I, C11R, C12, C21, C22I, C22R, CL,
$ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX, $ CQ, CR, CZ, ESHIFT, S, S1, S1INV, S2, SAFMAX,
$ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1, $ SAFMIN, SCALE, SL, SQI, SQR, SR, SZI, SZR, T1,
$ TAU, TEMP, TEMP2, TEMPI, TEMPR, U1, U12, U12L,
$ U2, ULP, VS, W11, W12, W21, W22, WABS, WI, WR,
$ WR2
$ T2, T3, TAU, TEMP, TEMP2, TEMPI, TEMPR, U1,
$ U12, U12L, U2, ULP, VS, W11, W12, W21, W22,
$ WABS, WI, WR, WR2
* .. * ..
* .. Local Arrays .. * .. Local Arrays ..
REAL V( 3 ) REAL V( 3 )
@@ -1127,25 +1127,27 @@
H( J+2, J-1 ) = ZERO H( J+2, J-1 ) = ZERO
END IF END IF
* *
T2 = TAU * V( 2 )
T3 = TAU * V( 3 )
DO 230 JC = J, ILASTM DO 230 JC = J, ILASTM
TEMP = TAU*( H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC ) )
H( J, JC ) = H( J, JC ) - TEMP
H( J+1, JC ) = H( J+1, JC ) - TEMP*V( 2 )
H( J+2, JC ) = H( J+2, JC ) - TEMP*V( 3 )
TEMP2 = TAU*( T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC ) )
T( J, JC ) = T( J, JC ) - TEMP2
T( J+1, JC ) = T( J+1, JC ) - TEMP2*V( 2 )
T( J+2, JC ) = T( J+2, JC ) - TEMP2*V( 3 )
TEMP = H( J, JC )+V( 2 )*H( J+1, JC )+V( 3 )*
$ H( J+2, JC )
H( J, JC ) = H( J, JC ) - TEMP*TAU
H( J+1, JC ) = H( J+1, JC ) - TEMP*T2
H( J+2, JC ) = H( J+2, JC ) - TEMP*T3
TEMP2 = T( J, JC )+V( 2 )*T( J+1, JC )+V( 3 )*
$ T( J+2, JC )
T( J, JC ) = T( J, JC ) - TEMP2*TAU
T( J+1, JC ) = T( J+1, JC ) - TEMP2*T2
T( J+2, JC ) = T( J+2, JC ) - TEMP2*T3
230 CONTINUE 230 CONTINUE
IF( ILQ ) THEN IF( ILQ ) THEN
DO 240 JR = 1, N DO 240 JR = 1, N
TEMP = TAU*( Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 ) )
Q( JR, J ) = Q( JR, J ) - TEMP
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*V( 2 )
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*V( 3 )
TEMP = Q( JR, J )+V( 2 )*Q( JR, J+1 )+V( 3 )*
$ Q( JR, J+2 )
Q( JR, J ) = Q( JR, J ) - TEMP*TAU
Q( JR, J+1 ) = Q( JR, J+1 ) - TEMP*T2
Q( JR, J+2 ) = Q( JR, J+2 ) - TEMP*T3
240 CONTINUE 240 CONTINUE
END IF END IF
* *
@@ -1233,27 +1235,29 @@
* *
* Apply transformations from the right. * Apply transformations from the right.
* *
T2 = TAU*V( 2 )
T3 = TAU*V( 3 )
DO 260 JR = IFRSTM, MIN( J+3, ILAST ) DO 260 JR = IFRSTM, MIN( J+3, ILAST )
TEMP = TAU*( H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 ) )
H( JR, J ) = H( JR, J ) - TEMP
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*V( 2 )
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*V( 3 )
TEMP = H( JR, J )+V( 2 )*H( JR, J+1 )+V( 3 )*
$ H( JR, J+2 )
H( JR, J ) = H( JR, J ) - TEMP*TAU
H( JR, J+1 ) = H( JR, J+1 ) - TEMP*T2
H( JR, J+2 ) = H( JR, J+2 ) - TEMP*T3
260 CONTINUE 260 CONTINUE
DO 270 JR = IFRSTM, J + 2 DO 270 JR = IFRSTM, J + 2
TEMP = TAU*( T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 ) )
T( JR, J ) = T( JR, J ) - TEMP
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*V( 2 )
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*V( 3 )
TEMP = T( JR, J )+V( 2 )*T( JR, J+1 )+V( 3 )*
$ T( JR, J+2 )
T( JR, J ) = T( JR, J ) - TEMP*TAU
T( JR, J+1 ) = T( JR, J+1 ) - TEMP*T2
T( JR, J+2 ) = T( JR, J+2 ) - TEMP*T3
270 CONTINUE 270 CONTINUE
IF( ILZ ) THEN IF( ILZ ) THEN
DO 280 JR = 1, N DO 280 JR = 1, N
TEMP = TAU*( Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 ) )
Z( JR, J ) = Z( JR, J ) - TEMP
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*V( 2 )
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*V( 3 )
TEMP = Z( JR, J )+V( 2 )*Z( JR, J+1 )+V( 3 )*
$ Z( JR, J+2 )
Z( JR, J ) = Z( JR, J ) - TEMP*TAU
Z( JR, J+1 ) = Z( JR, J+1 ) - TEMP*T2
Z( JR, J+2 ) = Z( JR, J+2 ) - TEMP*T3
280 CONTINUE 280 CONTINUE
END IF END IF
T( J+1, J ) = ZERO T( J+1, J ) = ZERO


Loading…
Cancel
Save