|
- *> \brief \b ZHETRI_3X
- *
- * =========== DOCUMENTATION ===========
- *
- * Online html documentation available at
- * http://www.netlib.org/lapack/explore-html/
- *
- *> \htmlonly
- *> Download ZHETRI_3X + dependencies
- *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetri_3x.f">
- *> [TGZ]</a>
- *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetri_3x.f">
- *> [ZIP]</a>
- *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetri_3x.f">
- *> [TXT]</a>
- *> \endhtmlonly
- *
- * Definition:
- * ===========
- *
- * SUBROUTINE ZHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
- *
- * .. Scalar Arguments ..
- * CHARACTER UPLO
- * INTEGER INFO, LDA, N, NB
- * ..
- * .. Array Arguments ..
- * INTEGER IPIV( * )
- * COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * )
- * ..
- *
- *
- *> \par Purpose:
- * =============
- *>
- *> \verbatim
- *> ZHETRI_3X computes the inverse of a complex Hermitian indefinite
- *> matrix A using the factorization computed by ZHETRF_RK or ZHETRF_BK:
- *>
- *> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T),
- *>
- *> where U (or L) is a unit upper (or lower) triangular matrix,
- *> U**H (or L**H) is the conjugate transpose of U (or L), P is a
- *> permutation matrix, P**T is the transpose of P, and D is Hermitian
- *> and block diagonal with 1-by-1 and 2-by-2 diagonal blocks.
- *>
- *> This is the blocked version of the algorithm, calling Level 3 BLAS.
- *> \endverbatim
- *
- * Arguments:
- * ==========
- *
- *> \param[in] UPLO
- *> \verbatim
- *> UPLO is CHARACTER*1
- *> Specifies whether the details of the factorization are
- *> stored as an upper or lower triangular matrix.
- *> = 'U': Upper triangle of A is stored;
- *> = 'L': Lower triangle of A is stored.
- *> \endverbatim
- *>
- *> \param[in] N
- *> \verbatim
- *> N is INTEGER
- *> The order of the matrix A. N >= 0.
- *> \endverbatim
- *>
- *> \param[in,out] A
- *> \verbatim
- *> A is COMPLEX*16 array, dimension (LDA,N)
- *> On entry, diagonal of the block diagonal matrix D and
- *> factors U or L as computed by ZHETRF_RK or ZHETRF_BK:
- *> a) ONLY diagonal elements of the Hermitian block diagonal
- *> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
- *> (superdiagonal (or subdiagonal) elements of D
- *> should be provided on entry in array E), and
- *> b) If UPLO = 'U': factor U in the superdiagonal part of A.
- *> If UPLO = 'L': factor L in the subdiagonal part of A.
- *>
- *> On exit, if INFO = 0, the Hermitian inverse of the original
- *> matrix.
- *> If UPLO = 'U': the upper triangular part of the inverse
- *> is formed and the part of A below the diagonal is not
- *> referenced;
- *> If UPLO = 'L': the lower triangular part of the inverse
- *> is formed and the part of A above the diagonal is not
- *> referenced.
- *> \endverbatim
- *>
- *> \param[in] LDA
- *> \verbatim
- *> LDA is INTEGER
- *> The leading dimension of the array A. LDA >= max(1,N).
- *> \endverbatim
- *>
- *> \param[in] E
- *> \verbatim
- *> E is COMPLEX*16 array, dimension (N)
- *> On entry, contains the superdiagonal (or subdiagonal)
- *> elements of the Hermitian block diagonal matrix D
- *> with 1-by-1 or 2-by-2 diagonal blocks, where
- *> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
- *> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
- *>
- *> NOTE: For a 1-by-1 diagonal block D(k), where
- *> 1 <= k <= N, the element E(k) is not referenced in either
- *> the UPLO = 'U' or the UPLO = 'L' case.
- *> \endverbatim
- *>
- *> \param[in] IPIV
- *> \verbatim
- *> IPIV is INTEGER array, dimension (N)
- *> Details of the interchanges and the block structure of D
- *> as determined by ZHETRF_RK or ZHETRF_BK.
- *> \endverbatim
- *>
- *> \param[out] WORK
- *> \verbatim
- *> WORK is COMPLEX*16 array, dimension (N+NB+1,NB+3).
- *> \endverbatim
- *>
- *> \param[in] NB
- *> \verbatim
- *> NB is INTEGER
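- *> Block size. NB >= 1 is required when N > 0, because the
- *> blocked loops advance through the matrix in steps of NB
- *> (or NB+1) columns.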
- *> \endverbatim
- *>
- *> \param[out] INFO
- *> \verbatim
- *> INFO is INTEGER
- *> = 0: successful exit
- *> < 0: if INFO = -i, the i-th argument had an illegal value
- *> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
- *> inverse could not be computed.
- *> \endverbatim
- *
- * Authors:
- * ========
- *
- *> \author Univ. of Tennessee
- *> \author Univ. of California Berkeley
- *> \author Univ. of Colorado Denver
- *> \author NAG Ltd.
- *
- *> \ingroup complex16HEcomputational
- *
- *> \par Contributors:
- * ==================
- *> \verbatim
- *>
- *> June 2017, Igor Kozachenko,
- *> Computer Science Division,
- *> University of California, Berkeley
- *>
- *> \endverbatim
- *
- * =====================================================================
- SUBROUTINE ZHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
- *
- * -- LAPACK computational routine --
- * -- LAPACK is a software package provided by Univ. of Tennessee, --
- * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
- *
- * Overwrites the triangle of A selected by UPLO with the inverse
- * formed from the unit triangular factor stored in A, the diagonal
- * of D stored on the diagonal of A, the off-diagonal of D stored in
- * E, and the interchanges stored in IPIV. The work is done block
- * column by block column, NB (or NB+1) columns at a time.
- *
- * INFO on exit:
- * = 0 success
- * = -1, -2, -4, -8 UPLO, N, LDA or NB had an illegal value
- * (XERBLA is called)
- * > 0 IPIV(INFO) > 0 and A(INFO,INFO) is exactly zero;
- * A is left untouched in that case
- *
- * .. Scalar Arguments ..
- CHARACTER UPLO
- INTEGER INFO, LDA, N, NB
- * ..
- * .. Array Arguments ..
- INTEGER IPIV( * )
- COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * )
- * ..
- *
- * =====================================================================
- *
- * .. Parameters ..
- DOUBLE PRECISION ONE
- PARAMETER ( ONE = 1.0D+0 )
- COMPLEX*16 CONE, CZERO
- PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ),
- $ CZERO = ( 0.0D+0, 0.0D+0 ) )
- * ..
- * .. Local Scalars ..
- LOGICAL UPPER
- INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
- DOUBLE PRECISION AK, AKP1, T
- COMPLEX*16 AKKP1, D, U01_I_J, U01_IP1_J, U11_I_J,
- $ U11_IP1_J
- * ..
- * .. External Functions ..
- LOGICAL LSAME
- EXTERNAL LSAME
- * ..
- * .. External Subroutines ..
- EXTERNAL ZGEMM, ZHESWAPR, ZTRTRI, ZTRMM, XERBLA
- * ..
- * .. Intrinsic Functions ..
- INTRINSIC ABS, DCONJG, DBLE, MAX, MOD
- * ..
- * .. Executable Statements ..
- *
- * Test the input parameters.
- *
- * NB is only rejected when N > 0: in that case a block size below 1
- * would keep the blocked loops below from ever advancing CUT towards
- * its termination value. With N = 0 the routine returns before NB
- * is used, so any NB is still accepted there.
- *
- INFO = 0
- UPPER = LSAME( UPLO, 'U' )
- IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
- INFO = -1
- ELSE IF( N.LT.0 ) THEN
- INFO = -2
- ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
- INFO = -4
- ELSE IF( N.GT.0 .AND. NB.LT.1 ) THEN
- INFO = -8
- END IF
- *
- * Quick return if possible
- *
- IF( INFO.NE.0 ) THEN
- CALL XERBLA( 'ZHETRI_3X', -INFO )
- RETURN
- END IF
- IF( N.EQ.0 ) RETURN
- *
- * Copy the off-diagonal elements of D (array E) into the first
- * column of the workspace.
- *
- DO K = 1, N
- WORK( K, 1 ) = E( K )
- END DO
- *
- * Check that the diagonal matrix D is nonsingular.
- * INFO itself is the loop index, so an early RETURN leaves INFO
- * equal to the position of the zero 1-by-1 pivot. Only entries with
- * IPIV > 0 are tested here.
- *
- IF( UPPER ) THEN
- *
- * Upper triangular storage: examine D from bottom to top
- *
- DO INFO = N, 1, -1
- IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO )
- $ RETURN
- END DO
- ELSE
- *
- * Lower triangular storage: examine D from top to bottom.
- *
- DO INFO = 1, N
- IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO )
- $ RETURN
- END DO
- END IF
- *
- * No zero pivot found: reset INFO, which was used as loop index.
- *
- INFO = 0
- *
- * Splitting Workspace
- * U01 is a block ( N, NB+1 )
- * The first element of U01 is in WORK( 1, 1 )
- * U11 is a block ( NB+1, NB+1 )
- * The first element of U11 is in WORK( N+1, 1 )
- *
- U11 = N
- *
- * INVD is a block ( N, 2 )
- * The first element of INVD is in WORK( 1, INVD )
- *
- INVD = NB + 2
- *
- IF( UPPER ) THEN
- *
- * Begin Upper
- *
- * invA = P * inv(U**H) * inv(D) * inv(U) * P**T.
- *
- * Invert the unit upper triangular factor in place.
- *
- CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO )
- *
- * inv(D) and inv(D) * inv(U)
- *
- * Row K of the two INVD columns receives row K of inv(D):
- * 1x1 block: ( 1/D(k,k), 0 )
- * 2x2 block: row K = ( inv(k,k), inv(k,k+1) )
- * row K+1 = ( inv(k+1,k), inv(k+1,k+1) )
- * The 2x2 inverse is computed from entries divided by
- * T = |D(k,k+1)|.
- *
- K = 1
- DO WHILE( K.LE.N )
- IF( IPIV( K ).GT.0 ) THEN
- * 1 x 1 diagonal block
- WORK( K, INVD ) = ONE / DBLE( A( K, K ) )
- WORK( K, INVD+1 ) = CZERO
- ELSE
- * 2 x 2 diagonal block
- T = ABS( WORK( K+1, 1 ) )
- AK = DBLE( A( K, K ) ) / T
- AKP1 = DBLE( A( K+1, K+1 ) ) / T
- AKKP1 = WORK( K+1, 1 ) / T
- D = T*( AK*AKP1-CONE )
- WORK( K, INVD ) = AKP1 / D
- WORK( K+1, INVD+1 ) = AK / D
- WORK( K, INVD+1 ) = -AKKP1 / D
- WORK( K+1, INVD ) = DCONJG( WORK( K, INVD+1 ) )
- * skip the second row of the 2x2 block
- K = K + 1
- END IF
- K = K + 1
- END DO
- *
- * inv(U**H) = (inv(U))**H
- *
- * inv(U**H) * inv(D) * inv(U)
- *
- * Process block columns from the right. CUT is the number of
- * columns to the left of the current block; it reaches 0 when the
- * first block column has been handled.
- *
- CUT = N
- DO WHILE( CUT.GT.0 )
- NNB = NB
- IF( CUT.LE.NNB ) THEN
- NNB = CUT
- ELSE
- ICOUNT = 0
- * count negative elements,
- DO I = CUT+1-NNB, CUT
- IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
- END DO
- * need an even number for a clean cut (an odd count means the
- * block boundary would fall inside a 2x2 block, so widen by one)
- IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
- END IF
- *
- CUT = CUT - NNB
- *
- * U01 Block
- *
- DO I = 1, CUT
- DO J = 1, NNB
- WORK( I, J ) = A( I, CUT+J )
- END DO
- END DO
- *
- * U11 Block (unit diagonal, zeros below, A above the diagonal)
- *
- DO I = 1, NNB
- WORK( U11+I, I ) = CONE
- DO J = 1, I-1
- WORK( U11+I, J ) = CZERO
- END DO
- DO J = I+1, NNB
- WORK( U11+I, J ) = A( CUT+I, CUT+J )
- END DO
- END DO
- *
- * invD * U01
- *
- I = 1
- DO WHILE( I.LE.CUT )
- IF( IPIV( I ).GT.0 ) THEN
- DO J = 1, NNB
- WORK( I, J ) = WORK( I, INVD ) * WORK( I, J )
- END DO
- ELSE
- * 2x2 block: rows I and I+1 are combined, so save both first
- DO J = 1, NNB
- U01_I_J = WORK( I, J )
- U01_IP1_J = WORK( I+1, J )
- WORK( I, J ) = WORK( I, INVD ) * U01_I_J
- $ + WORK( I, INVD+1 ) * U01_IP1_J
- WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J
- $ + WORK( I+1, INVD+1 ) * U01_IP1_J
- END DO
- I = I + 1
- END IF
- I = I + 1
- END DO
- *
- * invD1 * U11
- *
- I = 1
- DO WHILE ( I.LE.NNB )
- IF( IPIV( CUT+I ).GT.0 ) THEN
- DO J = I, NNB
- WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
- END DO
- ELSE
- * 2x2 block: rows I and I+1 are combined, so save both first
- DO J = I, NNB
- U11_I_J = WORK(U11+I,J)
- U11_IP1_J = WORK(U11+I+1,J)
- WORK( U11+I, J ) = WORK(CUT+I,INVD) * U11_I_J
- $ + WORK(CUT+I,INVD+1) * U11_IP1_J
- WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J
- $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J
- END DO
- I = I + 1
- END IF
- I = I + 1
- END DO
- *
- * U11**H * invD1 * U11 -> U11
- *
- CALL ZTRMM( 'L', 'U', 'C', 'U', NNB, NNB,
- $ CONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
- $ N+NB+1 )
- *
- DO I = 1, NNB
- DO J = I, NNB
- A( CUT+I, CUT+J ) = WORK( U11+I, J )
- END DO
- END DO
- *
- * U01**H * invD * U01 -> WORK( U11+I, J )
- *
- CALL ZGEMM( 'C', 'N', NNB, NNB, CUT, CONE, A( 1, CUT+1 ),
- $ LDA, WORK, N+NB+1, CZERO, WORK(U11+1,1),
- $ N+NB+1 )
- *
- * U11 = U11**H * invD1 * U11 + U01**H * invD * U01
- *
- DO I = 1, NNB
- DO J = I, NNB
- A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J)
- END DO
- END DO
- *
- * U01 = U00**H * invD0 * U01
- *
- CALL ZTRMM( 'L', UPLO, 'C', 'U', CUT, NNB,
- $ CONE, A, LDA, WORK, N+NB+1 )
- *
- * Update U01
- *
- DO I = 1, CUT
- DO J = 1, NNB
- A( I, CUT+J ) = WORK( I, J )
- END DO
- END DO
- *
- * Next Block
- *
- END DO
- *
- * Apply PERMUTATIONS P and P**T:
- * P * inv(U**H) * inv(D) * inv(U) * P**T.
- * Interchange rows and columns I and IPIV(I) in reverse order
- * from the formation order of IPIV vector for Upper case.
- *
- * ( We can use a loop over IPIV with increment 1,
- * since the ABS value of IPIV(I) represents the row (column)
- * index of the interchange with row (column) i in both 1x1
- * and 2x2 pivot cases, i.e. we don't need separate code branches
- * for 1x1 and 2x2 pivot cases )
- *
- * ZHESWAPR is always called with the smaller index first.
- *
- DO I = 1, N
- IP = ABS( IPIV( I ) )
- IF( IP.NE.I ) THEN
- IF (I .LT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, I ,IP )
- IF (I .GT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I )
- END IF
- END DO
- *
- ELSE
- *
- * Begin Lower
- *
- * inv A = P * inv(L**H) * inv(D) * inv(L) * P**T.
- *
- * Invert the unit lower triangular factor in place.
- *
- CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO )
- *
- * inv(D) and inv(D) * inv(L)
- *
- * Here column INVD holds the diagonal of inv(D) and column INVD+1
- * holds the off-diagonal entry of each 2x2 block:
- * 1x1 block: ( 1/D(k,k), 0 )
- * 2x2 block: row K-1 = ( inv(k-1,k-1), inv(k-1,k) )
- * row K = ( inv(k,k), inv(k,k-1) )
- * The 2x2 inverse is computed from entries divided by
- * T = |D(k,k-1)|.
- *
- K = N
- DO WHILE ( K .GE. 1 )
- IF( IPIV( K ).GT.0 ) THEN
- * 1 x 1 diagonal block
- WORK( K, INVD ) = ONE / DBLE( A( K, K ) )
- WORK( K, INVD+1 ) = CZERO
- ELSE
- * 2 x 2 diagonal block
- T = ABS( WORK( K-1, 1 ) )
- AK = DBLE( A( K-1, K-1 ) ) / T
- AKP1 = DBLE( A( K, K ) ) / T
- AKKP1 = WORK( K-1, 1 ) / T
- D = T*( AK*AKP1-CONE )
- WORK( K-1, INVD ) = AKP1 / D
- WORK( K, INVD ) = AK / D
- WORK( K, INVD+1 ) = -AKKP1 / D
- WORK( K-1, INVD+1 ) = DCONJG( WORK( K, INVD+1 ) )
- * skip the first row of the 2x2 block
- K = K - 1
- END IF
- K = K - 1
- END DO
- *
- * inv(L**H) = (inv(L))**H
- *
- * inv(L**H) * inv(D) * inv(L)
- *
- * Process block columns from the left. CUT is the number of
- * columns already handled; the loop ends when it reaches N.
- *
- CUT = 0
- DO WHILE( CUT.LT.N )
- NNB = NB
- IF( (CUT + NNB).GT.N ) THEN
- NNB = N - CUT
- ELSE
- ICOUNT = 0
- * count negative elements,
- DO I = CUT + 1, CUT+NNB
- IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
- END DO
- * need an even number for a clean cut (an odd count means the
- * block boundary would fall inside a 2x2 block, so widen by one)
- IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
- END IF
- *
- * L21 Block
- *
- DO I = 1, N-CUT-NNB
- DO J = 1, NNB
- WORK( I, J ) = A( CUT+NNB+I, CUT+J )
- END DO
- END DO
- *
- * L11 Block (unit diagonal, zeros above, A below the diagonal)
- *
- DO I = 1, NNB
- WORK( U11+I, I) = CONE
- DO J = I+1, NNB
- WORK( U11+I, J ) = CZERO
- END DO
- DO J = 1, I-1
- WORK( U11+I, J ) = A( CUT+I, CUT+J )
- END DO
- END DO
- *
- * invD*L21
- *
- I = N-CUT-NNB
- DO WHILE( I.GE.1 )
- IF( IPIV( CUT+NNB+I ).GT.0 ) THEN
- DO J = 1, NNB
- WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J)
- END DO
- ELSE
- * 2x2 block: rows I and I-1 are combined, so save both first
- DO J = 1, NNB
- U01_I_J = WORK(I,J)
- U01_IP1_J = WORK(I-1,J)
- WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+
- $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J
- WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+
- $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J
- END DO
- I = I - 1
- END IF
- I = I - 1
- END DO
- *
- * invD1*L11
- *
- I = NNB
- DO WHILE( I.GE.1 )
- IF( IPIV( CUT+I ).GT.0 ) THEN
- DO J = 1, NNB
- WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J)
- END DO
- *
- ELSE
- * 2x2 block: rows I and I-1 are combined, so save both first
- DO J = 1, NNB
- U11_I_J = WORK( U11+I, J )
- U11_IP1_J = WORK( U11+I-1, J )
- WORK( U11+I, J ) = WORK(CUT+I,INVD) * U11_I_J
- $ + WORK(CUT+I,INVD+1) * U11_IP1_J
- WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J
- $ + WORK(CUT+I-1,INVD) * U11_IP1_J
- END DO
- I = I - 1
- END IF
- I = I - 1
- END DO
- *
- * L11**H * invD1 * L11 -> L11
- *
- CALL ZTRMM( 'L', UPLO, 'C', 'U', NNB, NNB, CONE,
- $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
- $ N+NB+1 )
- *
- * Store the lower triangle of the product in A. When no rows
- * remain below this block (CUT+NNB = N) this is already the final
- * value of the diagonal block.
- *
- DO I = 1, NNB
- DO J = 1, I
- A( CUT+I, CUT+J ) = WORK( U11+I, J )
- END DO
- END DO
- *
- IF( (CUT+NNB).LT.N ) THEN
- *
- * L21**H * invD2*L21 -> WORK( U11+I, J )
- *
- CALL ZGEMM( 'C', 'N', NNB, NNB, N-NNB-CUT, CONE,
- $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1,
- $ CZERO, WORK( U11+1, 1 ), N+NB+1 )
- *
- * L11 = L11**H * invD1 * L11 + L21**H * invD2 * L21
- *
- DO I = 1, NNB
- DO J = 1, I
- A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J)
- END DO
- END DO
- *
- * L21 = L22**H * invD2 * L21
- *
- CALL ZTRMM( 'L', UPLO, 'C', 'U', N-NNB-CUT, NNB, CONE,
- $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK,
- $ N+NB+1 )
- *
- * Update L21
- *
- DO I = 1, N-CUT-NNB
- DO J = 1, NNB
- A( CUT+NNB+I, CUT+J ) = WORK( I, J )
- END DO
- END DO
- *
- END IF
- *
- * Next Block
- *
- CUT = CUT + NNB
- *
- END DO
- *
- * Apply PERMUTATIONS P and P**T:
- * P * inv(L**H) * inv(D) * inv(L) * P**T.
- * Interchange rows and columns I and IPIV(I) in reverse order
- * from the formation order of IPIV vector for Lower case.
- *
- * ( We can use a loop over IPIV with increment -1,
- * since the ABS value of IPIV(I) represents the row (column)
- * index of the interchange with row (column) i in both 1x1
- * and 2x2 pivot cases, i.e. we don't need separate code branches
- * for 1x1 and 2x2 pivot cases )
- *
- * ZHESWAPR is always called with the smaller index first.
- *
- DO I = N, 1, -1
- IP = ABS( IPIV( I ) )
- IF( IP.NE.I ) THEN
- IF (I .LT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, I ,IP )
- IF (I .GT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I )
- END IF
- END DO
- *
- END IF
- *
- RETURN
- *
- * End of ZHETRI_3X
- *
- END
|