You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

chetrf_aa.f 15 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483
  1. *> \brief \b CHETRF_AA
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download CHETRF_AA + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrf_aa.f">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrf_aa.f">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrf_aa.f">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
  22. *
  23. * .. Scalar Arguments ..
  24. * CHARACTER UPLO
  25. * INTEGER N, LDA, LWORK, INFO
  26. * ..
  27. * .. Array Arguments ..
  28. * INTEGER IPIV( * )
  29. * COMPLEX A( LDA, * ), WORK( * )
  30. * ..
  31. *
  32. *> \par Purpose:
  33. * =============
  34. *>
  35. *> \verbatim
  36. *>
  37. *> CHETRF_AA computes the factorization of a complex Hermitian matrix A
  38. *> using Aasen's algorithm. The form of the factorization is
  39. *>
  40. *> A = U*T*U**H or A = L*T*L**H
  41. *>
  42. *> where U (or L) is a product of permutation and unit upper (lower)
  43. *> triangular matrices, and T is a hermitian tridiagonal matrix.
  44. *>
  45. *> This is the blocked version of the algorithm, calling Level 3 BLAS.
  46. *> \endverbatim
  47. *
  48. * Arguments:
  49. * ==========
  50. *
  51. *> \param[in] UPLO
  52. *> \verbatim
  53. *> UPLO is CHARACTER*1
  54. *> = 'U': Upper triangle of A is stored;
  55. *> = 'L': Lower triangle of A is stored.
  56. *> \endverbatim
  57. *>
  58. *> \param[in] N
  59. *> \verbatim
  60. *> N is INTEGER
  61. *> The order of the matrix A. N >= 0.
  62. *> \endverbatim
  63. *>
  64. *> \param[in,out] A
  65. *> \verbatim
  66. *> A is COMPLEX array, dimension (LDA,N)
  67. *> On entry, the hermitian matrix A. If UPLO = 'U', the leading
  68. *> N-by-N upper triangular part of A contains the upper
  69. *> triangular part of the matrix A, and the strictly lower
  70. *> triangular part of A is not referenced. If UPLO = 'L', the
  71. *> leading N-by-N lower triangular part of A contains the lower
  72. *> triangular part of the matrix A, and the strictly upper
  73. *> triangular part of A is not referenced.
  74. *>
  75. *> On exit, the tridiagonal matrix is stored in the diagonals
  76. *> and the subdiagonals of A just below (or above) the diagonals,
  77. *> and L is stored below (or above) the subdiagonals, when UPLO
  78. *> is 'L' (or 'U').
  79. *> \endverbatim
  80. *>
  81. *> \param[in] LDA
  82. *> \verbatim
  83. *> LDA is INTEGER
  84. *> The leading dimension of the array A. LDA >= max(1,N).
  85. *> \endverbatim
  86. *>
  87. *> \param[out] IPIV
  88. *> \verbatim
  89. *> IPIV is INTEGER array, dimension (N)
  90. *> On exit, it contains the details of the interchanges, i.e.,
  91. *> the row and column k of A were interchanged with the
  92. *> row and column IPIV(k).
  93. *> \endverbatim
  94. *>
  95. *> \param[out] WORK
  96. *> \verbatim
  97. *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
  98. *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  99. *> \endverbatim
  100. *>
  101. *> \param[in] LWORK
  102. *> \verbatim
  103. *> LWORK is INTEGER
  104. *> The length of WORK. LWORK >= 2*N. For optimum performance
  105. *> LWORK >= N*(1+NB), where NB is the optimal blocksize.
  106. *>
  107. *> If LWORK = -1, then a workspace query is assumed; the routine
  108. *> only calculates the optimal size of the WORK array, returns
  109. *> this value as the first entry of the WORK array, and no error
  110. *> message related to LWORK is issued by XERBLA.
  111. *> \endverbatim
  112. *>
  113. *> \param[out] INFO
  114. *> \verbatim
  115. *> INFO is INTEGER
  116. *> = 0: successful exit
  117. *> < 0: if INFO = -i, the i-th argument had an illegal value
  118. *> > 0: if INFO = i, D(i,i) is exactly zero. The factorization
  119. *> has been completed, but the block diagonal matrix D is
  120. *> exactly singular, and division by zero will occur if it
  121. *> is used to solve a system of equations.
  122. *> \endverbatim
  123. *
  124. * Authors:
  125. * ========
  126. *
  127. *> \author Univ. of Tennessee
  128. *> \author Univ. of California Berkeley
  129. *> \author Univ. of Colorado Denver
  130. *> \author NAG Ltd.
  131. *
  132. *> \date December 2016
  133. *
  134. *> \ingroup complexHEcomputational
  135. *
  136. * =====================================================================
  ! CHETRF_AA: blocked Aasen factorization of a Hermitian matrix.
      SUBROUTINE CHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
*
*  -- LAPACK computational routine (version 3.7.0) --
*  -- LAPACK is a software package provided by Univ. of Tennessee,  --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and    --
*  -- NAG Ltd..                                                     --
*     December 2016
*
*  Purpose
*  =======
*
*  Factorizes the complex Hermitian matrix A as A = U*T*U**H
*  (UPLO = 'U') or A = L*T*L**H (UPLO = 'L') with T tridiagonal,
*  processing A one panel of at most NB columns at a time.  Each
*  panel is factorized by CLAHEF_AA and the trailing submatrix is
*  then updated with CGEMM.
*
*  Arguments
*  =========
*
*  UPLO   (in)     'U' or 'L'; any other value gives INFO = -1.
*  N      (in)     Order of A; N < 0 gives INFO = -2.
*  A      (in/out) COMPLEX array (LDA,*), overwritten in place.
*  LDA    (in)     Leading dimension of A; LDA < MAX(1,N) gives
*                  INFO = -4.
*  IPIV   (out)    IPIV(1) is set to 1; the remaining entries are
*                  the panel-local pivots returned by CLAHEF_AA
*                  shifted by the panel offset J.
*  WORK   (out)    Workspace.  WORK(1:N) holds the current first
*                  row/column of H between panels.  On a successful
*                  return WORK(1) holds LWKOPT = (NB+1)*N.
*  LWORK  (in)     Length of WORK; LWORK < 2*N gives INFO = -7
*                  unless LWORK = -1 (workspace query: only WORK(1)
*                  is set).  If LWORK < (NB+1)*N the block size is
*                  reduced to (LWORK-N)/N.
*  INFO   (out)    = 0: success.
*                  < 0: argument -INFO was illegal (XERBLA called).
*                  > 0: for N = 1, INFO = 1 when the real part of
*                       A(1,1) is zero; otherwise IINFO+J for the
*                       first panel (starting after column J) for
*                       which CLAHEF_AA returned IINFO > 0.
*
      IMPLICIT NONE
*
*     .. Scalar Arguments ..
      CHARACTER          UPLO
      INTEGER            N, LDA, LWORK, INFO
*     ..
*     .. Array Arguments ..
      INTEGER            IPIV( * )
      COMPLEX            A( LDA, * ), WORK( * )
*     ..
*
*  =====================================================================
*     .. Parameters ..
      COMPLEX            ZERO, ONE
      PARAMETER          ( ZERO = ( 0.0E+0, 0.0E+0 ),
     $                   ONE = ( 1.0E+0, 0.0E+0 ) )
*
*     .. Local Scalars ..
      LOGICAL            LQUERY, UPPER
      INTEGER            J, LWKOPT, IINFO
      INTEGER            NB, MJ, NJ, K1, K2, J1, J2, J3, JB
      COMPLEX            ALPHA
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      INTEGER            ILAENV
      EXTERNAL           LSAME, ILAENV
*     ..
*     .. External Subroutines ..
      EXTERNAL           CLAHEF_AA, CGEMM, CCOPY, CSCAL, CSWAP
      EXTERNAL           XERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          REAL, CONJG, MAX, MIN
*     ..
*     .. Executable Statements ..
*
*     Determine the block size
*
      NB = ILAENV( 1, 'CHETRF', UPLO, N, -1, -1, -1 )
*
*     Test the input parameters.
*
      INFO = 0
      UPPER = LSAME( UPLO, 'U' )
      LQUERY = ( LWORK.EQ.-1 )
      IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
         INFO = -4
      ELSE IF( LWORK.LT.( 2*N ) .AND. .NOT.LQUERY ) THEN
         INFO = -7
      END IF
*
*     Report the optimal workspace size (also the answer to a
*     workspace query).
*
      IF( INFO.EQ.0 ) THEN
         LWKOPT = (NB+1)*N
         WORK( 1 ) = LWKOPT
      END IF
*
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'CHETRF_AA', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
*
*     Quick return
*
      IF ( N.EQ.0 ) THEN
         RETURN
      ENDIF
      IPIV( 1 ) = 1
      IF ( N.EQ.1 ) THEN
*
*        1-by-1 case: drop the imaginary part of the diagonal and
*        flag an exactly zero entry.
*
         A( 1, 1 ) = REAL( A( 1, 1 ) )
         IF ( A( 1, 1 ).EQ.ZERO ) THEN
            INFO = 1
         END IF
         RETURN
      END IF
*
*     Adjust block size based on the workspace size
*
      IF( LWORK.LT.((1+NB)*N) ) THEN
         NB = ( LWORK-N ) / N
      END IF
*
      IF( UPPER ) THEN
*
*        .....................................................
*        Factorize A as U*T*U**H using the upper triangle of A
*        .....................................................
*
*        copy first row A(1, 1:N) into H(1:N) (stored in WORK(1:N))
*
         CALL CCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 )
*
*        J is the main loop index, advancing from 0 to N in steps
*        of JB, where JB is the number of columns factorized by
*        CLAHEF_AA; JB is either NB, or N-J for the last block
*
         J = 0
 10      CONTINUE
         IF( J.GE.N )
     $      GO TO 20
*
*        each step of the main loop
*         J is the last column of the previous panel
*         J1 is the first column of the current panel
*         K1 identifies if the previous column of the panel has been
*          explicitly stored, e.g., K1=1 for the first panel, and
*          K1=0 for the rest
*
         J1 = J + 1
         JB = MIN( N-J1+1, NB )
         K1 = MAX(1, J)-J
*
*        Panel factorization
*
         CALL CLAHEF_AA( UPLO, 2-K1, N-J, JB,
     $                   A( MAX(1, J), J+1 ), LDA,
     $                   IPIV( J+1 ), WORK, N, WORK( N*NB+1 ),
     $                   IINFO )
*
*        Keep only the first positive IINFO, shifted by the panel
*        offset J
*
         IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
            INFO = IINFO+J
         ENDIF
*
*        Adjust IPIV and apply it back (J-th step picks (J+1)-th
*        pivot)
*
         DO J2 = J+2, MIN(N, J+JB+1)
            IPIV( J2 ) = IPIV( J2 ) + J
            IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
               CALL CSWAP( J1-K1-2, A( 1, J2 ), 1,
     $                     A( 1, IPIV(J2) ), 1 )
            END IF
         END DO
         J = J + JB
*
*        Trailing submatrix update, where
*         the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
*         WORK stores the current block of the auxiliary matrix H
*
         IF( J.LT.N ) THEN
*
*           if the first panel and JB=1 (NB=1), then nothing to do
*
            IF( J1.GT.1 .OR. JB.GT.1 ) THEN
*
*              Merge rank-1 update with BLAS-3 update: A(J, J+1)
*              is temporarily replaced by ONE and its conjugate
*              (ALPHA) scales the extra column appended to WORK
*
               ALPHA = CONJG( A( J, J+1 ) )
               A( J, J+1 ) = ONE
               CALL CCOPY( N-J, A( J-1, J+1 ), LDA,
     $                     WORK( (J+1-J1+1)+JB*N ), 1 )
               CALL CSCAL( N-J, ALPHA,
     $                     WORK( (J+1-J1+1)+JB*N ), 1 )
*
*              K1 and K2 identify if the previous column of the
*              panel has been explicitly stored, i.e., K1=1 and
*              K2=0 for the first panel, and K1=0 and K2=1 for
*              the rest
*
               IF( J1.GT.1 ) THEN
*
*                 Not first panel
*
                  K2 = 1
               ELSE
*
*                 First panel
*
                  K2 = 0
*
*                 First update skips the first column
*
                  JB = JB - 1
               END IF
*
               DO J2 = J+1, N, NB
                  NJ = MIN( NB, N-J2+1 )
*
*                 Update (J2, J2) diagonal block one row at a time
*                 (CGEMM called with a single row)
*
                  J3 = J2
                  DO MJ = NJ-1, 1, -1
                     CALL CGEMM( 'Conjugate transpose',
     $                           'Transpose',
     $                           1, MJ, JB+1,
     $                           -ONE, A( J1-K2, J3 ), LDA,
     $                           WORK( (J3-J1+1)+K1*N ), N,
     $                           ONE, A( J3, J3 ), LDA )
                     J3 = J3 + 1
                  END DO
*
*                 Update off-diagonal block of J2-th block row
*                 with CGEMM
*
                  CALL CGEMM( 'Conjugate transpose',
     $                        'Transpose',
     $                        NJ, N-J3+1, JB+1,
     $                        -ONE, A( J1-K2, J2 ), LDA,
     $                        WORK( (J3-J1+1)+K1*N ), N,
     $                        ONE, A( J2, J3 ), LDA )
               END DO
*
*              Recover T( J, J+1 )
*
               A( J, J+1 ) = CONJG( ALPHA )
            END IF
*
*           WORK(J+1, 1) stores H(J+1, 1); note that this
*           overwrites WORK(1)
*
            CALL CCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 )
         END IF
         GO TO 10
      ELSE
*
*        .....................................................
*        Factorize A as L*T*L**H using the lower triangle of A
*        .....................................................
*
*        copy first column A(1:N, 1) into H(1:N, 1)
*         (stored in WORK(1:N))
*
         CALL CCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 )
*
*        J is the main loop index, advancing from 0 to N in steps
*        of JB, where JB is the number of columns factorized by
*        CLAHEF_AA; JB is either NB, or N-J for the last block
*
         J = 0
 11      CONTINUE
         IF( J.GE.N )
     $      GO TO 20
*
*        each step of the main loop
*         J is the last column of the previous panel
*         J1 is the first column of the current panel
*         K1 identifies if the previous column of the panel has been
*          explicitly stored, e.g., K1=1 for the first panel, and
*          K1=0 for the rest
*
         J1 = J+1
         JB = MIN( N-J1+1, NB )
         K1 = MAX(1, J)-J
*
*        Panel factorization
*
         CALL CLAHEF_AA( UPLO, 2-K1, N-J, JB,
     $                   A( J+1, MAX(1, J) ), LDA,
     $                   IPIV( J+1 ), WORK, N, WORK( N*NB+1 ),
     $                   IINFO )
*
*        Keep only the first positive IINFO, shifted by the panel
*        offset J
*
         IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
            INFO = IINFO+J
         ENDIF
*
*        Adjust IPIV and apply it back (J-th step picks (J+1)-th
*        pivot)
*
         DO J2 = J+2, MIN(N, J+JB+1)
            IPIV( J2 ) = IPIV( J2 ) + J
            IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
               CALL CSWAP( J1-K1-2, A( J2, 1 ), LDA,
     $                     A( IPIV(J2), 1 ), LDA )
            END IF
         END DO
         J = J + JB
*
*        Trailing submatrix update, where
*          A(J2+1, J1-1) stores L(J2+1, J1) and
*          WORK(J2+1, 1) stores H(J2+1, 1)
*
         IF( J.LT.N ) THEN
*
*           if the first panel and JB=1 (NB=1), then nothing to do
*
            IF( J1.GT.1 .OR. JB.GT.1 ) THEN
*
*              Merge rank-1 update with BLAS-3 update: A(J+1, J)
*              is temporarily replaced by ONE and its conjugate
*              (ALPHA) scales the extra column appended to WORK
*
               ALPHA = CONJG( A( J+1, J ) )
               A( J+1, J ) = ONE
               CALL CCOPY( N-J, A( J+1, J-1 ), 1,
     $                     WORK( (J+1-J1+1)+JB*N ), 1 )
               CALL CSCAL( N-J, ALPHA,
     $                     WORK( (J+1-J1+1)+JB*N ), 1 )
*
*              K1 and K2 identify if the previous column of the
*              panel has been explicitly stored, i.e., K1=1 and
*              K2=0 for the first panel, and K1=0 and K2=1 for
*              the rest
*
               IF( J1.GT.1 ) THEN
*
*                 Not first panel
*
                  K2 = 1
               ELSE
*
*                 First panel
*
                  K2 = 0
*
*                 First update skips the first column
*
                  JB = JB - 1
               END IF
*
               DO J2 = J+1, N, NB
                  NJ = MIN( NB, N-J2+1 )
*
*                 Update (J2, J2) diagonal block one column at a
*                 time (CGEMM called with a single column)
*
                  J3 = J2
                  DO MJ = NJ-1, 1, -1
                     CALL CGEMM( 'No transpose',
     $                           'Conjugate transpose',
     $                           MJ, 1, JB+1,
     $                           -ONE, WORK( (J3-J1+1)+K1*N ), N,
     $                           A( J3, J1-K2 ), LDA,
     $                           ONE, A( J3, J3 ), LDA )
                     J3 = J3 + 1
                  END DO
*
*                 Update off-diagonal block of J2-th block column
*                 with CGEMM
*
                  CALL CGEMM( 'No transpose',
     $                        'Conjugate transpose',
     $                        N-J3+1, NJ, JB+1,
     $                        -ONE, WORK( (J3-J1+1)+K1*N ), N,
     $                        A( J2, J1-K2 ), LDA,
     $                        ONE, A( J3, J2 ), LDA )
               END DO
*
*              Recover T( J+1, J )
*
               A( J+1, J ) = CONJG( ALPHA )
            END IF
*
*           WORK(J+1, 1) stores H(J+1, 1); note that this
*           overwrites WORK(1)
*
            CALL CCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 )
         END IF
         GO TO 11
      END IF
*
   20 CONTINUE
*
*     WORK(1:N) was used as storage for H during the factorization,
*     so restore the optimal workspace size in WORK(1) before
*     returning.
*
      WORK( 1 ) = LWKOPT
      RETURN
*
*     End of CHETRF_AA
*
      END