You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

chetrd_he2hb.f 18 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521
  1. *> \brief \b CHETRD_HE2HB
  2. *
  3. * @generated from zhetrd_he2hb.f, fortran z -> c, Wed Dec 7 08:22:40 2016
  4. *
  5. * =========== DOCUMENTATION ===========
  6. *
  7. * Online html documentation available at
  8. * http://www.netlib.org/lapack/explore-html/
  9. *
  10. *> \htmlonly
  11. *> Download CHETRD_HE2HB + dependencies
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd_he2hb.f">
  13. *> [TGZ]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd_he2hb.f">
  15. *> [ZIP]</a>
  16. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd_he2hb.f">
  17. *> [TXT]</a>
  18. *> \endhtmlonly
  19. *
  20. * Definition:
  21. * ===========
  22. *
  23. * SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
  24. * WORK, LWORK, INFO )
  25. *
  26. * IMPLICIT NONE
  27. *
  28. * .. Scalar Arguments ..
  29. * CHARACTER UPLO
  30. * INTEGER INFO, LDA, LDAB, LWORK, N, KD
  31. * ..
  32. * .. Array Arguments ..
  33. * COMPLEX A( LDA, * ), AB( LDAB, * ),
  34. * TAU( * ), WORK( * )
  35. * ..
  36. *
  37. *
  38. *> \par Purpose:
  39. * =============
  40. *>
  41. *> \verbatim
  42. *>
  43. *> CHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian
  44. *> band-diagonal form AB by a unitary similarity transformation:
  45. *> Q**H * A * Q = AB.
  46. *> \endverbatim
  47. *
  48. * Arguments:
  49. * ==========
  50. *
  51. *> \param[in] UPLO
  52. *> \verbatim
  53. *> UPLO is CHARACTER*1
  54. *> = 'U': Upper triangle of A is stored;
  55. *> = 'L': Lower triangle of A is stored.
  56. *> \endverbatim
  57. *>
  58. *> \param[in] N
  59. *> \verbatim
  60. *> N is INTEGER
  61. *> The order of the matrix A. N >= 0.
  62. *> \endverbatim
  63. *>
  64. *> \param[in] KD
  65. *> \verbatim
  66. *> KD is INTEGER
  67. *> The number of superdiagonals of the reduced matrix if UPLO = 'U',
  68. *> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
  69. *> The reduced matrix is stored in the array AB.
  70. *> \endverbatim
  71. *>
  72. *> \param[in,out] A
  73. *> \verbatim
  74. *> A is COMPLEX array, dimension (LDA,N)
  75. *> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
  76. *> N-by-N upper triangular part of A contains the upper
  77. *> triangular part of the matrix A, and the strictly lower
  78. *> triangular part of A is not referenced. If UPLO = 'L', the
  79. *> leading N-by-N lower triangular part of A contains the lower
  80. *> triangular part of the matrix A, and the strictly upper
  81. *> triangular part of A is not referenced.
  82. *> On exit, if UPLO = 'U', the diagonal and first superdiagonal
  83. *> of A are overwritten by the corresponding elements of the
  84. *> tridiagonal matrix T, and the elements above the first
  85. *> superdiagonal, with the array TAU, represent the unitary
  86. *> matrix Q as a product of elementary reflectors; if UPLO
  87. *> = 'L', the diagonal and first subdiagonal of A are over-
  88. *> written by the corresponding elements of the tridiagonal
  89. *> matrix T, and the elements below the first subdiagonal, with
  90. *> the array TAU, represent the unitary matrix Q as a product
  91. *> of elementary reflectors. See Further Details.
  92. *> \endverbatim
  93. *>
  94. *> \param[in] LDA
  95. *> \verbatim
  96. *> LDA is INTEGER
  97. *> The leading dimension of the array A. LDA >= max(1,N).
  98. *> \endverbatim
  99. *>
  100. *> \param[out] AB
  101. *> \verbatim
  102. *> AB is COMPLEX array, dimension (LDAB,N)
  103. *> On exit, the upper or lower triangle of the Hermitian band
  104. *> matrix A, stored in the first KD+1 rows of the array. The
  105. *> j-th column of A is stored in the j-th column of the array AB
  106. *> as follows:
  107. *> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
  108. *> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
  109. *> \endverbatim
  110. *>
  111. *> \param[in] LDAB
  112. *> \verbatim
  113. *> LDAB is INTEGER
  114. *> The leading dimension of the array AB. LDAB >= KD+1.
  115. *> \endverbatim
  116. *>
  117. *> \param[out] TAU
  118. *> \verbatim
  119. *> TAU is COMPLEX array, dimension (N-KD)
  120. *> The scalar factors of the elementary reflectors (see Further
  121. *> Details).
  122. *> \endverbatim
  123. *>
  124. *> \param[out] WORK
  125. *> \verbatim
  126. *> WORK is COMPLEX array, dimension (MAX(1,LWORK))
  127. *> On exit, if INFO = 0, or if LWORK = -1,
  128. *> WORK(1) returns the size of LWORK.
  129. *> \endverbatim
  130. *>
  131. *> \param[in] LWORK
  132. *> \verbatim
  133. *> LWORK is INTEGER
  134. *> The dimension of the array WORK which should be calculated
  135. *> by a workspace query.
  136. *> If N <= KD+1, LWORK >= 1, else LWORK = MAX(1, LWORK_QUERY).
  137. *>
  138. *> If LWORK = -1, then a workspace query is assumed; the routine
  139. *> only calculates the optimal size of the WORK array, returns
  140. *> this value as the first entry of the WORK array, and no error
  141. *> message related to LWORK is issued by XERBLA.
  142. *> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
  143. *> where FACTOPTNB is the blocking used by the QR or LQ
  144. *> algorithm, usually FACTOPTNB=128 is a good choice otherwise
  145. *> putting LWORK=-1 will provide the size of WORK.
  146. *> \endverbatim
  147. *>
  148. *> \param[out] INFO
  149. *> \verbatim
  150. *> INFO is INTEGER
  151. *> = 0: successful exit
  152. *> < 0: if INFO = -i, the i-th argument had an illegal value
  153. *> \endverbatim
  154. *
  155. * Authors:
  156. * ========
  157. *
  158. *> \author Univ. of Tennessee
  159. *> \author Univ. of California Berkeley
  160. *> \author Univ. of Colorado Denver
  161. *> \author NAG Ltd.
  162. *
  163. *> \ingroup hetrd_he2hb
  164. *
  165. *> \par Further Details:
  166. * =====================
  167. *>
  168. *> \verbatim
  169. *>
  170. *> Implemented by Azzam Haidar.
  171. *>
  172. *> All details are available on technical report, SC11, SC13 papers.
  173. *>
  174. *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  175. *> Parallel reduction to condensed forms for symmetric eigenvalue problems
  176. *> using aggregated fine-grained and memory-aware kernels. In Proceedings
  177. *> of 2011 International Conference for High Performance Computing,
  178. *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
  179. *> Article 8 , 11 pages.
  180. *> http://doi.acm.org/10.1145/2063384.2063394
  181. *>
  182. *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
  183. *> An improved parallel singular value algorithm and its implementation
  184. *> for multicore hardware, In Proceedings of 2013 International Conference
  185. *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
  186. *> Denver, Colorado, USA, 2013.
  187. *> Article 90, 12 pages.
  188. *> http://doi.acm.org/10.1145/2503210.2503292
  189. *>
  190. *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  191. *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  192. *> calculations based on fine-grained memory aware tasks.
  193. *> International Journal of High Performance Computing Applications.
  194. *> Volume 28 Issue 2, Pages 196-209, May 2014.
  195. *> http://hpc.sagepub.com/content/28/2/196
  196. *>
  197. *> \endverbatim
  198. *>
  199. *> \verbatim
  200. *>
  201. *> If UPLO = 'U', the matrix Q is represented as a product of elementary
  202. *> reflectors
  203. *>
  204. *> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.
  205. *>
  206. *> Each H(i) has the form
  207. *>
  208. *> H(i) = I - tau * v * v**H
  209. *>
  210. *> where tau is a complex scalar, and v is a complex vector with
  211. *> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
  212. *> A(i,i+kd+1:n), and tau in TAU(i).
  213. *>
  214. *> If UPLO = 'L', the matrix Q is represented as a product of elementary
  215. *> reflectors
  216. *>
  217. *> Q = H(1) H(2) . . . H(k), where k = n-kd.
  218. *>
  219. *> Each H(i) has the form
  220. *>
  221. *> H(i) = I - tau * v * v**H
  222. *>
  223. *> where tau is a complex scalar, and v is a complex vector with
  224. *> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
  225. *> A(i+kd+2:n,i), and tau in TAU(i).
  226. *>
  227. *> The contents of A on exit are illustrated by the following examples
  228. *> with n = 5:
  229. *>
  230. *> if UPLO = 'U': if UPLO = 'L':
  231. *>
  232. *> ( ab ab/v1 v1 v1 v1 ) ( ab )
  233. *> ( ab ab/v2 v2 v2 ) ( ab/v1 ab )
  234. *> ( ab ab/v3 v3 ) ( v1 ab/v2 ab )
  235. *> ( ab ab/v4 ) ( v1 v2 ab/v3 ab )
  236. *> ( ab ) ( v1 v2 v3 ab/v4 ab )
  237. *>
  238. *> where d and e denote diagonal and off-diagonal elements of T, and vi
  239. *> denotes an element of the vector defining H(i).
  240. *> \endverbatim
  241. *>
  242. * =====================================================================
  243. SUBROUTINE CHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
  244. $ WORK, LWORK, INFO )
  245. *
  246. IMPLICIT NONE
  247. *
  248. * -- LAPACK computational routine --
  249. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  250. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  251. *
  252. * .. Scalar Arguments ..
  253. CHARACTER UPLO
  254. INTEGER INFO, LDA, LDAB, LWORK, N, KD
  255. * ..
  256. * .. Array Arguments ..
  257. COMPLEX A( LDA, * ), AB( LDAB, * ),
  258. $ TAU( * ), WORK( * )
  259. * ..
  260. *
  261. * =====================================================================
  262. *
  263. * .. Parameters ..
  264. REAL RONE
  265. COMPLEX ZERO, ONE, HALF
  266. PARAMETER ( RONE = 1.0E+0,
  267. $ ZERO = ( 0.0E+0, 0.0E+0 ),
  268. $ ONE = ( 1.0E+0, 0.0E+0 ),
  269. $ HALF = ( 0.5E+0, 0.0E+0 ) )
  270. * ..
  271. * .. Local Scalars ..
  272. LOGICAL LQUERY, UPPER
  273. INTEGER I, J, IINFO, LWMIN, PN, PK, LK,
  274. $ LDT, LDW, LDS2, LDS1,
  275. $ LS2, LS1, LW, LT,
  276. $ TPOS, WPOS, S2POS, S1POS
  277. * ..
  278. * .. External Subroutines ..
  279. EXTERNAL XERBLA, CHER2K, CHEMM, CGEMM, CCOPY,
  280. $ CLARFT, CGELQF, CGEQRF, CLASET
  281. * ..
  282. * .. Intrinsic Functions ..
  283. INTRINSIC MIN, MAX
  284. * ..
  285. * .. External Functions ..
  286. LOGICAL LSAME
  287. INTEGER ILAENV2STAGE
  288. REAL SROUNDUP_LWORK
  289. EXTERNAL LSAME, ILAENV2STAGE, SROUNDUP_LWORK
  290. * ..
  291. * .. Executable Statements ..
  292. *
  293. * Determine the minimal workspace size required
  294. * and test the input parameters
  295. *
  296. INFO = 0
  297. UPPER = LSAME( UPLO, 'U' )
  298. LQUERY = ( LWORK.EQ.-1 )
  299. IF( N.LE.KD+1 ) THEN
  300. LWMIN = 1
  301. ELSE
  302. LWMIN = ILAENV2STAGE( 4, 'CHETRD_HE2HB', '', N, KD, -1, -1 )
  303. END IF
  304. *
  305. IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
  306. INFO = -1
  307. ELSE IF( N.LT.0 ) THEN
  308. INFO = -2
  309. ELSE IF( KD.LT.0 ) THEN
  310. INFO = -3
  311. ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  312. INFO = -5
  313. ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN
  314. INFO = -7
  315. ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
  316. INFO = -10
  317. END IF
  318. *
  319. IF( INFO.NE.0 ) THEN
  320. CALL XERBLA( 'CHETRD_HE2HB', -INFO )
  321. RETURN
  322. ELSE IF( LQUERY ) THEN
  323. WORK( 1 ) = SROUNDUP_LWORK( LWMIN )
  324. RETURN
  325. END IF
  326. *
  327. * Quick return if possible
  328. * Copy the upper/lower portion of A into AB
  329. *
  330. IF( N.LE.KD+1 ) THEN
  331. IF( UPPER ) THEN
  332. DO 100 I = 1, N
  333. LK = MIN( KD+1, I )
  334. CALL CCOPY( LK, A( I-LK+1, I ), 1,
  335. $ AB( KD+1-LK+1, I ), 1 )
  336. 100 CONTINUE
  337. ELSE
  338. DO 110 I = 1, N
  339. LK = MIN( KD+1, N-I+1 )
  340. CALL CCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )
  341. 110 CONTINUE
  342. ENDIF
  343. WORK( 1 ) = 1
  344. RETURN
  345. END IF
  346. *
  347. * Determine the pointer position for the workspace
  348. *
  349. LDT = KD
  350. LDS1 = KD
  351. LT = LDT*KD
  352. LW = N*KD
  353. LS1 = LDS1*KD
  354. LS2 = LWMIN - LT - LW - LS1
  355. * LS2 = N*MAX(KD,FACTOPTNB)
  356. TPOS = 1
  357. WPOS = TPOS + LT
  358. S1POS = WPOS + LW
  359. S2POS = S1POS + LS1
  360. IF( UPPER ) THEN
  361. LDW = KD
  362. LDS2 = KD
  363. ELSE
  364. LDW = N
  365. LDS2 = N
  366. ENDIF
  367. *
  368. *
  369. * Set the workspace of the triangular matrix T to zero once such a
  370. * way every time T is generated the upper/lower portion will be always zero
  371. *
  372. CALL CLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )
  373. *
  374. IF( UPPER ) THEN
  375. DO 10 I = 1, N - KD, KD
  376. PN = N-I-KD+1
  377. PK = MIN( N-I-KD+1, KD )
  378. *
  379. * Compute the LQ factorization of the current block
  380. *
  381. CALL CGELQF( KD, PN, A( I, I+KD ), LDA,
  382. $ TAU( I ), WORK( S2POS ), LS2, IINFO )
  383. *
  384. * Copy the upper portion of A into AB
  385. *
  386. DO 20 J = I, I+PK-1
  387. LK = MIN( KD, N-J ) + 1
  388. CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
  389. 20 CONTINUE
  390. *
  391. CALL CLASET( 'Lower', PK, PK, ZERO, ONE,
  392. $ A( I, I+KD ), LDA )
  393. *
  394. * Form the matrix T
  395. *
  396. CALL CLARFT( 'Forward', 'Rowwise', PN, PK,
  397. $ A( I, I+KD ), LDA, TAU( I ),
  398. $ WORK( TPOS ), LDT )
  399. *
  400. * Compute W:
  401. *
  402. CALL CGEMM( 'Conjugate', 'No transpose', PK, PN, PK,
  403. $ ONE, WORK( TPOS ), LDT,
  404. $ A( I, I+KD ), LDA,
  405. $ ZERO, WORK( S2POS ), LDS2 )
  406. *
  407. CALL CHEMM( 'Right', UPLO, PK, PN,
  408. $ ONE, A( I+KD, I+KD ), LDA,
  409. $ WORK( S2POS ), LDS2,
  410. $ ZERO, WORK( WPOS ), LDW )
  411. *
  412. CALL CGEMM( 'No transpose', 'Conjugate', PK, PK, PN,
  413. $ ONE, WORK( WPOS ), LDW,
  414. $ WORK( S2POS ), LDS2,
  415. $ ZERO, WORK( S1POS ), LDS1 )
  416. *
  417. CALL CGEMM( 'No transpose', 'No transpose', PK, PN, PK,
  418. $ -HALF, WORK( S1POS ), LDS1,
  419. $ A( I, I+KD ), LDA,
  420. $ ONE, WORK( WPOS ), LDW )
  421. *
  422. *
  423. * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
  424. * an update of the form: A := A - V'*W - W'*V
  425. *
  426. CALL CHER2K( UPLO, 'Conjugate', PN, PK,
  427. $ -ONE, A( I, I+KD ), LDA,
  428. $ WORK( WPOS ), LDW,
  429. $ RONE, A( I+KD, I+KD ), LDA )
  430. 10 CONTINUE
  431. *
  432. * Copy the upper band to AB which is the band storage matrix
  433. *
  434. DO 30 J = N-KD+1, N
  435. LK = MIN(KD, N-J) + 1
  436. CALL CCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
  437. 30 CONTINUE
  438. *
  439. ELSE
  440. *
  441. * Reduce the lower triangle of A to lower band matrix
  442. *
  443. DO 40 I = 1, N - KD, KD
  444. PN = N-I-KD+1
  445. PK = MIN( N-I-KD+1, KD )
  446. *
  447. * Compute the QR factorization of the current block
  448. *
  449. CALL CGEQRF( PN, KD, A( I+KD, I ), LDA,
  450. $ TAU( I ), WORK( S2POS ), LS2, IINFO )
  451. *
  452. * Copy the upper portion of A into AB
  453. *
  454. DO 50 J = I, I+PK-1
  455. LK = MIN( KD, N-J ) + 1
  456. CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
  457. 50 CONTINUE
  458. *
  459. CALL CLASET( 'Upper', PK, PK, ZERO, ONE,
  460. $ A( I+KD, I ), LDA )
  461. *
  462. * Form the matrix T
  463. *
  464. CALL CLARFT( 'Forward', 'Columnwise', PN, PK,
  465. $ A( I+KD, I ), LDA, TAU( I ),
  466. $ WORK( TPOS ), LDT )
  467. *
  468. * Compute W:
  469. *
  470. CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK,
  471. $ ONE, A( I+KD, I ), LDA,
  472. $ WORK( TPOS ), LDT,
  473. $ ZERO, WORK( S2POS ), LDS2 )
  474. *
  475. CALL CHEMM( 'Left', UPLO, PN, PK,
  476. $ ONE, A( I+KD, I+KD ), LDA,
  477. $ WORK( S2POS ), LDS2,
  478. $ ZERO, WORK( WPOS ), LDW )
  479. *
  480. CALL CGEMM( 'Conjugate', 'No transpose', PK, PK, PN,
  481. $ ONE, WORK( S2POS ), LDS2,
  482. $ WORK( WPOS ), LDW,
  483. $ ZERO, WORK( S1POS ), LDS1 )
  484. *
  485. CALL CGEMM( 'No transpose', 'No transpose', PN, PK, PK,
  486. $ -HALF, A( I+KD, I ), LDA,
  487. $ WORK( S1POS ), LDS1,
  488. $ ONE, WORK( WPOS ), LDW )
  489. *
  490. *
  491. * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
  492. * an update of the form: A := A - V*W' - W*V'
  493. *
  494. CALL CHER2K( UPLO, 'No transpose', PN, PK,
  495. $ -ONE, A( I+KD, I ), LDA,
  496. $ WORK( WPOS ), LDW,
  497. $ RONE, A( I+KD, I+KD ), LDA )
  498. * ==================================================================
  499. * RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
  500. * DO 45 J = I, I+PK-1
  501. * LK = MIN( KD, N-J ) + 1
  502. * CALL CCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
  503. * 45 CONTINUE
  504. * ==================================================================
  505. 40 CONTINUE
  506. *
  507. * Copy the lower band to AB which is the band storage matrix
  508. *
  509. DO 60 J = N-KD+1, N
  510. LK = MIN(KD, N-J) + 1
  511. CALL CCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
  512. 60 CONTINUE
  513. END IF
  514. *
  515. WORK( 1 ) = SROUNDUP_LWORK( LWMIN )
  516. RETURN
  517. *
  518. * End of CHETRD_HE2HB
  519. *
  520. END