You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

chetrd_hb2st.F 20 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593
  1. *> \brief \b CHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download CHETRD_HB2ST + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/chetrd_hb2st.f">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/chetrd_hb2st.f">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/chetrd_hb2st.f">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
  22. * D, E, HOUS, LHOUS, WORK, LWORK, INFO )
  23. *
  24. * #if defined(_OPENMP)
  25. * use omp_lib
  26. * #endif
  27. *
  28. * IMPLICIT NONE
  29. *
  30. * .. Scalar Arguments ..
  31. * CHARACTER STAGE1, UPLO, VECT
  32. * INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO
  33. * ..
  34. * .. Array Arguments ..
  35. * REAL D( * ), E( * )
  36. * COMPLEX AB( LDAB, * ), HOUS( * ), WORK( * )
  37. * ..
  38. *
  39. *
  40. *> \par Purpose:
  41. * =============
  42. *>
  43. *> \verbatim
  44. *>
  45. *> CHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric
  46. *> tridiagonal form T by a unitary similarity transformation:
  47. *> Q**H * A * Q = T.
  48. *> \endverbatim
  49. *
  50. * Arguments:
  51. * ==========
  52. *
  53. *> \param[in] STAGE1
  54. *> \verbatim
  55. *> STAGE1 is CHARACTER*1
  56. *> = 'N': "No": to mention that the stage 1 of the reduction
  57. *> from dense to band using the chetrd_he2hb routine
  58. *> was not called before this routine to reproduce AB.
  59. *> In other term this routine is called as standalone.
  60. *> = 'Y': "Yes": to mention that the stage 1 of the
  61. *> reduction from dense to band using the chetrd_he2hb
  62. *> routine has been called to produce AB (e.g., AB is
  63. *> the output of chetrd_he2hb.
  64. *> \endverbatim
  65. *>
  66. *> \param[in] VECT
  67. *> \verbatim
  68. *> VECT is CHARACTER*1
  69. *> = 'N': No need for the Housholder representation,
  70. *> and thus LHOUS is of size max(1, 4*N);
  71. *> = 'V': the Householder representation is needed to
  72. *> either generate or to apply Q later on,
  73. *> then LHOUS is to be queried and computed.
  74. *> (NOT AVAILABLE IN THIS RELEASE).
  75. *> \endverbatim
  76. *>
  77. *> \param[in] UPLO
  78. *> \verbatim
  79. *> UPLO is CHARACTER*1
  80. *> = 'U': Upper triangle of A is stored;
  81. *> = 'L': Lower triangle of A is stored.
  82. *> \endverbatim
  83. *>
  84. *> \param[in] N
  85. *> \verbatim
  86. *> N is INTEGER
  87. *> The order of the matrix A. N >= 0.
  88. *> \endverbatim
  89. *>
  90. *> \param[in] KD
  91. *> \verbatim
  92. *> KD is INTEGER
  93. *> The number of superdiagonals of the matrix A if UPLO = 'U',
  94. *> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
  95. *> \endverbatim
  96. *>
  97. *> \param[in,out] AB
  98. *> \verbatim
  99. *> AB is COMPLEX array, dimension (LDAB,N)
  100. *> On entry, the upper or lower triangle of the Hermitian band
  101. *> matrix A, stored in the first KD+1 rows of the array. The
  102. *> j-th column of A is stored in the j-th column of the array AB
  103. *> as follows:
  104. *> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
  105. *> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
  106. *> On exit, the diagonal elements of AB are overwritten by the
  107. *> diagonal elements of the tridiagonal matrix T; if KD > 0, the
  108. *> elements on the first superdiagonal (if UPLO = 'U') or the
  109. *> first subdiagonal (if UPLO = 'L') are overwritten by the
  110. *> off-diagonal elements of T; the rest of AB is overwritten by
  111. *> values generated during the reduction.
  112. *> \endverbatim
  113. *>
  114. *> \param[in] LDAB
  115. *> \verbatim
  116. *> LDAB is INTEGER
  117. *> The leading dimension of the array AB. LDAB >= KD+1.
  118. *> \endverbatim
  119. *>
  120. *> \param[out] D
  121. *> \verbatim
  122. *> D is REAL array, dimension (N)
  123. *> The diagonal elements of the tridiagonal matrix T.
  124. *> \endverbatim
  125. *>
  126. *> \param[out] E
  127. *> \verbatim
  128. *> E is REAL array, dimension (N-1)
  129. *> The off-diagonal elements of the tridiagonal matrix T:
  130. *> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
  131. *> \endverbatim
  132. *>
  133. *> \param[out] HOUS
  134. *> \verbatim
  135. *> HOUS is COMPLEX array, dimension (MAX(1,LHOUS))
  136. *> Stores the Householder representation.
  137. *> \endverbatim
  138. *>
  139. *> \param[in] LHOUS
  140. *> \verbatim
  141. *> LHOUS is INTEGER
  142. *> The dimension of the array HOUS.
  143. *> If N = 0 or KD <= 1, LHOUS >= 1, else LHOUS = MAX(1, dimension).
  144. *>
  145. *> If LWORK = -1, or LHOUS = -1,
  146. *> then a query is assumed; the routine
  147. *> only calculates the optimal size of the HOUS array, returns
  148. *> this value as the first entry of the HOUS array, and no error
  149. *> message related to LHOUS is issued by XERBLA.
  150. *> LHOUS = MAX(1, dimension) where
  151. *> dimension = 4*N if VECT='N'
  152. *> not available now if VECT='H'
  153. *> \endverbatim
  154. *>
  155. *> \param[out] WORK
  156. *> \verbatim
  157. *> WORK is COMPLEX array, dimension (MAX(1,LWORK)).
  158. *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  159. *> \endverbatim
  160. *>
  161. *> \param[in] LWORK
  162. *> \verbatim
  163. *> LWORK is INTEGER
  164. *> The dimension of the array WORK.
  165. *> If N = 0 or KD <= 1, LWORK >= 1, else LWORK = MAX(1, dimension).
  166. *>
  167. *> If LWORK = -1, or LHOUS = -1,
  168. *> then a workspace query is assumed; the routine
  169. *> only calculates the optimal size of the WORK array, returns
  170. *> this value as the first entry of the WORK array, and no error
  171. *> message related to LWORK is issued by XERBLA.
  172. *> LWORK = MAX(1, dimension) where
  173. *> dimension = (2KD+1)*N + KD*NTHREADS
  174. *> where KD is the blocking size of the reduction,
  175. *> FACTOPTNB is the blocking used by the QR or LQ
  176. *> algorithm, usually FACTOPTNB=128 is a good choice
  177. *> NTHREADS is the number of threads used when
  178. *> openMP compilation is enabled, otherwise =1.
  179. *> \endverbatim
  180. *>
  181. *> \param[out] INFO
  182. *> \verbatim
  183. *> INFO is INTEGER
  184. *> = 0: successful exit
  185. *> < 0: if INFO = -i, the i-th argument had an illegal value
  186. *> \endverbatim
  187. *
  188. * Authors:
  189. * ========
  190. *
  191. *> \author Univ. of Tennessee
  192. *> \author Univ. of California Berkeley
  193. *> \author Univ. of Colorado Denver
  194. *> \author NAG Ltd.
  195. *
  196. *> \ingroup hetrd_hb2st
  197. *
  198. *> \par Further Details:
  199. * =====================
  200. *>
  201. *> \verbatim
  202. *>
  203. *> Implemented by Azzam Haidar.
  204. *>
  205. *> All details are available on technical report, SC11, SC13 papers.
  206. *>
  207. *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  208. *> Parallel reduction to condensed forms for symmetric eigenvalue problems
  209. *> using aggregated fine-grained and memory-aware kernels. In Proceedings
  210. *> of 2011 International Conference for High Performance Computing,
  211. *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
  212. *> Article 8 , 11 pages.
  213. *> http://doi.acm.org/10.1145/2063384.2063394
  214. *>
  215. *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
  216. *> An improved parallel singular value algorithm and its implementation
  217. *> for multicore hardware, In Proceedings of 2013 International Conference
  218. *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
  219. *> Denver, Colorado, USA, 2013.
  220. *> Article 90, 12 pages.
  221. *> http://doi.acm.org/10.1145/2503210.2503292
  222. *>
  223. *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  224. *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  225. *> calculations based on fine-grained memory aware tasks.
  226. *> International Journal of High Performance Computing Applications.
  227. *> Volume 28 Issue 2, Pages 196-209, May 2014.
  228. *> http://hpc.sagepub.com/content/28/2/196
  229. *>
  230. *> \endverbatim
  231. *>
  232. * =====================================================================
  233. SUBROUTINE CHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
  234. $ D, E, HOUS, LHOUS, WORK, LWORK, INFO )
  235. *
  236. *
  237. #if defined(_OPENMP)
  238. use omp_lib
  239. #endif
  240. *
  241. IMPLICIT NONE
  242. *
  243. * -- LAPACK computational routine --
  244. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  245. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  246. *
  247. * .. Scalar Arguments ..
  248. CHARACTER STAGE1, UPLO, VECT
  249. INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
  250. * ..
  251. * .. Array Arguments ..
  252. REAL D( * ), E( * )
  253. COMPLEX AB( LDAB, * ), HOUS( * ), WORK( * )
  254. * ..
  255. *
  256. * =====================================================================
  257. *
  258. * .. Parameters ..
  259. REAL RZERO
  260. COMPLEX ZERO, ONE
  261. PARAMETER ( RZERO = 0.0E+0,
  262. $ ZERO = ( 0.0E+0, 0.0E+0 ),
  263. $ ONE = ( 1.0E+0, 0.0E+0 ) )
  264. * ..
  265. * .. Local Scalars ..
  266. LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
  267. INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
  268. $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
  269. $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
  270. $ NBTILES, TTYPE, TID, NTHREADS,
  271. $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS,
  272. $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
  273. $ SICEV, SIZETAU, LDV, LHMIN, LWMIN
  274. REAL ABSTMP
  275. COMPLEX TMP
  276. * ..
  277. * .. External Subroutines ..
  278. EXTERNAL CHB2ST_KERNELS, CLACPY, CLASET, XERBLA
  279. * ..
  280. * .. Intrinsic Functions ..
  281. INTRINSIC MIN, MAX, CEILING, REAL
  282. * ..
  283. * .. External Functions ..
  284. LOGICAL LSAME
  285. INTEGER ILAENV2STAGE
  286. REAL SROUNDUP_LWORK
  287. EXTERNAL LSAME, ILAENV2STAGE, SROUNDUP_LWORK
  288. * ..
  289. * .. Executable Statements ..
  290. *
  291. * Determine the minimal workspace size required.
  292. * Test the input parameters
  293. *
  294. INFO = 0
  295. AFTERS1 = LSAME( STAGE1, 'Y' )
  296. WANTQ = LSAME( VECT, 'V' )
  297. UPPER = LSAME( UPLO, 'U' )
  298. LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 )
  299. *
  300. * Determine the block size, the workspace size and the hous size.
  301. *
  302. IB = ILAENV2STAGE( 2, 'CHETRD_HB2ST', VECT, N, KD, -1, -1 )
  303. IF( N.EQ.0 .OR. KD.LE.1 ) THEN
  304. LHMIN = 1
  305. LWMIN = 1
  306. ELSE
  307. LHMIN = ILAENV2STAGE( 3, 'CHETRD_HB2ST', VECT, N, KD, IB, -1 )
  308. LWMIN = ILAENV2STAGE( 4, 'CHETRD_HB2ST', VECT, N, KD, IB, -1 )
  309. END IF
  310. *
  311. IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN
  312. INFO = -1
  313. ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN
  314. INFO = -2
  315. ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
  316. INFO = -3
  317. ELSE IF( N.LT.0 ) THEN
  318. INFO = -4
  319. ELSE IF( KD.LT.0 ) THEN
  320. INFO = -5
  321. ELSE IF( LDAB.LT.(KD+1) ) THEN
  322. INFO = -7
  323. ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN
  324. INFO = -11
  325. ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
  326. INFO = -13
  327. END IF
  328. *
  329. IF( INFO.EQ.0 ) THEN
  330. HOUS( 1 ) = SROUNDUP_LWORK( LHMIN )
  331. WORK( 1 ) = SROUNDUP_LWORK( LWMIN )
  332. END IF
  333. *
  334. IF( INFO.NE.0 ) THEN
  335. CALL XERBLA( 'CHETRD_HB2ST', -INFO )
  336. RETURN
  337. ELSE IF( LQUERY ) THEN
  338. RETURN
  339. END IF
  340. *
  341. * Quick return if possible
  342. *
  343. IF( N.EQ.0 ) THEN
  344. HOUS( 1 ) = 1
  345. WORK( 1 ) = 1
  346. RETURN
  347. END IF
  348. *
  349. * Determine pointer position
  350. *
  351. LDV = KD + IB
  352. SIZETAU = 2 * N
  353. SICEV = 2 * N
  354. INDTAU = 1
  355. INDV = INDTAU + SIZETAU
  356. LDA = 2 * KD + 1
  357. SIZEA = LDA * N
  358. INDA = 1
  359. INDW = INDA + SIZEA
  360. NTHREADS = 1
  361. TID = 0
  362. *
  363. IF( UPPER ) THEN
  364. APOS = INDA + KD
  365. AWPOS = INDA
  366. DPOS = APOS + KD
  367. OFDPOS = DPOS - 1
  368. ABDPOS = KD + 1
  369. ABOFDPOS = KD
  370. ELSE
  371. APOS = INDA
  372. AWPOS = INDA + KD + 1
  373. DPOS = APOS
  374. OFDPOS = DPOS + 1
  375. ABDPOS = 1
  376. ABOFDPOS = 2
  377. ENDIF
  378. *
  379. * Case KD=0:
  380. * The matrix is diagonal. We just copy it (convert to "real" for
  381. * complex because D is double and the imaginary part should be 0)
  382. * and store it in D. A sequential code here is better or
  383. * in a parallel environment it might need two cores for D and E
  384. *
  385. IF( KD.EQ.0 ) THEN
  386. DO 30 I = 1, N
  387. D( I ) = REAL( AB( ABDPOS, I ) )
  388. 30 CONTINUE
  389. DO 40 I = 1, N-1
  390. E( I ) = RZERO
  391. 40 CONTINUE
  392. *
  393. HOUS( 1 ) = 1
  394. WORK( 1 ) = 1
  395. RETURN
  396. END IF
  397. *
  398. * Case KD=1:
  399. * The matrix is already Tridiagonal. We have to make diagonal
  400. * and offdiagonal elements real, and store them in D and E.
  401. * For that, for real precision just copy the diag and offdiag
  402. * to D and E while for the COMPLEX case the bulge chasing is
  403. * performed to convert the hermetian tridiagonal to symmetric
  404. * tridiagonal. A simpler conversion formula might be used, but then
  405. * updating the Q matrix will be required and based if Q is generated
  406. * or not this might complicate the story.
  407. *
  408. IF( KD.EQ.1 ) THEN
  409. DO 50 I = 1, N
  410. D( I ) = REAL( AB( ABDPOS, I ) )
  411. 50 CONTINUE
  412. *
  413. * make off-diagonal elements real and copy them to E
  414. *
  415. IF( UPPER ) THEN
  416. DO 60 I = 1, N - 1
  417. TMP = AB( ABOFDPOS, I+1 )
  418. ABSTMP = ABS( TMP )
  419. AB( ABOFDPOS, I+1 ) = ABSTMP
  420. E( I ) = ABSTMP
  421. IF( ABSTMP.NE.RZERO ) THEN
  422. TMP = TMP / ABSTMP
  423. ELSE
  424. TMP = ONE
  425. END IF
  426. IF( I.LT.N-1 )
  427. $ AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP
  428. C IF( WANTZ ) THEN
  429. C CALL CSCAL( N, CONJG( TMP ), Q( 1, I+1 ), 1 )
  430. C END IF
  431. 60 CONTINUE
  432. ELSE
  433. DO 70 I = 1, N - 1
  434. TMP = AB( ABOFDPOS, I )
  435. ABSTMP = ABS( TMP )
  436. AB( ABOFDPOS, I ) = ABSTMP
  437. E( I ) = ABSTMP
  438. IF( ABSTMP.NE.RZERO ) THEN
  439. TMP = TMP / ABSTMP
  440. ELSE
  441. TMP = ONE
  442. END IF
  443. IF( I.LT.N-1 )
  444. $ AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP
  445. C IF( WANTQ ) THEN
  446. C CALL CSCAL( N, TMP, Q( 1, I+1 ), 1 )
  447. C END IF
  448. 70 CONTINUE
  449. ENDIF
  450. *
  451. HOUS( 1 ) = 1
  452. WORK( 1 ) = 1
  453. RETURN
  454. END IF
  455. *
  456. * Main code start here.
  457. * Reduce the hermitian band of A to a tridiagonal matrix.
  458. *
  459. THGRSIZ = N
  460. GRSIZ = 1
  461. SHIFT = 3
  462. NBTILES = CEILING( REAL(N)/REAL(KD) )
  463. STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) )
  464. THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) )
  465. *
  466. CALL CLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA )
  467. CALL CLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA )
  468. *
  469. *
  470. * openMP parallelisation start here
  471. *
  472. #if defined(_OPENMP)
  473. !$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
  474. !$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
  475. !$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
  476. !$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
  477. !$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
  478. !$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
  479. !$OMP MASTER
  480. #endif
  481. *
  482. * main bulge chasing loop
  483. *
  484. DO 100 THGRID = 1, THGRNB
  485. STT = (THGRID-1)*THGRSIZ+1
  486. THED = MIN( (STT + THGRSIZ -1), (N-1))
  487. DO 110 I = STT, N-1
  488. ED = MIN( I, THED )
  489. IF( STT.GT.ED ) EXIT
  490. DO 120 M = 1, STEPERCOL
  491. ST = STT
  492. DO 130 SWEEPID = ST, ED
  493. DO 140 K = 1, GRSIZ
  494. MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ)
  495. $ + (M-1)*GRSIZ + K
  496. IF ( MYID.EQ.1 ) THEN
  497. TTYPE = 1
  498. ELSE
  499. TTYPE = MOD( MYID, 2 ) + 2
  500. ENDIF
  501. IF( TTYPE.EQ.2 ) THEN
  502. COLPT = (MYID/2)*KD + SWEEPID
  503. STIND = COLPT-KD+1
  504. EDIND = MIN(COLPT,N)
  505. BLKLASTIND = COLPT
  506. ELSE
  507. COLPT = ((MYID+1)/2)*KD + SWEEPID
  508. STIND = COLPT-KD+1
  509. EDIND = MIN(COLPT,N)
  510. IF( ( STIND.GE.EDIND-1 ).AND.
  511. $ ( EDIND.EQ.N ) ) THEN
  512. BLKLASTIND = N
  513. ELSE
  514. BLKLASTIND = 0
  515. ENDIF
  516. ENDIF
  517. *
  518. * Call the kernel
  519. *
  520. #if defined(_OPENMP) && _OPENMP >= 201307
  521. IF( TTYPE.NE.1 ) THEN
  522. !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
  523. !$OMP$ DEPEND(in:WORK(MYID-1))
  524. !$OMP$ DEPEND(out:WORK(MYID))
  525. TID = OMP_GET_THREAD_NUM()
  526. CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE,
  527. $ STIND, EDIND, SWEEPID, N, KD, IB,
  528. $ WORK ( INDA ), LDA,
  529. $ HOUS( INDV ), HOUS( INDTAU ), LDV,
  530. $ WORK( INDW + TID*KD ) )
  531. !$OMP END TASK
  532. ELSE
  533. !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
  534. !$OMP$ DEPEND(out:WORK(MYID))
  535. TID = OMP_GET_THREAD_NUM()
  536. CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE,
  537. $ STIND, EDIND, SWEEPID, N, KD, IB,
  538. $ WORK ( INDA ), LDA,
  539. $ HOUS( INDV ), HOUS( INDTAU ), LDV,
  540. $ WORK( INDW + TID*KD ) )
  541. !$OMP END TASK
  542. ENDIF
  543. #else
  544. CALL CHB2ST_KERNELS( UPLO, WANTQ, TTYPE,
  545. $ STIND, EDIND, SWEEPID, N, KD, IB,
  546. $ WORK ( INDA ), LDA,
  547. $ HOUS( INDV ), HOUS( INDTAU ), LDV,
  548. $ WORK( INDW ) )
  549. #endif
  550. IF ( BLKLASTIND.GE.(N-1) ) THEN
  551. STT = STT + 1
  552. EXIT
  553. ENDIF
  554. 140 CONTINUE
  555. 130 CONTINUE
  556. 120 CONTINUE
  557. 110 CONTINUE
  558. 100 CONTINUE
  559. *
  560. #if defined(_OPENMP)
  561. !$OMP END MASTER
  562. !$OMP END PARALLEL
  563. #endif
  564. *
  565. * Copy the diagonal from A to D. Note that D is REAL thus only
  566. * the Real part is needed, the imaginary part should be zero.
  567. *
  568. DO 150 I = 1, N
  569. D( I ) = REAL( WORK( DPOS+(I-1)*LDA ) )
  570. 150 CONTINUE
  571. *
  572. * Copy the off diagonal from A to E. Note that E is REAL thus only
  573. * the Real part is needed, the imaginary part should be zero.
  574. *
  575. IF( UPPER ) THEN
  576. DO 160 I = 1, N-1
  577. E( I ) = REAL( WORK( OFDPOS+I*LDA ) )
  578. 160 CONTINUE
  579. ELSE
  580. DO 170 I = 1, N-1
  581. E( I ) = REAL( WORK( OFDPOS+(I-1)*LDA ) )
  582. 170 CONTINUE
  583. ENDIF
  584. *
  585. WORK( 1 ) = SROUNDUP_LWORK( LWMIN )
  586. RETURN
  587. *
  588. * End of CHETRD_HB2ST
  589. *
  590. END