You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

zgsvj0.f 36 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932
  1. *> \brief <b> ZGSVJ0 pre-processor for the routine zgesvj. </b>
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download ZGSVJ0 + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zgsvj0.f">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zgsvj0.f">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zgsvj0.f">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * SUBROUTINE ZGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS,
  22. * SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
  23. *
  24. * .. Scalar Arguments ..
  25. * INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP
  26. * DOUBLE PRECISION EPS, SFMIN, TOL
  27. * CHARACTER*1 JOBV
  28. * ..
  29. * .. Array Arguments ..
  30. * COMPLEX*16 A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK )
  31. * DOUBLE PRECISION SVA( N )
  32. * ..
  33. *
  34. *
  35. *> \par Purpose:
  36. * =============
  37. *>
  38. *> \verbatim
  39. *>
  40. *> ZGSVJ0 is called from ZGESVJ as a pre-processor and that is its main
  41. *> purpose. It applies Jacobi rotations in the same way as ZGESVJ does, but
  42. *> it does not check convergence (stopping criterion). Few tuning
  43. *> parameters (marked by [TP]) are available for the implementer.
  44. *> \endverbatim
  45. *
  46. * Arguments:
  47. * ==========
  48. *
  49. *> \param[in] JOBV
  50. *> \verbatim
  51. *> JOBV is CHARACTER*1
  52. *> Specifies whether the output from this procedure is used
  53. *> to compute the matrix V:
  54. *> = 'V': the product of the Jacobi rotations is accumulated
  55. *> by postmultiplying the N-by-N array V.
  56. *> (See the description of V.)
  57. *> = 'A': the product of the Jacobi rotations is accumulated
  58. *> by postmultiplying the MV-by-N array V.
  59. *> (See the descriptions of MV and V.)
  60. *> = 'N': the Jacobi rotations are not accumulated.
  61. *> \endverbatim
  62. *>
  63. *> \param[in] M
  64. *> \verbatim
  65. *> M is INTEGER
  66. *> The number of rows of the input matrix A. M >= 0.
  67. *> \endverbatim
  68. *>
  69. *> \param[in] N
  70. *> \verbatim
  71. *> N is INTEGER
  72. *> The number of columns of the input matrix A.
  73. *> M >= N >= 0.
  74. *> \endverbatim
  75. *>
  76. *> \param[in,out] A
  77. *> \verbatim
  78. *> A is COMPLEX*16 array, dimension (LDA,N)
  79. *> On entry, M-by-N matrix A, such that A*diag(D) represents
  80. *> the input matrix.
  81. *> On exit,
  82. *> A_onexit * diag(D_onexit) represents the input matrix A*diag(D)
  83. *> post-multiplied by a sequence of Jacobi rotations, where the
  84. *> rotation threshold and the total number of sweeps are given in
  85. *> TOL and NSWEEP, respectively.
  86. *> (See the descriptions of D, TOL and NSWEEP.)
  87. *> \endverbatim
  88. *>
  89. *> \param[in] LDA
  90. *> \verbatim
  91. *> LDA is INTEGER
  92. *> The leading dimension of the array A. LDA >= max(1,M).
  93. *> \endverbatim
  94. *>
  95. *> \param[in,out] D
  96. *> \verbatim
  97. *> D is COMPLEX*16 array, dimension (N)
  98. *> The array D accumulates the scaling factors from the complex scaled
  99. *> Jacobi rotations.
  100. *> On entry, A*diag(D) represents the input matrix.
  101. *> On exit, A_onexit*diag(D_onexit) represents the input matrix
  102. *> post-multiplied by a sequence of Jacobi rotations, where the
  103. *> rotation threshold and the total number of sweeps are given in
  104. *> TOL and NSWEEP, respectively.
  105. *> (See the descriptions of A, TOL and NSWEEP.)
  106. *> \endverbatim
  107. *>
  108. *> \param[in,out] SVA
  109. *> \verbatim
  110. *> SVA is DOUBLE PRECISION array, dimension (N)
  111. *> On entry, SVA contains the Euclidean norms of the columns of
  112. *> the matrix A*diag(D).
  113. *> On exit, SVA contains the Euclidean norms of the columns of
  114. *> the matrix A_onexit*diag(D_onexit).
  115. *> \endverbatim
  116. *>
  117. *> \param[in] MV
  118. *> \verbatim
  119. *> MV is INTEGER
  120. *> If JOBV = 'A', then MV rows of V are post-multiplied by a
  121. *> sequence of Jacobi rotations.
  122. *> If JOBV = 'N', then MV is not referenced.
  123. *> \endverbatim
  124. *>
  125. *> \param[in,out] V
  126. *> \verbatim
  127. *> V is COMPLEX*16 array, dimension (LDV,N)
  128. *> If JOBV = 'V' then N rows of V are post-multiplied by a
  129. *> sequence of Jacobi rotations.
  130. *> If JOBV = 'A' then MV rows of V are post-multiplied by a
  131. *> sequence of Jacobi rotations.
  132. *> If JOBV = 'N', then V is not referenced.
  133. *> \endverbatim
  134. *>
  135. *> \param[in] LDV
  136. *> \verbatim
  137. *> LDV is INTEGER
  138. *> The leading dimension of the array V, LDV >= 1.
  139. *> If JOBV = 'V', LDV >= N.
  140. *> If JOBV = 'A', LDV >= MV.
  141. *> \endverbatim
  142. *>
  143. *> \param[in] EPS
  144. *> \verbatim
  145. *> EPS is DOUBLE PRECISION
  146. *> EPS = DLAMCH('Epsilon')
  147. *> \endverbatim
  148. *>
  149. *> \param[in] SFMIN
  150. *> \verbatim
  151. *> SFMIN is DOUBLE PRECISION
  152. *> SFMIN = DLAMCH('Safe Minimum')
  153. *> \endverbatim
  154. *>
  155. *> \param[in] TOL
  156. *> \verbatim
  157. *> TOL is DOUBLE PRECISION
  158. *> TOL is the threshold for Jacobi rotations. For a pair
  159. *> A(:,p), A(:,q) of pivot columns, the Jacobi rotation is
  160. *> applied only if ABS(COS(angle(A(:,p),A(:,q)))) > TOL.
  161. *> \endverbatim
  162. *>
  163. *> \param[in] NSWEEP
  164. *> \verbatim
  165. *> NSWEEP is INTEGER
  166. *> NSWEEP is the number of sweeps of Jacobi rotations to be
  167. *> performed.
  168. *> \endverbatim
  169. *>
  170. *> \param[out] WORK
  171. *> \verbatim
  172. *> WORK is COMPLEX*16 array, dimension (LWORK)
  173. *> \endverbatim
  174. *>
  175. *> \param[in] LWORK
  176. *> \verbatim
  177. *> LWORK is INTEGER
  178. *> LWORK is the dimension of WORK. LWORK >= M.
  179. *> \endverbatim
  180. *>
  181. *> \param[out] INFO
  182. *> \verbatim
  183. *> INFO is INTEGER
  184. *> = 0: successful exit.
  185. *> < 0: if INFO = -i, then the i-th argument had an illegal value
  186. *> \endverbatim
  187. *
  188. * Authors:
  189. * ========
  190. *
  191. *> \author Univ. of Tennessee
  192. *> \author Univ. of California Berkeley
  193. *> \author Univ. of Colorado Denver
  194. *> \author NAG Ltd.
  195. *
  196. *> \ingroup complex16OTHERcomputational
  197. *>
  198. *> \par Further Details:
  199. * =====================
  200. *>
  201. *> ZGSVJ0 is used just to enable ZGESVJ to call a simplified version of
  202. *> itself to work on a submatrix of the original matrix.
  203. *>
  204. *> Contributor:
  205. * =============
  206. *>
  207. *> Zlatko Drmac (Zagreb, Croatia)
  208. *>
  209. *> \par Bugs, Examples and Comments:
  210. * ============================
  211. *>
  212. *> Please report all bugs and send interesting test examples and comments to
  213. *> drmac@math.hr. Thank you.
  214. *
  215. * =====================================================================
  216. SUBROUTINE ZGSVJ0( JOBV, M, N, A, LDA, D, SVA, MV, V, LDV, EPS,
  217. $ SFMIN, TOL, NSWEEP, WORK, LWORK, INFO )
  218. *
  219. * -- LAPACK computational routine --
  220. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  221. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  222. *
  223. IMPLICIT NONE
  224. * .. Scalar Arguments ..
  225. INTEGER INFO, LDA, LDV, LWORK, M, MV, N, NSWEEP
  226. DOUBLE PRECISION EPS, SFMIN, TOL
  227. CHARACTER*1 JOBV
  228. * ..
  229. * .. Array Arguments ..
  230. COMPLEX*16 A( LDA, * ), D( N ), V( LDV, * ), WORK( LWORK )
  231. DOUBLE PRECISION SVA( N )
  232. * ..
  233. *
  234. * =====================================================================
  235. *
  236. * .. Local Parameters ..
  237. DOUBLE PRECISION ZERO, HALF, ONE
  238. PARAMETER ( ZERO = 0.0D0, HALF = 0.5D0, ONE = 1.0D0)
  239. COMPLEX*16 CZERO, CONE
  240. PARAMETER ( CZERO = (0.0D0, 0.0D0), CONE = (1.0D0, 0.0D0) )
  241. * ..
  242. * .. Local Scalars ..
  243. COMPLEX*16 AAPQ, OMPQ
  244. DOUBLE PRECISION AAPP, AAPP0, AAPQ1, AAQQ, APOAQ, AQOAP, BIG,
  245. $ BIGTHETA, CS, MXAAPQ, MXSINJ, ROOTBIG, ROOTEPS,
  246. $ ROOTSFMIN, ROOTTOL, SMALL, SN, T, TEMP1, THETA,
  247. $ THSIGN
  248. INTEGER BLSKIP, EMPTSW, i, ibr, IERR, igl, IJBLSK, ir1,
  249. $ ISWROT, jbc, jgl, KBL, LKAHEAD, MVL, NBL,
  250. $ NOTROT, p, PSKIPPED, q, ROWSKIP, SWBAND
  251. LOGICAL APPLV, ROTOK, RSVEC
  252. * ..
  253. * ..
  254. * .. Intrinsic Functions ..
  255. INTRINSIC ABS, MAX, CONJG, DBLE, MIN, SIGN, SQRT
  256. * ..
  257. * .. External Functions ..
  258. DOUBLE PRECISION DZNRM2
  259. COMPLEX*16 ZDOTC
  260. INTEGER IDAMAX
  261. LOGICAL LSAME
  262. EXTERNAL IDAMAX, LSAME, ZDOTC, DZNRM2
  263. * ..
  264. * ..
  265. * .. External Subroutines ..
  266. * ..
  267. * from BLAS
  268. EXTERNAL ZCOPY, ZROT, ZSWAP, ZAXPY
  269. * from LAPACK
  270. EXTERNAL ZLASCL, ZLASSQ, XERBLA
  271. * ..
  272. * .. Executable Statements ..
  273. *
  274. * Test the input parameters.
  275. *
  276. APPLV = LSAME( JOBV, 'A' )
  277. RSVEC = LSAME( JOBV, 'V' )
  278. IF( .NOT.( RSVEC .OR. APPLV .OR. LSAME( JOBV, 'N' ) ) ) THEN
  279. INFO = -1
  280. ELSE IF( M.LT.0 ) THEN
  281. INFO = -2
  282. ELSE IF( ( N.LT.0 ) .OR. ( N.GT.M ) ) THEN
  283. INFO = -3
  284. ELSE IF( LDA.LT.M ) THEN
  285. INFO = -5
  286. ELSE IF( ( RSVEC.OR.APPLV ) .AND. ( MV.LT.0 ) ) THEN
  287. INFO = -8
  288. ELSE IF( ( RSVEC.AND.( LDV.LT.N ) ).OR.
  289. $ ( APPLV.AND.( LDV.LT.MV ) ) ) THEN
  290. INFO = -10
  291. ELSE IF( TOL.LE.EPS ) THEN
  292. INFO = -13
  293. ELSE IF( NSWEEP.LT.0 ) THEN
  294. INFO = -14
  295. ELSE IF( LWORK.LT.M ) THEN
  296. INFO = -16
  297. ELSE
  298. INFO = 0
  299. END IF
  300. *
  301. * #:(
  302. IF( INFO.NE.0 ) THEN
  303. CALL XERBLA( 'ZGSVJ0', -INFO )
  304. RETURN
  305. END IF
  306. *
  307. IF( RSVEC ) THEN
  308. MVL = N
  309. ELSE IF( APPLV ) THEN
  310. MVL = MV
  311. END IF
  312. RSVEC = RSVEC .OR. APPLV
  313. ROOTEPS = SQRT( EPS )
  314. ROOTSFMIN = SQRT( SFMIN )
  315. SMALL = SFMIN / EPS
  316. BIG = ONE / SFMIN
  317. ROOTBIG = ONE / ROOTSFMIN
  318. BIGTHETA = ONE / ROOTEPS
  319. ROOTTOL = SQRT( TOL )
  320. *
  321. * .. Row-cyclic Jacobi SVD algorithm with column pivoting ..
  322. *
  323. EMPTSW = ( N*( N-1 ) ) / 2
  324. NOTROT = 0
  325. *
  326. * .. Row-cyclic pivot strategy with de Rijk's pivoting ..
  327. *
  328. SWBAND = 0
  329. *[TP] SWBAND is a tuning parameter [TP]. It is meaningful and effective
  330. * if ZGESVJ is used as a computational routine in the preconditioned
  331. * Jacobi SVD algorithm ZGEJSV. For sweeps i=1:SWBAND the procedure
  332. * works on pivots inside a band-like region around the diagonal.
  333. * The boundaries are determined dynamically, based on the number of
  334. * pivots above a threshold.
  335. *
  336. KBL = MIN( 8, N )
  337. *[TP] KBL is a tuning parameter that defines the tile size in the
  338. * tiling of the p-q loops of pivot pairs. In general, an optimal
  339. * value of KBL depends on the matrix dimensions and on the
  340. * parameters of the computer's memory.
  341. *
  342. NBL = N / KBL
  343. IF( ( NBL*KBL ).NE.N )NBL = NBL + 1
  344. *
  345. BLSKIP = KBL**2
  346. *[TP] BLKSKIP is a tuning parameter that depends on SWBAND and KBL.
  347. *
  348. ROWSKIP = MIN( 5, KBL )
  349. *[TP] ROWSKIP is a tuning parameter.
  350. *
  351. LKAHEAD = 1
  352. *[TP] LKAHEAD is a tuning parameter.
  353. *
  354. * Quasi block transformations, using the lower (upper) triangular
  355. * structure of the input matrix. The quasi-block-cycling usually
  356. * invokes cubic convergence. Big part of this cycle is done inside
  357. * canonical subspaces of dimensions less than M.
  358. *
  359. *
  360. * .. Row-cyclic pivot strategy with de Rijk's pivoting ..
  361. *
  362. DO 1993 i = 1, NSWEEP
  363. *
  364. * .. go go go ...
  365. *
  366. MXAAPQ = ZERO
  367. MXSINJ = ZERO
  368. ISWROT = 0
  369. *
  370. NOTROT = 0
  371. PSKIPPED = 0
  372. *
  373. * Each sweep is unrolled using KBL-by-KBL tiles over the pivot pairs
  374. * 1 <= p < q <= N. This is the first step toward a blocked implementation
  375. * of the rotations. New implementation, based on block transformations,
  376. * is under development.
  377. *
  378. DO 2000 ibr = 1, NBL
  379. *
  380. igl = ( ibr-1 )*KBL + 1
  381. *
  382. DO 1002 ir1 = 0, MIN( LKAHEAD, NBL-ibr )
  383. *
  384. igl = igl + ir1*KBL
  385. *
  386. DO 2001 p = igl, MIN( igl+KBL-1, N-1 )
  387. *
  388. * .. de Rijk's pivoting
  389. *
  390. q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1
  391. IF( p.NE.q ) THEN
  392. CALL ZSWAP( M, A( 1, p ), 1, A( 1, q ), 1 )
  393. IF( RSVEC )CALL ZSWAP( MVL, V( 1, p ), 1,
  394. $ V( 1, q ), 1 )
  395. TEMP1 = SVA( p )
  396. SVA( p ) = SVA( q )
  397. SVA( q ) = TEMP1
  398. AAPQ = D(p)
  399. D(p) = D(q)
  400. D(q) = AAPQ
  401. END IF
  402. *
  403. IF( ir1.EQ.0 ) THEN
  404. *
  405. * Column norms are periodically updated by explicit
  406. * norm computation.
  407. * Caveat:
  408. * Unfortunately, some BLAS implementations compute SNCRM2(M,A(1,p),1)
  409. * as SQRT(S=ZDOTC(M,A(1,p),1,A(1,p),1)), which may cause the result to
  410. * overflow for ||A(:,p)||_2 > SQRT(overflow_threshold), and to
  411. * underflow for ||A(:,p)||_2 < SQRT(underflow_threshold).
  412. * Hence, DZNRM2 cannot be trusted, not even in the case when
  413. * the true norm is far from the under(over)flow boundaries.
  414. * If properly implemented DZNRM2 is available, the IF-THEN-ELSE-END IF
  415. * below should be replaced with "AAPP = DZNRM2( M, A(1,p), 1 )".
  416. *
  417. IF( ( SVA( p ).LT.ROOTBIG ) .AND.
  418. $ ( SVA( p ).GT.ROOTSFMIN ) ) THEN
  419. SVA( p ) = DZNRM2( M, A( 1, p ), 1 )
  420. ELSE
  421. TEMP1 = ZERO
  422. AAPP = ONE
  423. CALL ZLASSQ( M, A( 1, p ), 1, TEMP1, AAPP )
  424. SVA( p ) = TEMP1*SQRT( AAPP )
  425. END IF
  426. AAPP = SVA( p )
  427. ELSE
  428. AAPP = SVA( p )
  429. END IF
  430. *
  431. IF( AAPP.GT.ZERO ) THEN
  432. *
  433. PSKIPPED = 0
  434. *
  435. DO 2002 q = p + 1, MIN( igl+KBL-1, N )
  436. *
  437. AAQQ = SVA( q )
  438. *
  439. IF( AAQQ.GT.ZERO ) THEN
  440. *
  441. AAPP0 = AAPP
  442. IF( AAQQ.GE.ONE ) THEN
  443. ROTOK = ( SMALL*AAPP ).LE.AAQQ
  444. IF( AAPP.LT.( BIG / AAQQ ) ) THEN
  445. AAPQ = ( ZDOTC( M, A( 1, p ), 1,
  446. $ A( 1, q ), 1 ) / AAQQ ) / AAPP
  447. ELSE
  448. CALL ZCOPY( M, A( 1, p ), 1,
  449. $ WORK, 1 )
  450. CALL ZLASCL( 'G', 0, 0, AAPP, ONE,
  451. $ M, 1, WORK, LDA, IERR )
  452. AAPQ = ZDOTC( M, WORK, 1,
  453. $ A( 1, q ), 1 ) / AAQQ
  454. END IF
  455. ELSE
  456. ROTOK = AAPP.LE.( AAQQ / SMALL )
  457. IF( AAPP.GT.( SMALL / AAQQ ) ) THEN
  458. AAPQ = ( ZDOTC( M, A( 1, p ), 1,
  459. $ A( 1, q ), 1 ) / AAPP ) / AAQQ
  460. ELSE
  461. CALL ZCOPY( M, A( 1, q ), 1,
  462. $ WORK, 1 )
  463. CALL ZLASCL( 'G', 0, 0, AAQQ,
  464. $ ONE, M, 1,
  465. $ WORK, LDA, IERR )
  466. AAPQ = ZDOTC( M, A( 1, p ), 1,
  467. $ WORK, 1 ) / AAPP
  468. END IF
  469. END IF
  470. *
  471. * AAPQ = AAPQ * CONJG( CWORK(p) ) * CWORK(q)
  472. AAPQ1 = -ABS(AAPQ)
  473. MXAAPQ = MAX( MXAAPQ, -AAPQ1 )
  474. *
  475. * TO rotate or NOT to rotate, THAT is the question ...
  476. *
  477. IF( ABS( AAPQ1 ).GT.TOL ) THEN
  478. OMPQ = AAPQ / ABS(AAPQ)
  479. *
  480. * .. rotate
  481. *[RTD] ROTATED = ROTATED + ONE
  482. *
  483. IF( ir1.EQ.0 ) THEN
  484. NOTROT = 0
  485. PSKIPPED = 0
  486. ISWROT = ISWROT + 1
  487. END IF
  488. *
  489. IF( ROTOK ) THEN
  490. *
  491. AQOAP = AAQQ / AAPP
  492. APOAQ = AAPP / AAQQ
  493. THETA = -HALF*ABS( AQOAP-APOAQ )/AAPQ1
  494. *
  495. IF( ABS( THETA ).GT.BIGTHETA ) THEN
  496. *
  497. T = HALF / THETA
  498. CS = ONE
  499. CALL ZROT( M, A(1,p), 1, A(1,q), 1,
  500. $ CS, CONJG(OMPQ)*T )
  501. IF ( RSVEC ) THEN
  502. CALL ZROT( MVL, V(1,p), 1,
  503. $ V(1,q), 1, CS, CONJG(OMPQ)*T )
  504. END IF
  505. SVA( q ) = AAQQ*SQRT( MAX( ZERO,
  506. $ ONE+T*APOAQ*AAPQ1 ) )
  507. AAPP = AAPP*SQRT( MAX( ZERO,
  508. $ ONE-T*AQOAP*AAPQ1 ) )
  509. MXSINJ = MAX( MXSINJ, ABS( T ) )
  510. *
  511. ELSE
  512. *
  513. * .. choose correct signum for THETA and rotate
  514. *
  515. THSIGN = -SIGN( ONE, AAPQ1 )
  516. T = ONE / ( THETA+THSIGN*
  517. $ SQRT( ONE+THETA*THETA ) )
  518. CS = SQRT( ONE / ( ONE+T*T ) )
  519. SN = T*CS
  520. *
  521. MXSINJ = MAX( MXSINJ, ABS( SN ) )
  522. SVA( q ) = AAQQ*SQRT( MAX( ZERO,
  523. $ ONE+T*APOAQ*AAPQ1 ) )
  524. AAPP = AAPP*SQRT( MAX( ZERO,
  525. $ ONE-T*AQOAP*AAPQ1 ) )
  526. *
  527. CALL ZROT( M, A(1,p), 1, A(1,q), 1,
  528. $ CS, CONJG(OMPQ)*SN )
  529. IF ( RSVEC ) THEN
  530. CALL ZROT( MVL, V(1,p), 1,
  531. $ V(1,q), 1, CS, CONJG(OMPQ)*SN )
  532. END IF
  533. END IF
  534. D(p) = -D(q) * OMPQ
  535. *
  536. ELSE
  537. * .. have to use modified Gram-Schmidt like transformation
  538. CALL ZCOPY( M, A( 1, p ), 1,
  539. $ WORK, 1 )
  540. CALL ZLASCL( 'G', 0, 0, AAPP, ONE, M,
  541. $ 1, WORK, LDA,
  542. $ IERR )
  543. CALL ZLASCL( 'G', 0, 0, AAQQ, ONE, M,
  544. $ 1, A( 1, q ), LDA, IERR )
  545. CALL ZAXPY( M, -AAPQ, WORK, 1,
  546. $ A( 1, q ), 1 )
  547. CALL ZLASCL( 'G', 0, 0, ONE, AAQQ, M,
  548. $ 1, A( 1, q ), LDA, IERR )
  549. SVA( q ) = AAQQ*SQRT( MAX( ZERO,
  550. $ ONE-AAPQ1*AAPQ1 ) )
  551. MXSINJ = MAX( MXSINJ, SFMIN )
  552. END IF
  553. * END IF ROTOK THEN ... ELSE
  554. *
  555. * In the case of cancellation in updating SVA(q), SVA(p)
  556. * recompute SVA(q), SVA(p).
  557. *
  558. IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS )
  559. $ THEN
  560. IF( ( AAQQ.LT.ROOTBIG ) .AND.
  561. $ ( AAQQ.GT.ROOTSFMIN ) ) THEN
  562. SVA( q ) = DZNRM2( M, A( 1, q ), 1 )
  563. ELSE
  564. T = ZERO
  565. AAQQ = ONE
  566. CALL ZLASSQ( M, A( 1, q ), 1, T,
  567. $ AAQQ )
  568. SVA( q ) = T*SQRT( AAQQ )
  569. END IF
  570. END IF
  571. IF( ( AAPP / AAPP0 ).LE.ROOTEPS ) THEN
  572. IF( ( AAPP.LT.ROOTBIG ) .AND.
  573. $ ( AAPP.GT.ROOTSFMIN ) ) THEN
  574. AAPP = DZNRM2( M, A( 1, p ), 1 )
  575. ELSE
  576. T = ZERO
  577. AAPP = ONE
  578. CALL ZLASSQ( M, A( 1, p ), 1, T,
  579. $ AAPP )
  580. AAPP = T*SQRT( AAPP )
  581. END IF
  582. SVA( p ) = AAPP
  583. END IF
  584. *
  585. ELSE
  586. * A(:,p) and A(:,q) already numerically orthogonal
  587. IF( ir1.EQ.0 )NOTROT = NOTROT + 1
  588. *[RTD] SKIPPED = SKIPPED + 1
  589. PSKIPPED = PSKIPPED + 1
  590. END IF
  591. ELSE
  592. * A(:,q) is zero column
  593. IF( ir1.EQ.0 )NOTROT = NOTROT + 1
  594. PSKIPPED = PSKIPPED + 1
  595. END IF
  596. *
  597. IF( ( i.LE.SWBAND ) .AND.
  598. $ ( PSKIPPED.GT.ROWSKIP ) ) THEN
  599. IF( ir1.EQ.0 )AAPP = -AAPP
  600. NOTROT = 0
  601. GO TO 2103
  602. END IF
  603. *
  604. 2002 CONTINUE
  605. * END q-LOOP
  606. *
  607. 2103 CONTINUE
  608. * bailed out of q-loop
  609. *
  610. SVA( p ) = AAPP
  611. *
  612. ELSE
  613. SVA( p ) = AAPP
  614. IF( ( ir1.EQ.0 ) .AND. ( AAPP.EQ.ZERO ) )
  615. $ NOTROT = NOTROT + MIN( igl+KBL-1, N ) - p
  616. END IF
  617. *
  618. 2001 CONTINUE
  619. * end of the p-loop
  620. * end of doing the block ( ibr, ibr )
  621. 1002 CONTINUE
  622. * end of ir1-loop
  623. *
  624. * ... go to the off diagonal blocks
  625. *
  626. igl = ( ibr-1 )*KBL + 1
  627. *
  628. DO 2010 jbc = ibr + 1, NBL
  629. *
  630. jgl = ( jbc-1 )*KBL + 1
  631. *
  632. * doing the block at ( ibr, jbc )
  633. *
  634. IJBLSK = 0
  635. DO 2100 p = igl, MIN( igl+KBL-1, N )
  636. *
  637. AAPP = SVA( p )
  638. IF( AAPP.GT.ZERO ) THEN
  639. *
  640. PSKIPPED = 0
  641. *
  642. DO 2200 q = jgl, MIN( jgl+KBL-1, N )
  643. *
  644. AAQQ = SVA( q )
  645. IF( AAQQ.GT.ZERO ) THEN
  646. AAPP0 = AAPP
  647. *
  648. * .. M x 2 Jacobi SVD ..
  649. *
  650. * Safe Gram matrix computation
  651. *
  652. IF( AAQQ.GE.ONE ) THEN
  653. IF( AAPP.GE.AAQQ ) THEN
  654. ROTOK = ( SMALL*AAPP ).LE.AAQQ
  655. ELSE
  656. ROTOK = ( SMALL*AAQQ ).LE.AAPP
  657. END IF
  658. IF( AAPP.LT.( BIG / AAQQ ) ) THEN
  659. AAPQ = ( ZDOTC( M, A( 1, p ), 1,
  660. $ A( 1, q ), 1 ) / AAQQ ) / AAPP
  661. ELSE
  662. CALL ZCOPY( M, A( 1, p ), 1,
  663. $ WORK, 1 )
  664. CALL ZLASCL( 'G', 0, 0, AAPP,
  665. $ ONE, M, 1,
  666. $ WORK, LDA, IERR )
  667. AAPQ = ZDOTC( M, WORK, 1,
  668. $ A( 1, q ), 1 ) / AAQQ
  669. END IF
  670. ELSE
  671. IF( AAPP.GE.AAQQ ) THEN
  672. ROTOK = AAPP.LE.( AAQQ / SMALL )
  673. ELSE
  674. ROTOK = AAQQ.LE.( AAPP / SMALL )
  675. END IF
  676. IF( AAPP.GT.( SMALL / AAQQ ) ) THEN
  677. AAPQ = ( ZDOTC( M, A( 1, p ), 1,
  678. $ A( 1, q ), 1 ) / MAX(AAQQ,AAPP) )
  679. $ / MIN(AAQQ,AAPP)
  680. ELSE
  681. CALL ZCOPY( M, A( 1, q ), 1,
  682. $ WORK, 1 )
  683. CALL ZLASCL( 'G', 0, 0, AAQQ,
  684. $ ONE, M, 1,
  685. $ WORK, LDA, IERR )
  686. AAPQ = ZDOTC( M, A( 1, p ), 1,
  687. $ WORK, 1 ) / AAPP
  688. END IF
  689. END IF
  690. *
  691. * AAPQ = AAPQ * CONJG(CWORK(p))*CWORK(q)
  692. AAPQ1 = -ABS(AAPQ)
  693. MXAAPQ = MAX( MXAAPQ, -AAPQ1 )
  694. *
  695. * TO rotate or NOT to rotate, THAT is the question ...
  696. *
  697. IF( ABS( AAPQ1 ).GT.TOL ) THEN
  698. OMPQ = AAPQ / ABS(AAPQ)
  699. NOTROT = 0
  700. *[RTD] ROTATED = ROTATED + 1
  701. PSKIPPED = 0
  702. ISWROT = ISWROT + 1
  703. *
  704. IF( ROTOK ) THEN
  705. *
  706. AQOAP = AAQQ / AAPP
  707. APOAQ = AAPP / AAQQ
  708. THETA = -HALF*ABS( AQOAP-APOAQ )/ AAPQ1
  709. IF( AAQQ.GT.AAPP0 )THETA = -THETA
  710. *
  711. IF( ABS( THETA ).GT.BIGTHETA ) THEN
  712. T = HALF / THETA
  713. CS = ONE
  714. CALL ZROT( M, A(1,p), 1, A(1,q), 1,
  715. $ CS, CONJG(OMPQ)*T )
  716. IF( RSVEC ) THEN
  717. CALL ZROT( MVL, V(1,p), 1,
  718. $ V(1,q), 1, CS, CONJG(OMPQ)*T )
  719. END IF
  720. SVA( q ) = AAQQ*SQRT( MAX( ZERO,
  721. $ ONE+T*APOAQ*AAPQ1 ) )
  722. AAPP = AAPP*SQRT( MAX( ZERO,
  723. $ ONE-T*AQOAP*AAPQ1 ) )
  724. MXSINJ = MAX( MXSINJ, ABS( T ) )
  725. ELSE
  726. *
  727. * .. choose correct signum for THETA and rotate
  728. *
  729. THSIGN = -SIGN( ONE, AAPQ1 )
  730. IF( AAQQ.GT.AAPP0 )THSIGN = -THSIGN
  731. T = ONE / ( THETA+THSIGN*
  732. $ SQRT( ONE+THETA*THETA ) )
  733. CS = SQRT( ONE / ( ONE+T*T ) )
  734. SN = T*CS
  735. MXSINJ = MAX( MXSINJ, ABS( SN ) )
  736. SVA( q ) = AAQQ*SQRT( MAX( ZERO,
  737. $ ONE+T*APOAQ*AAPQ1 ) )
  738. AAPP = AAPP*SQRT( MAX( ZERO,
  739. $ ONE-T*AQOAP*AAPQ1 ) )
  740. *
  741. CALL ZROT( M, A(1,p), 1, A(1,q), 1,
  742. $ CS, CONJG(OMPQ)*SN )
  743. IF( RSVEC ) THEN
  744. CALL ZROT( MVL, V(1,p), 1,
  745. $ V(1,q), 1, CS, CONJG(OMPQ)*SN )
  746. END IF
  747. END IF
  748. D(p) = -D(q) * OMPQ
  749. *
  750. ELSE
  751. * .. have to use modified Gram-Schmidt like transformation
  752. IF( AAPP.GT.AAQQ ) THEN
  753. CALL ZCOPY( M, A( 1, p ), 1,
  754. $ WORK, 1 )
  755. CALL ZLASCL( 'G', 0, 0, AAPP, ONE,
  756. $ M, 1, WORK,LDA,
  757. $ IERR )
  758. CALL ZLASCL( 'G', 0, 0, AAQQ, ONE,
  759. $ M, 1, A( 1, q ), LDA,
  760. $ IERR )
  761. CALL ZAXPY( M, -AAPQ, WORK,
  762. $ 1, A( 1, q ), 1 )
  763. CALL ZLASCL( 'G', 0, 0, ONE, AAQQ,
  764. $ M, 1, A( 1, q ), LDA,
  765. $ IERR )
  766. SVA( q ) = AAQQ*SQRT( MAX( ZERO,
  767. $ ONE-AAPQ1*AAPQ1 ) )
  768. MXSINJ = MAX( MXSINJ, SFMIN )
  769. ELSE
  770. CALL ZCOPY( M, A( 1, q ), 1,
  771. $ WORK, 1 )
  772. CALL ZLASCL( 'G', 0, 0, AAQQ, ONE,
  773. $ M, 1, WORK,LDA,
  774. $ IERR )
  775. CALL ZLASCL( 'G', 0, 0, AAPP, ONE,
  776. $ M, 1, A( 1, p ), LDA,
  777. $ IERR )
  778. CALL ZAXPY( M, -CONJG(AAPQ),
  779. $ WORK, 1, A( 1, p ), 1 )
  780. CALL ZLASCL( 'G', 0, 0, ONE, AAPP,
  781. $ M, 1, A( 1, p ), LDA,
  782. $ IERR )
  783. SVA( p ) = AAPP*SQRT( MAX( ZERO,
  784. $ ONE-AAPQ1*AAPQ1 ) )
  785. MXSINJ = MAX( MXSINJ, SFMIN )
  786. END IF
  787. END IF
  788. * END IF ROTOK THEN ... ELSE
  789. *
  790. * In the case of cancellation in updating SVA(q), SVA(p)
  791. * .. recompute SVA(q), SVA(p)
  792. IF( ( SVA( q ) / AAQQ )**2.LE.ROOTEPS )
  793. $ THEN
  794. IF( ( AAQQ.LT.ROOTBIG ) .AND.
  795. $ ( AAQQ.GT.ROOTSFMIN ) ) THEN
  796. SVA( q ) = DZNRM2( M, A( 1, q ), 1)
  797. ELSE
  798. T = ZERO
  799. AAQQ = ONE
  800. CALL ZLASSQ( M, A( 1, q ), 1, T,
  801. $ AAQQ )
  802. SVA( q ) = T*SQRT( AAQQ )
  803. END IF
  804. END IF
  805. IF( ( AAPP / AAPP0 )**2.LE.ROOTEPS ) THEN
  806. IF( ( AAPP.LT.ROOTBIG ) .AND.
  807. $ ( AAPP.GT.ROOTSFMIN ) ) THEN
  808. AAPP = DZNRM2( M, A( 1, p ), 1 )
  809. ELSE
  810. T = ZERO
  811. AAPP = ONE
  812. CALL ZLASSQ( M, A( 1, p ), 1, T,
  813. $ AAPP )
  814. AAPP = T*SQRT( AAPP )
  815. END IF
  816. SVA( p ) = AAPP
  817. END IF
  818. * end of OK rotation
  819. ELSE
  820. NOTROT = NOTROT + 1
  821. *[RTD] SKIPPED = SKIPPED + 1
  822. PSKIPPED = PSKIPPED + 1
  823. IJBLSK = IJBLSK + 1
  824. END IF
  825. ELSE
  826. NOTROT = NOTROT + 1
  827. PSKIPPED = PSKIPPED + 1
  828. IJBLSK = IJBLSK + 1
  829. END IF
  830. *
  831. IF( ( i.LE.SWBAND ) .AND. ( IJBLSK.GE.BLSKIP ) )
  832. $ THEN
  833. SVA( p ) = AAPP
  834. NOTROT = 0
  835. GO TO 2011
  836. END IF
  837. IF( ( i.LE.SWBAND ) .AND.
  838. $ ( PSKIPPED.GT.ROWSKIP ) ) THEN
  839. AAPP = -AAPP
  840. NOTROT = 0
  841. GO TO 2203
  842. END IF
  843. *
  844. 2200 CONTINUE
  845. * end of the q-loop
  846. 2203 CONTINUE
  847. *
  848. SVA( p ) = AAPP
  849. *
  850. ELSE
  851. *
  852. IF( AAPP.EQ.ZERO )NOTROT = NOTROT +
  853. $ MIN( jgl+KBL-1, N ) - jgl + 1
  854. IF( AAPP.LT.ZERO )NOTROT = 0
  855. *
  856. END IF
  857. *
  858. 2100 CONTINUE
  859. * end of the p-loop
  860. 2010 CONTINUE
  861. * end of the jbc-loop
  862. 2011 CONTINUE
  863. *2011 bailed out of the jbc-loop
  864. DO 2012 p = igl, MIN( igl+KBL-1, N )
  865. SVA( p ) = ABS( SVA( p ) )
  866. 2012 CONTINUE
  867. ***
  868. 2000 CONTINUE
  869. *2000 :: end of the ibr-loop
  870. *
  871. * .. update SVA(N)
  872. IF( ( SVA( N ).LT.ROOTBIG ) .AND. ( SVA( N ).GT.ROOTSFMIN ) )
  873. $ THEN
  874. SVA( N ) = DZNRM2( M, A( 1, N ), 1 )
  875. ELSE
  876. T = ZERO
  877. AAPP = ONE
  878. CALL ZLASSQ( M, A( 1, N ), 1, T, AAPP )
  879. SVA( N ) = T*SQRT( AAPP )
  880. END IF
  881. *
  882. * Additional steering devices
  883. *
  884. IF( ( i.LT.SWBAND ) .AND. ( ( MXAAPQ.LE.ROOTTOL ) .OR.
  885. $ ( ISWROT.LE.N ) ) )SWBAND = i
  886. *
  887. IF( ( i.GT.SWBAND+1 ) .AND. ( MXAAPQ.LT.SQRT( DBLE( N ) )*
  888. $ TOL ) .AND. ( DBLE( N )*MXAAPQ*MXSINJ.LT.TOL ) ) THEN
  889. GO TO 1994
  890. END IF
  891. *
  892. IF( NOTROT.GE.EMPTSW )GO TO 1994
  893. *
  894. 1993 CONTINUE
  895. * end i=1:NSWEEP loop
  896. *
  897. * #:( Reaching this point means that the procedure has not converged.
  898. INFO = NSWEEP - 1
  899. GO TO 1995
  900. *
  901. 1994 CONTINUE
  902. * #:) Reaching this point means numerical convergence after the i-th
  903. * sweep.
  904. *
  905. INFO = 0
  906. * #:) INFO = 0 confirms successful iterations.
  907. 1995 CONTINUE
  908. *
  909. * Sort the vector SVA() of column norms.
  910. DO 5991 p = 1, N - 1
  911. q = IDAMAX( N-p+1, SVA( p ), 1 ) + p - 1
  912. IF( p.NE.q ) THEN
  913. TEMP1 = SVA( p )
  914. SVA( p ) = SVA( q )
  915. SVA( q ) = TEMP1
  916. AAPQ = D( p )
  917. D( p ) = D( q )
  918. D( q ) = AAPQ
  919. CALL ZSWAP( M, A( 1, p ), 1, A( 1, q ), 1 )
  920. IF( RSVEC )CALL ZSWAP( MVL, V( 1, p ), 1, V( 1, q ), 1 )
  921. END IF
  922. 5991 CONTINUE
  923. *
  924. RETURN
  925. * ..
  926. * .. END OF ZGSVJ0
  927. * ..
  928. END