You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

zlatrs3.f 24 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681
  1. *> \brief \b ZLATRS3 solves a triangular system of equations with the scale factors set to prevent overflow.
  2. *
  3. * Definition:
  4. * ===========
  5. *
  6. * SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
  7. * X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
  8. *
  9. * .. Scalar Arguments ..
  10. * CHARACTER DIAG, NORMIN, TRANS, UPLO
  11. * INTEGER INFO, LDA, LWORK, LDX, N, NRHS
  12. * ..
  13. * .. Array Arguments ..
  14. * DOUBLE PRECISION CNORM( * ), SCALE( * ), WORK( * )
  15. * COMPLEX*16 A( LDA, * ), X( LDX, * )
  16. * ..
  17. *
  18. *
  19. *> \par Purpose:
  20. * =============
  21. *>
  22. *> \verbatim
  23. *>
  24. *> ZLATRS3 solves one of the triangular systems
  25. *>
  26. *> A * X = B * diag(scale), A**T * X = B * diag(scale), or
  27. *> A**H * X = B * diag(scale)
  28. *>
  29. *> with scaling to prevent overflow. Here A is an upper or lower
  30. *> triangular matrix, A**T denotes the transpose of A, A**H denotes the
  31. *> conjugate transpose of A. X and B are n-by-nrhs matrices and scale
  32. *> is an nrhs-element vector of scaling factors. A scaling factor scale(j)
  33. *> is usually less than or equal to 1, chosen such that X(:,j) is less
  34. *> than the overflow threshold. If the matrix A is singular (A(j,j) = 0
  35. *> for some j), then a non-trivial solution to A*X = 0 is returned. If
  36. *> the system is so badly scaled that the solution cannot be represented
  37. *> as (1/scale(k))*X(:,k), then x(:,k) = 0 and scale(k) is returned.
  38. *>
  39. *> This is a BLAS-3 version of LATRS for solving several right
  40. *> hand sides simultaneously.
  41. *>
  42. *> \endverbatim
  43. *
  44. * Arguments:
  45. * ==========
  46. *
  47. *> \param[in] UPLO
  48. *> \verbatim
  49. *> UPLO is CHARACTER*1
  50. *> Specifies whether the matrix A is upper or lower triangular.
  51. *> = 'U': Upper triangular
  52. *> = 'L': Lower triangular
  53. *> \endverbatim
  54. *>
  55. *> \param[in] TRANS
  56. *> \verbatim
  57. *> TRANS is CHARACTER*1
  58. *> Specifies the operation applied to A.
  59. *> = 'N': Solve A * x = s*b (No transpose)
  60. *> = 'T': Solve A**T* x = s*b (Transpose)
  61. *> = 'C': Solve A**T* x = s*b (Conjugate transpose)
  62. *> \endverbatim
  63. *>
  64. *> \param[in] DIAG
  65. *> \verbatim
  66. *> DIAG is CHARACTER*1
  67. *> Specifies whether or not the matrix A is unit triangular.
  68. *> = 'N': Non-unit triangular
  69. *> = 'U': Unit triangular
  70. *> \endverbatim
  71. *>
  72. *> \param[in] NORMIN
  73. *> \verbatim
  74. *> NORMIN is CHARACTER*1
  75. *> Specifies whether CNORM has been set or not.
  76. *> = 'Y': CNORM contains the column norms on entry
  77. *> = 'N': CNORM is not set on entry. On exit, the norms will
  78. *> be computed and stored in CNORM.
  79. *> \endverbatim
  80. *>
  81. *> \param[in] N
  82. *> \verbatim
  83. *> N is INTEGER
  84. *> The order of the matrix A. N >= 0.
  85. *> \endverbatim
  86. *>
  87. *> \param[in] NRHS
  88. *> \verbatim
  89. *> NRHS is INTEGER
  90. *> The number of columns of X. NRHS >= 0.
  91. *> \endverbatim
  92. *>
  93. *> \param[in] A
  94. *> \verbatim
  95. *> A is COMPLEX*16 array, dimension (LDA,N)
  96. *> The triangular matrix A. If UPLO = 'U', the leading n by n
  97. *> upper triangular part of the array A contains the upper
  98. *> triangular matrix, and the strictly lower triangular part of
  99. *> A is not referenced. If UPLO = 'L', the leading n by n lower
  100. *> triangular part of the array A contains the lower triangular
  101. *> matrix, and the strictly upper triangular part of A is not
  102. *> referenced. If DIAG = 'U', the diagonal elements of A are
  103. *> also not referenced and are assumed to be 1.
  104. *> \endverbatim
  105. *>
  106. *> \param[in] LDA
  107. *> \verbatim
  108. *> LDA is INTEGER
  109. *> The leading dimension of the array A. LDA >= max (1,N).
  110. *> \endverbatim
  111. *>
  112. *> \param[in,out] X
  113. *> \verbatim
  114. *> X is COMPLEX*16 array, dimension (LDX,NRHS)
  115. *> On entry, the right hand side B of the triangular system.
  116. *> On exit, X is overwritten by the solution matrix X.
  117. *> \endverbatim
  118. *>
  119. *> \param[in] LDX
  120. *> \verbatim
  121. *> LDX is INTEGER
  122. *> The leading dimension of the array X. LDX >= max (1,N).
  123. *> \endverbatim
  124. *>
  125. *> \param[out] SCALE
  126. *> \verbatim
  127. *> SCALE is DOUBLE PRECISION array, dimension (NRHS)
  128. *> The scaling factor s(k) is for the triangular system
  129. *> A * x(:,k) = s(k)*b(:,k) or A**T* x(:,k) = s(k)*b(:,k).
  130. *> If SCALE = 0, the matrix A is singular or badly scaled.
  131. *> If A(j,j) = 0 is encountered, a non-trivial vector x(:,k)
  132. *> that is an exact or approximate solution to A*x(:,k) = 0
  133. *> is returned. If the system so badly scaled that solution
  134. *> cannot be presented as x(:,k) * 1/s(k), then x(:,k) = 0
  135. *> is returned.
  136. *> \endverbatim
  137. *>
  138. *> \param[in,out] CNORM
  139. *> \verbatim
  140. *> CNORM is DOUBLE PRECISION array, dimension (N)
  141. *>
  142. *> If NORMIN = 'Y', CNORM is an input argument and CNORM(j)
  143. *> contains the norm of the off-diagonal part of the j-th column
  144. *> of A. If TRANS = 'N', CNORM(j) must be greater than or equal
  145. *> to the infinity-norm, and if TRANS = 'T' or 'C', CNORM(j)
  146. *> must be greater than or equal to the 1-norm.
  147. *>
  148. *> If NORMIN = 'N', CNORM is an output argument and CNORM(j)
  149. *> returns the 1-norm of the offdiagonal part of the j-th column
  150. *> of A.
  151. *> \endverbatim
  152. *>
  153. *> \param[out] WORK
  154. *> \verbatim
  155. *> WORK is DOUBLE PRECISION array, dimension (LWORK).
  156. *> On exit, if INFO = 0, WORK(1) returns the optimal size of
  157. *> WORK.
  158. *> \endverbatim
  159. *>
  160. *> \param[in] LWORK
  161. *> \verbatim
  162. *> LWORK is INTEGER
  163. *> The dimension of the array WORK.
  164. *>
  165. *> If MIN(N,NRHS) = 0, LWORK >= 1, else
  166. *> LWORK >= MAX(1, 2*NBA * MAX(NBA, MIN(NRHS, 32)), where
  167. *> NBA = (N + NB - 1)/NB and NB is the optimal block size.
  168. *>
  169. *> If LWORK = -1, then a workspace query is assumed; the routine
  170. *> only calculates the optimal dimensions of the WORK array, returns
  171. *> this value as the first entry of the WORK array, and no error
  172. *> message related to LWORK is issued by XERBLA.
  173. *> \endverbatim
  174. *>
  175. *> \param[out] INFO
  176. *> \verbatim
  177. *> INFO is INTEGER
  178. *> = 0: successful exit
  179. *> < 0: if INFO = -k, the k-th argument had an illegal value
  180. *> \endverbatim
  181. *
  182. * Authors:
  183. * ========
  184. *
  185. *> \author Univ. of Tennessee
  186. *> \author Univ. of California Berkeley
  187. *> \author Univ. of Colorado Denver
  188. *> \author NAG Ltd.
  189. *
  190. *> \ingroup latrs3
  191. *> \par Further Details:
  192. * =====================
  193. * \verbatim
  194. * The algorithm follows the structure of a block triangular solve.
  195. * The diagonal block is solved with a call to the robust the triangular
  196. * solver LATRS for every right-hand side RHS = 1, ..., NRHS
  197. * op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS ),
  198. * where op( A ) = A or op( A ) = A**T or op( A ) = A**H.
  199. * The linear block updates operate on block columns of X,
  200. * B( I, K ) - op(A( I, J )) * X( J, K )
  201. * and use GEMM. To avoid overflow in the linear block update, the worst case
  202. * growth is estimated. For every RHS, a scale factor s <= 1.0 is computed
  203. * such that
  204. * || s * B( I, RHS )||_oo
  205. * + || op(A( I, J )) ||_oo * || s * X( J, RHS ) ||_oo <= Overflow threshold
  206. *
  207. * Once all columns of a block column have been rescaled (BLAS-1), the linear
  208. * update is executed with GEMM without overflow.
  209. *
  210. * To limit rescaling, local scale factors track the scaling of column segments.
  211. * There is one local scale factor s( I, RHS ) per block row I = 1, ..., NBA
  212. * per right-hand side column RHS = 1, ..., NRHS. The global scale factor
  213. * SCALE( RHS ) is chosen as the smallest local scale factor s( I, RHS )
  214. * I = 1, ..., NBA.
  215. * A triangular solve op(A( J, J )) * x( J, RHS ) = SCALOC * b( J, RHS )
  216. * updates the local scale factor s( J, RHS ) := s( J, RHS ) * SCALOC. The
  217. * linear update of potentially inconsistently scaled vector segments
  218. * s( I, RHS ) * b( I, RHS ) - op(A( I, J )) * ( s( J, RHS )* x( J, RHS ) )
  219. * computes a consistent scaling SCAMIN = MIN( s(I, RHS ), s(J, RHS) ) and,
  220. * if necessary, rescales the blocks prior to calling GEMM.
  221. *
  222. * \endverbatim
  223. * =====================================================================
  224. * References:
  225. * C. C. Kjelgaard Mikkelsen, A. B. Schwarz and L. Karlsson (2019).
  226. * Parallel robust solution of triangular linear systems. Concurrency
  227. * and Computation: Practice and Experience, 31(19), e5064.
  228. *
  229. * Contributor:
  230. * Angelika Schwarz, Umea University, Sweden.
  231. *
  232. * =====================================================================
  233. SUBROUTINE ZLATRS3( UPLO, TRANS, DIAG, NORMIN, N, NRHS, A, LDA,
  234. $ X, LDX, SCALE, CNORM, WORK, LWORK, INFO )
  235. IMPLICIT NONE
  236. *
  237. * .. Scalar Arguments ..
  238. CHARACTER DIAG, TRANS, NORMIN, UPLO
  239. INTEGER INFO, LDA, LWORK, LDX, N, NRHS
  240. * ..
  241. * .. Array Arguments ..
  242. COMPLEX*16 A( LDA, * ), X( LDX, * )
  243. DOUBLE PRECISION CNORM( * ), SCALE( * ), WORK( * )
  244. * ..
  245. *
  246. * =====================================================================
  247. *
  248. * .. Parameters ..
  249. DOUBLE PRECISION ZERO, ONE
  250. PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
  251. COMPLEX*16 CZERO, CONE
  252. PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ) )
  253. PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ) )
  254. INTEGER NBMAX, NBMIN, NBRHS, NRHSMIN
  255. PARAMETER ( NRHSMIN = 2, NBRHS = 32 )
  256. PARAMETER ( NBMIN = 8, NBMAX = 64 )
  257. * ..
  258. * .. Local Arrays ..
  259. DOUBLE PRECISION W( NBMAX ), XNRM( NBRHS )
  260. * ..
  261. * .. Local Scalars ..
  262. LOGICAL LQUERY, NOTRAN, NOUNIT, UPPER
  263. INTEGER AWRK, I, IFIRST, IINC, ILAST, II, I1, I2, J,
  264. $ JFIRST, JINC, JLAST, J1, J2, K, KK, K1, K2,
  265. $ LANRM, LDS, LSCALE, NB, NBA, NBX, RHS, LWMIN
  266. DOUBLE PRECISION ANRM, BIGNUM, BNRM, RSCAL, SCAL, SCALOC,
  267. $ SCAMIN, SMLNUM, TMAX
  268. * ..
  269. * .. External Functions ..
  270. LOGICAL LSAME
  271. INTEGER ILAENV
  272. DOUBLE PRECISION DLAMCH, ZLANGE, DLARMM
  273. EXTERNAL ILAENV, LSAME, DLAMCH, ZLANGE, DLARMM
  274. * ..
  275. * .. External Subroutines ..
  276. EXTERNAL ZLATRS, ZDSCAL, XERBLA
  277. * ..
  278. * .. Intrinsic Functions ..
  279. INTRINSIC ABS, MAX, MIN
  280. * ..
  281. * .. Executable Statements ..
  282. *
  283. INFO = 0
  284. UPPER = LSAME( UPLO, 'U' )
  285. NOTRAN = LSAME( TRANS, 'N' )
  286. NOUNIT = LSAME( DIAG, 'N' )
  287. LQUERY = ( LWORK.EQ.-1 )
  288. *
  289. * Partition A and X into blocks.
  290. *
  291. NB = MAX( NBMIN, ILAENV( 1, 'ZLATRS', '', N, N, -1, -1 ) )
  292. NB = MIN( NBMAX, NB )
  293. NBA = MAX( 1, (N + NB - 1) / NB )
  294. NBX = MAX( 1, (NRHS + NBRHS - 1) / NBRHS )
  295. *
  296. * Compute the workspace
  297. *
  298. * The workspace comprises two parts.
  299. * The first part stores the local scale factors. Each simultaneously
  300. * computed right-hand side requires one local scale factor per block
  301. * row. WORK( I + KK * LDS ) is the scale factor of the vector
  302. * segment associated with the I-th block row and the KK-th vector
  303. * in the block column.
  304. *
  305. LSCALE = NBA * MAX( NBA, MIN( NRHS, NBRHS ) )
  306. LDS = NBA
  307. *
  308. * The second part stores upper bounds of the triangular A. There are
  309. * a total of NBA x NBA blocks, of which only the upper triangular
  310. * part or the lower triangular part is referenced. The upper bound of
  311. * the block A( I, J ) is stored as WORK( AWRK + I + J * NBA ).
  312. *
  313. LANRM = NBA * NBA
  314. AWRK = LSCALE
  315. *
  316. IF( MIN( N, NRHS ).EQ.0 ) THEN
  317. LWMIN = 1
  318. ELSE
  319. LWMIN = LSCALE + LANRM
  320. END IF
  321. WORK( 1 ) = LWMIN
  322. *
  323. * Test the input parameters.
  324. *
  325. IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
  326. INFO = -1
  327. ELSE IF( .NOT.NOTRAN .AND. .NOT.LSAME( TRANS, 'T' ) .AND. .NOT.
  328. $ LSAME( TRANS, 'C' ) ) THEN
  329. INFO = -2
  330. ELSE IF( .NOT.NOUNIT .AND. .NOT.LSAME( DIAG, 'U' ) ) THEN
  331. INFO = -3
  332. ELSE IF( .NOT.LSAME( NORMIN, 'Y' ) .AND. .NOT.
  333. $ LSAME( NORMIN, 'N' ) ) THEN
  334. INFO = -4
  335. ELSE IF( N.LT.0 ) THEN
  336. INFO = -5
  337. ELSE IF( NRHS.LT.0 ) THEN
  338. INFO = -6
  339. ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  340. INFO = -8
  341. ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
  342. INFO = -10
  343. ELSE IF( .NOT.LQUERY .AND. LWORK.LT.LWMIN ) THEN
  344. INFO = -14
  345. END IF
  346. IF( INFO.NE.0 ) THEN
  347. CALL XERBLA( 'ZLATRS3', -INFO )
  348. RETURN
  349. ELSE IF( LQUERY ) THEN
  350. RETURN
  351. END IF
  352. *
  353. * Initialize scaling factors
  354. *
  355. DO KK = 1, NRHS
  356. SCALE( KK ) = ONE
  357. END DO
  358. *
  359. * Quick return if possible
  360. *
  361. IF( MIN( N, NRHS ).EQ.0 )
  362. $ RETURN
  363. *
  364. * Determine machine dependent constant to control overflow.
  365. *
  366. BIGNUM = DLAMCH( 'Overflow' )
  367. SMLNUM = DLAMCH( 'Safe Minimum' )
  368. *
  369. * Use unblocked code for small problems
  370. *
  371. IF( NRHS.LT.NRHSMIN ) THEN
  372. CALL ZLATRS( UPLO, TRANS, DIAG, NORMIN, N, A, LDA, X( 1, 1),
  373. $ SCALE( 1 ), CNORM, INFO )
  374. DO K = 2, NRHS
  375. CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', N, A, LDA, X( 1, K ),
  376. $ SCALE( K ), CNORM, INFO )
  377. END DO
  378. RETURN
  379. END IF
  380. *
  381. * Compute norms of blocks of A excluding diagonal blocks and find
  382. * the block with the largest norm TMAX.
  383. *
  384. TMAX = ZERO
  385. DO J = 1, NBA
  386. J1 = (J-1)*NB + 1
  387. J2 = MIN( J*NB, N ) + 1
  388. IF ( UPPER ) THEN
  389. IFIRST = 1
  390. ILAST = J - 1
  391. ELSE
  392. IFIRST = J + 1
  393. ILAST = NBA
  394. END IF
  395. DO I = IFIRST, ILAST
  396. I1 = (I-1)*NB + 1
  397. I2 = MIN( I*NB, N ) + 1
  398. *
  399. * Compute upper bound of A( I1:I2-1, J1:J2-1 ).
  400. *
  401. IF( NOTRAN ) THEN
  402. ANRM = ZLANGE( 'I', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
  403. WORK( AWRK + I+(J-1)*NBA ) = ANRM
  404. ELSE
  405. ANRM = ZLANGE( '1', I2-I1, J2-J1, A( I1, J1 ), LDA, W )
  406. WORK( AWRK + J+(I-1) * NBA ) = ANRM
  407. END IF
  408. TMAX = MAX( TMAX, ANRM )
  409. END DO
  410. END DO
  411. *
  412. IF( .NOT. TMAX.LE.DLAMCH('Overflow') ) THEN
  413. *
  414. * Some matrix entries have huge absolute value. At least one upper
  415. * bound norm( A(I1:I2-1, J1:J2-1), 'I') is not a valid floating-point
  416. * number, either due to overflow in LANGE or due to Inf in A.
  417. * Fall back to LATRS. Set normin = 'N' for every right-hand side to
  418. * force computation of TSCAL in LATRS to avoid the likely overflow
  419. * in the computation of the column norms CNORM.
  420. *
  421. DO K = 1, NRHS
  422. CALL ZLATRS( UPLO, TRANS, DIAG, 'N', N, A, LDA, X( 1, K ),
  423. $ SCALE( K ), CNORM, INFO )
  424. END DO
  425. RETURN
  426. END IF
  427. *
  428. * Every right-hand side requires workspace to store NBA local scale
  429. * factors. To save workspace, X is computed successively in block columns
  430. * of width NBRHS, requiring a total of NBA x NBRHS space. If sufficient
  431. * workspace is available, larger values of NBRHS or NBRHS = NRHS are viable.
  432. DO K = 1, NBX
  433. * Loop over block columns (index = K) of X and, for column-wise scalings,
  434. * over individual columns (index = KK).
  435. * K1: column index of the first column in X( J, K )
  436. * K2: column index of the first column in X( J, K+1 )
  437. * so the K2 - K1 is the column count of the block X( J, K )
  438. K1 = (K-1)*NBRHS + 1
  439. K2 = MIN( K*NBRHS, NRHS ) + 1
  440. *
  441. * Initialize local scaling factors of current block column X( J, K )
  442. *
  443. DO KK = 1, K2 - K1
  444. DO I = 1, NBA
  445. WORK( I+KK*LDS ) = ONE
  446. END DO
  447. END DO
  448. *
  449. IF( NOTRAN ) THEN
  450. *
  451. * Solve A * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
  452. *
  453. IF( UPPER ) THEN
  454. JFIRST = NBA
  455. JLAST = 1
  456. JINC = -1
  457. ELSE
  458. JFIRST = 1
  459. JLAST = NBA
  460. JINC = 1
  461. END IF
  462. ELSE
  463. *
  464. * Solve op(A) * X(:, K1:K2-1) = B * diag(scale(K1:K2-1))
  465. * where op(A) = A**T or op(A) = A**H
  466. *
  467. IF( UPPER ) THEN
  468. JFIRST = 1
  469. JLAST = NBA
  470. JINC = 1
  471. ELSE
  472. JFIRST = NBA
  473. JLAST = 1
  474. JINC = -1
  475. END IF
  476. END IF
  477. DO J = JFIRST, JLAST, JINC
  478. * J1: row index of the first row in A( J, J )
  479. * J2: row index of the first row in A( J+1, J+1 )
  480. * so that J2 - J1 is the row count of the block A( J, J )
  481. J1 = (J-1)*NB + 1
  482. J2 = MIN( J*NB, N ) + 1
  483. *
  484. * Solve op(A( J, J )) * X( J, RHS ) = SCALOC * B( J, RHS )
  485. *
  486. DO KK = 1, K2 - K1
  487. RHS = K1 + KK - 1
  488. IF( KK.EQ.1 ) THEN
  489. CALL ZLATRS( UPLO, TRANS, DIAG, 'N', J2-J1,
  490. $ A( J1, J1 ), LDA, X( J1, RHS ),
  491. $ SCALOC, CNORM, INFO )
  492. ELSE
  493. CALL ZLATRS( UPLO, TRANS, DIAG, 'Y', J2-J1,
  494. $ A( J1, J1 ), LDA, X( J1, RHS ),
  495. $ SCALOC, CNORM, INFO )
  496. END IF
  497. * Find largest absolute value entry in the vector segment
  498. * X( J1:J2-1, RHS ) as an upper bound for the worst case
  499. * growth in the linear updates.
  500. XNRM( KK ) = ZLANGE( 'I', J2-J1, 1, X( J1, RHS ),
  501. $ LDX, W )
  502. *
  503. IF( SCALOC .EQ. ZERO ) THEN
  504. * LATRS found that A is singular through A(j,j) = 0.
  505. * Reset the computation x(1:n) = 0, x(j) = 1, SCALE = 0
  506. * and compute op(A)*x = 0. Note that X(J1:J2-1, KK) is
  507. * set by LATRS.
  508. SCALE( RHS ) = ZERO
  509. DO II = 1, J1-1
  510. X( II, KK ) = CZERO
  511. END DO
  512. DO II = J2, N
  513. X( II, KK ) = CZERO
  514. END DO
  515. * Discard the local scale factors.
  516. DO II = 1, NBA
  517. WORK( II+KK*LDS ) = ONE
  518. END DO
  519. SCALOC = ONE
  520. ELSE IF( SCALOC*WORK( J+KK*LDS ) .EQ. ZERO ) THEN
  521. * LATRS computed a valid scale factor, but combined with
  522. * the current scaling the solution does not have a
  523. * scale factor > 0.
  524. *
  525. * Set WORK( J+KK*LDS ) to smallest valid scale
  526. * factor and increase SCALOC accordingly.
  527. SCAL = WORK( J+KK*LDS ) / SMLNUM
  528. SCALOC = SCALOC * SCAL
  529. WORK( J+KK*LDS ) = SMLNUM
  530. * If LATRS overestimated the growth, x may be
  531. * rescaled to preserve a valid combined scale
  532. * factor WORK( J, KK ) > 0.
  533. RSCAL = ONE / SCALOC
  534. IF( XNRM( KK )*RSCAL .LE. BIGNUM ) THEN
  535. XNRM( KK ) = XNRM( KK ) * RSCAL
  536. CALL ZDSCAL( J2-J1, RSCAL, X( J1, RHS ), 1 )
  537. SCALOC = ONE
  538. ELSE
  539. * The system op(A) * x = b is badly scaled and its
  540. * solution cannot be represented as (1/scale) * x.
  541. * Set x to zero. This approach deviates from LATRS
  542. * where a completely meaningless non-zero vector
  543. * is returned that is not a solution to op(A) * x = b.
  544. SCALE( RHS ) = ZERO
  545. DO II = 1, N
  546. X( II, KK ) = CZERO
  547. END DO
  548. * Discard the local scale factors.
  549. DO II = 1, NBA
  550. WORK( II+KK*LDS ) = ONE
  551. END DO
  552. SCALOC = ONE
  553. END IF
  554. END IF
  555. SCALOC = SCALOC * WORK( J+KK*LDS )
  556. WORK( J+KK*LDS ) = SCALOC
  557. END DO
  558. *
  559. * Linear block updates
  560. *
  561. IF( NOTRAN ) THEN
  562. IF( UPPER ) THEN
  563. IFIRST = J - 1
  564. ILAST = 1
  565. IINC = -1
  566. ELSE
  567. IFIRST = J + 1
  568. ILAST = NBA
  569. IINC = 1
  570. END IF
  571. ELSE
  572. IF( UPPER ) THEN
  573. IFIRST = J + 1
  574. ILAST = NBA
  575. IINC = 1
  576. ELSE
  577. IFIRST = J - 1
  578. ILAST = 1
  579. IINC = -1
  580. END IF
  581. END IF
  582. *
  583. DO I = IFIRST, ILAST, IINC
  584. * I1: row index of the first column in X( I, K )
  585. * I2: row index of the first column in X( I+1, K )
  586. * so the I2 - I1 is the row count of the block X( I, K )
  587. I1 = (I-1)*NB + 1
  588. I2 = MIN( I*NB, N ) + 1
  589. *
  590. * Prepare the linear update to be executed with GEMM.
  591. * For each column, compute a consistent scaling, a
  592. * scaling factor to survive the linear update, and
  593. * rescale the column segments, if necessary. Then
  594. * the linear update is safely executed.
  595. *
  596. DO KK = 1, K2 - K1
  597. RHS = K1 + KK - 1
  598. * Compute consistent scaling
  599. SCAMIN = MIN( WORK( I+KK*LDS), WORK( J+KK*LDS ) )
  600. *
  601. * Compute scaling factor to survive the linear update
  602. * simulating consistent scaling.
  603. *
  604. BNRM = ZLANGE( 'I', I2-I1, 1, X( I1, RHS ), LDX, W )
  605. BNRM = BNRM*( SCAMIN / WORK( I+KK*LDS ) )
  606. XNRM( KK ) = XNRM( KK )*( SCAMIN / WORK( J+KK*LDS) )
  607. ANRM = WORK( AWRK + I+(J-1)*NBA )
  608. SCALOC = DLARMM( ANRM, XNRM( KK ), BNRM )
  609. *
  610. * Simultaneously apply the robust update factor and the
  611. * consistency scaling factor to X( I, KK ) and X( J, KK ).
  612. *
  613. SCAL = ( SCAMIN / WORK( I+KK*LDS) )*SCALOC
  614. IF( SCAL.NE.ONE ) THEN
  615. CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
  616. WORK( I+KK*LDS ) = SCAMIN*SCALOC
  617. END IF
  618. *
  619. SCAL = ( SCAMIN / WORK( J+KK*LDS ) )*SCALOC
  620. IF( SCAL.NE.ONE ) THEN
  621. CALL ZDSCAL( J2-J1, SCAL, X( J1, RHS ), 1 )
  622. WORK( J+KK*LDS ) = SCAMIN*SCALOC
  623. END IF
  624. END DO
  625. *
  626. IF( NOTRAN ) THEN
  627. *
  628. * B( I, K ) := B( I, K ) - A( I, J ) * X( J, K )
  629. *
  630. CALL ZGEMM( 'N', 'N', I2-I1, K2-K1, J2-J1, -CONE,
  631. $ A( I1, J1 ), LDA, X( J1, K1 ), LDX,
  632. $ CONE, X( I1, K1 ), LDX )
  633. ELSE IF( LSAME( TRANS, 'T' ) ) THEN
  634. *
  635. * B( I, K ) := B( I, K ) - A( I, J )**T * X( J, K )
  636. *
  637. CALL ZGEMM( 'T', 'N', I2-I1, K2-K1, J2-J1, -CONE,
  638. $ A( J1, I1 ), LDA, X( J1, K1 ), LDX,
  639. $ CONE, X( I1, K1 ), LDX )
  640. ELSE
  641. *
  642. * B( I, K ) := B( I, K ) - A( I, J )**H * X( J, K )
  643. *
  644. CALL ZGEMM( 'C', 'N', I2-I1, K2-K1, J2-J1, -CONE,
  645. $ A( J1, I1 ), LDA, X( J1, K1 ), LDX,
  646. $ CONE, X( I1, K1 ), LDX )
  647. END IF
  648. END DO
  649. END DO
  650. *
  651. * Reduce local scaling factors
  652. *
  653. DO KK = 1, K2 - K1
  654. RHS = K1 + KK - 1
  655. DO I = 1, NBA
  656. SCALE( RHS ) = MIN( SCALE( RHS ), WORK( I+KK*LDS ) )
  657. END DO
  658. END DO
  659. *
  660. * Realize consistent scaling
  661. *
  662. DO KK = 1, K2 - K1
  663. RHS = K1 + KK - 1
  664. IF( SCALE( RHS ).NE.ONE .AND. SCALE( RHS ).NE. ZERO ) THEN
  665. DO I = 1, NBA
  666. I1 = (I - 1) * NB + 1
  667. I2 = MIN( I * NB, N ) + 1
  668. SCAL = SCALE( RHS ) / WORK( I+KK*LDS )
  669. IF( SCAL.NE.ONE )
  670. $ CALL ZDSCAL( I2-I1, SCAL, X( I1, RHS ), 1 )
  671. END DO
  672. END IF
  673. END DO
  674. END DO
  675. RETURN
  676. *
  677. * End of ZLATRS3
  678. *
  679. END