You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dtrsyl3.f 45 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241
  1. *> \brief \b DTRSYL3
  2. *
  3. * Definition:
  4. * ===========
  5. *
  6. *
  7. *> \par Purpose
  8. * =============
  9. *>
  10. *> \verbatim
  11. *>
  12. *> DTRSYL3 solves the real Sylvester matrix equation:
  13. *>
  14. *> op(A)*X + X*op(B) = scale*C or
  15. *> op(A)*X - X*op(B) = scale*C,
  16. *>
  17. *> where op(A) = A or A**T, and A and B are both upper quasi-
  18. *> triangular. A is M-by-M and B is N-by-N; the right hand side C and
  19. *> the solution X are M-by-N; and scale is an output scale factor, set
  20. *> <= 1 to avoid overflow in X.
  21. *>
  22. *> A and B must be in Schur canonical form (as returned by DHSEQR), that
  23. *> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks;
  24. *> each 2-by-2 diagonal block has its diagonal elements equal and its
  25. *> off-diagonal elements of opposite sign.
  26. *>
  27. *> This is the block version of the algorithm.
  28. *> \endverbatim
  29. *
  30. * Arguments
  31. * =========
  32. *
  33. *> \param[in] TRANA
  34. *> \verbatim
  35. *> TRANA is CHARACTER*1
  36. *> Specifies the option op(A):
  37. *> = 'N': op(A) = A (No transpose)
  38. *> = 'T': op(A) = A**T (Transpose)
  39. *> = 'C': op(A) = A**H (Conjugate transpose = Transpose)
  40. *> \endverbatim
  41. *>
  42. *> \param[in] TRANB
  43. *> \verbatim
  44. *> TRANB is CHARACTER*1
  45. *> Specifies the option op(B):
  46. *> = 'N': op(B) = B (No transpose)
  47. *> = 'T': op(B) = B**T (Transpose)
  48. *> = 'C': op(B) = B**H (Conjugate transpose = Transpose)
  49. *> \endverbatim
  50. *>
  51. *> \param[in] ISGN
  52. *> \verbatim
  53. *> ISGN is INTEGER
  54. *> Specifies the sign in the equation:
  55. *> = +1: solve op(A)*X + X*op(B) = scale*C
  56. *> = -1: solve op(A)*X - X*op(B) = scale*C
  57. *> \endverbatim
  58. *>
  59. *> \param[in] M
  60. *> \verbatim
  61. *> M is INTEGER
  62. *> The order of the matrix A, and the number of rows in the
  63. *> matrices X and C. M >= 0.
  64. *> \endverbatim
  65. *>
  66. *> \param[in] N
  67. *> \verbatim
  68. *> N is INTEGER
  69. *> The order of the matrix B, and the number of columns in the
  70. *> matrices X and C. N >= 0.
  71. *> \endverbatim
  72. *>
  73. *> \param[in] A
  74. *> \verbatim
  75. *> A is DOUBLE PRECISION array, dimension (LDA,M)
  76. *> The upper quasi-triangular matrix A, in Schur canonical form.
  77. *> \endverbatim
  78. *>
  79. *> \param[in] LDA
  80. *> \verbatim
  81. *> LDA is INTEGER
  82. *> The leading dimension of the array A. LDA >= max(1,M).
  83. *> \endverbatim
  84. *>
  85. *> \param[in] B
  86. *> \verbatim
  87. *> B is DOUBLE PRECISION array, dimension (LDB,N)
  88. *> The upper quasi-triangular matrix B, in Schur canonical form.
  89. *> \endverbatim
  90. *>
  91. *> \param[in] LDB
  92. *> \verbatim
  93. *> LDB is INTEGER
  94. *> The leading dimension of the array B. LDB >= max(1,N).
  95. *> \endverbatim
  96. *>
  97. *> \param[in,out] C
  98. *> \verbatim
  99. *> C is DOUBLE PRECISION array, dimension (LDC,N)
  100. *> On entry, the M-by-N right hand side matrix C.
  101. *> On exit, C is overwritten by the solution matrix X.
  102. *> \endverbatim
  103. *>
  104. *> \param[in] LDC
  105. *> \verbatim
  106. *> LDC is INTEGER
  107. *> The leading dimension of the array C. LDC >= max(1,M)
  108. *> \endverbatim
  109. *>
  110. *> \param[out] SCALE
  111. *> \verbatim
  112. *> SCALE is DOUBLE PRECISION
  113. *> The scale factor, scale, set <= 1 to avoid overflow in X.
  114. *> \endverbatim
  115. *>
  116. *> \param[out] IWORK
  117. *> \verbatim
  118. *> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
  119. *> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
  120. *> \endverbatim
  121. *>
  122. *> \param[in] LIWORK
  123. *> \verbatim
  124. *> IWORK is INTEGER
  125. *> The dimension of the array IWORK. LIWORK >= ((M + NB - 1) / NB + 1)
  126. *> + ((N + NB - 1) / NB + 1), where NB is the optimal block size.
  127. *>
  128. *> If LIWORK = -1, then a workspace query is assumed; the routine
  129. *> only calculates the optimal dimension of the IWORK array,
  130. *> returns this value as the first entry of the IWORK array, and
  131. *> no error message related to LIWORK is issued by XERBLA.
  132. *> \endverbatim
  133. *>
  134. *> \param[out] SWORK
  135. *> \verbatim
  136. *> SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS),
  137. *> MAX(1,COLS)).
  138. *> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS
  139. *> and SWORK(2) returns the optimal COLS.
  140. *> \endverbatim
  141. *>
  142. *> \param[in] LDSWORK
  143. *> \verbatim
  144. *> LDSWORK is INTEGER
  145. *> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1)
  146. *> and NB is the optimal block size.
  147. *>
  148. *> If LDSWORK = -1, then a workspace query is assumed; the routine
  149. *> only calculates the optimal dimensions of the SWORK matrix,
  150. *> returns these values as the first and second entry of the SWORK
  151. *> matrix, and no error message related LWORK is issued by XERBLA.
  152. *> \endverbatim
  153. *>
  154. *> \param[out] INFO
  155. *> \verbatim
  156. *> INFO is INTEGER
  157. *> = 0: successful exit
  158. *> < 0: if INFO = -i, the i-th argument had an illegal value
  159. *> = 1: A and B have common or very close eigenvalues; perturbed
  160. *> values were used to solve the equation (but the matrices
  161. *> A and B are unchanged).
  162. *> \endverbatim
  163. *
  164. * =====================================================================
  165. * References:
  166. * E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of
  167. * algorithms: The triangular Sylvester equation, ACM Transactions
  168. * on Mathematical Software (TOMS), volume 29, pages 218--243.
  169. *
  170. * A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel
  171. * Solution of the Triangular Sylvester Equation. Lecture Notes in
  172. * Computer Science, vol 12043, pages 82--92, Springer.
  173. *
  174. * Contributor:
  175. * Angelika Schwarz, Umea University, Sweden.
  176. *
  177. * =====================================================================
  178. SUBROUTINE DTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
  179. $ LDC, SCALE, IWORK, LIWORK, SWORK, LDSWORK,
  180. $ INFO )
  181. IMPLICIT NONE
  182. *
  183. * .. Scalar Arguments ..
  184. CHARACTER TRANA, TRANB
  185. INTEGER INFO, ISGN, LDA, LDB, LDC, M, N,
  186. $ LIWORK, LDSWORK
  187. DOUBLE PRECISION SCALE
  188. * ..
  189. * .. Array Arguments ..
  190. INTEGER IWORK( * )
  191. DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ),
  192. $ SWORK( LDSWORK, * )
  193. * ..
  194. * .. Parameters ..
  195. DOUBLE PRECISION ZERO, ONE
  196. PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
  197. * ..
  198. * .. Local Scalars ..
  199. LOGICAL NOTRNA, NOTRNB, LQUERY, SKIP
  200. INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ,
  201. $ K, K1, K2, L, L1, L2, LL, NBA, NB, NBB, PC
  202. DOUBLE PRECISION ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC,
  203. $ SCAMIN, SGN, XNRM, BUF, SMLNUM
  204. * ..
  205. * .. Local Arrays ..
  206. DOUBLE PRECISION WNRM( MAX( M, N ) )
  207. * ..
  208. * .. External Functions ..
  209. LOGICAL LSAME
  210. INTEGER ILAENV
  211. DOUBLE PRECISION DLANGE, DLAMCH, DLARMM
  212. EXTERNAL DLANGE, DLAMCH, DLARMM, ILAENV, LSAME
  213. * ..
  214. * .. External Subroutines ..
  215. EXTERNAL DGEMM, DLASCL, DSCAL, DTRSYL, XERBLA
  216. * ..
  217. * .. Intrinsic Functions ..
  218. INTRINSIC ABS, DBLE, EXPONENT, MAX, MIN
  219. * ..
  220. * .. Executable Statements ..
  221. *
  222. * Decode and Test input parameters
  223. *
  224. NOTRNA = LSAME( TRANA, 'N' )
  225. NOTRNB = LSAME( TRANB, 'N' )
  226. *
  227. * Use the same block size for all matrices.
  228. *
  229. NB = MAX(8, ILAENV( 1, 'DTRSYL', '', M, N, -1, -1) )
  230. *
  231. * Compute number of blocks in A and B
  232. *
  233. NBA = MAX( 1, (M + NB - 1) / NB )
  234. NBB = MAX( 1, (N + NB - 1) / NB )
  235. *
  236. * Compute workspace
  237. *
  238. INFO = 0
  239. LQUERY = ( LIWORK.EQ.-1 .OR. LDSWORK.EQ.-1 )
  240. IWORK( 1 ) = NBA + NBB + 2
  241. IF( LQUERY ) THEN
  242. LDSWORK = 2
  243. SWORK( 1, 1 ) = MAX( NBA, NBB )
  244. SWORK( 2, 1 ) = 2 * NBB + NBA
  245. END IF
  246. *
  247. * Test the input arguments
  248. *
  249. IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT.
  250. $ LSAME( TRANA, 'C' ) ) THEN
  251. INFO = -1
  252. ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT.
  253. $ LSAME( TRANB, 'C' ) ) THEN
  254. INFO = -2
  255. ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN
  256. INFO = -3
  257. ELSE IF( M.LT.0 ) THEN
  258. INFO = -4
  259. ELSE IF( N.LT.0 ) THEN
  260. INFO = -5
  261. ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
  262. INFO = -7
  263. ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
  264. INFO = -9
  265. ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
  266. INFO = -11
  267. END IF
  268. IF( INFO.NE.0 ) THEN
  269. CALL XERBLA( 'DTRSYL3', -INFO )
  270. RETURN
  271. ELSE IF( LQUERY ) THEN
  272. RETURN
  273. END IF
  274. *
  275. * Quick return if possible
  276. *
  277. SCALE = ONE
  278. IF( M.EQ.0 .OR. N.EQ.0 )
  279. $ RETURN
  280. *
  281. * Use unblocked code for small problems or if insufficient
  282. * workspaces are provided
  283. *
  284. IF( MIN( NBA, NBB ).EQ.1 .OR. LDSWORK.LT.MAX( NBA, NBB ) .OR.
  285. $ LIWORK.LT.IWORK(1) ) THEN
  286. CALL DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB,
  287. $ C, LDC, SCALE, INFO )
  288. RETURN
  289. END IF
  290. *
  291. * Set constants to control overflow
  292. *
  293. SMLNUM = DLAMCH( 'S' )
  294. BIGNUM = ONE / SMLNUM
  295. *
  296. * Partition A such that 2-by-2 blocks on the diagonal are not split
  297. *
  298. SKIP = .FALSE.
  299. DO I = 1, NBA
  300. IWORK( I ) = ( I - 1 ) * NB + 1
  301. END DO
  302. IWORK( NBA + 1 ) = M + 1
  303. DO K = 1, NBA
  304. L1 = IWORK( K )
  305. L2 = IWORK( K + 1 ) - 1
  306. DO L = L1, L2
  307. IF( SKIP ) THEN
  308. SKIP = .FALSE.
  309. CYCLE
  310. END IF
  311. IF( L.GE.M ) THEN
  312. * A( M, M ) is a 1-by-1 block
  313. CYCLE
  314. END IF
  315. IF( A( L, L+1 ).NE.ZERO .AND. A( L+1, L ).NE.ZERO ) THEN
  316. * Check if 2-by-2 block is split
  317. IF( L + 1 .EQ. IWORK( K + 1 ) ) THEN
  318. IWORK( K + 1 ) = IWORK( K + 1 ) + 1
  319. CYCLE
  320. END IF
  321. SKIP = .TRUE.
  322. END IF
  323. END DO
  324. END DO
  325. IWORK( NBA + 1 ) = M + 1
  326. IF( IWORK( NBA ).GE.IWORK( NBA + 1 ) ) THEN
  327. IWORK( NBA ) = IWORK( NBA + 1 )
  328. NBA = NBA - 1
  329. END IF
  330. *
  331. * Partition B such that 2-by-2 blocks on the diagonal are not split
  332. *
  333. PC = NBA + 1
  334. SKIP = .FALSE.
  335. DO I = 1, NBB
  336. IWORK( PC + I ) = ( I - 1 ) * NB + 1
  337. END DO
  338. IWORK( PC + NBB + 1 ) = N + 1
  339. DO K = 1, NBB
  340. L1 = IWORK( PC + K )
  341. L2 = IWORK( PC + K + 1 ) - 1
  342. DO L = L1, L2
  343. IF( SKIP ) THEN
  344. SKIP = .FALSE.
  345. CYCLE
  346. END IF
  347. IF( L.GE.N ) THEN
  348. * B( N, N ) is a 1-by-1 block
  349. CYCLE
  350. END IF
  351. IF( B( L, L+1 ).NE.ZERO .AND. B( L+1, L ).NE.ZERO ) THEN
  352. * Check if 2-by-2 block is split
  353. IF( L + 1 .EQ. IWORK( PC + K + 1 ) ) THEN
  354. IWORK( PC + K + 1 ) = IWORK( PC + K + 1 ) + 1
  355. CYCLE
  356. END IF
  357. SKIP = .TRUE.
  358. END IF
  359. END DO
  360. END DO
  361. IWORK( PC + NBB + 1 ) = N + 1
  362. IF( IWORK( PC + NBB ).GE.IWORK( PC + NBB + 1 ) ) THEN
  363. IWORK( PC + NBB ) = IWORK( PC + NBB + 1 )
  364. NBB = NBB - 1
  365. END IF
  366. *
  367. * Set local scaling factors - must never attain zero.
  368. *
  369. DO L = 1, NBB
  370. DO K = 1, NBA
  371. SWORK( K, L ) = ONE
  372. END DO
  373. END DO
  374. *
  375. * Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero.
  376. * This scaling is to ensure compatibility with TRSYL and may get flushed.
  377. *
  378. BUF = ONE
  379. *
  380. * Compute upper bounds of blocks of A and B
  381. *
  382. AWRK = NBB
  383. DO K = 1, NBA
  384. K1 = IWORK( K )
  385. K2 = IWORK( K + 1 )
  386. DO L = K, NBA
  387. L1 = IWORK( L )
  388. L2 = IWORK( L + 1 )
  389. IF( NOTRNA ) THEN
  390. SWORK( K, AWRK + L ) = DLANGE( 'I', K2-K1, L2-L1,
  391. $ A( K1, L1 ), LDA, WNRM )
  392. ELSE
  393. SWORK( L, AWRK + K ) = DLANGE( '1', K2-K1, L2-L1,
  394. $ A( K1, L1 ), LDA, WNRM )
  395. END IF
  396. END DO
  397. END DO
  398. BWRK = NBB + NBA
  399. DO K = 1, NBB
  400. K1 = IWORK( PC + K )
  401. K2 = IWORK( PC + K + 1 )
  402. DO L = K, NBB
  403. L1 = IWORK( PC + L )
  404. L2 = IWORK( PC + L + 1 )
  405. IF( NOTRNB ) THEN
  406. SWORK( K, BWRK + L ) = DLANGE( 'I', K2-K1, L2-L1,
  407. $ B( K1, L1 ), LDB, WNRM )
  408. ELSE
  409. SWORK( L, BWRK + K ) = DLANGE( '1', K2-K1, L2-L1,
  410. $ B( K1, L1 ), LDB, WNRM )
  411. END IF
  412. END DO
  413. END DO
  414. *
  415. SGN = DBLE( ISGN )
  416. *
  417. IF( NOTRNA .AND. NOTRNB ) THEN
  418. *
  419. * Solve A*X + ISGN*X*B = scale*C.
  420. *
  421. * The (K,L)th block of X is determined starting from
  422. * bottom-left corner column by column by
  423. *
  424. * A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
  425. *
  426. * Where
  427. * M L-1
  428. * R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
  429. * I=K+1 J=1
  430. *
  431. * Start loop over block rows (index = K) and block columns (index = L)
  432. *
  433. DO K = NBA, 1, -1
  434. *
  435. * K1: row index of the first row in X( K, L )
  436. * K2: row index of the first row in X( K+1, L )
  437. * so the K2 - K1 is the column count of the block X( K, L )
  438. *
  439. K1 = IWORK( K )
  440. K2 = IWORK( K + 1 )
  441. DO L = 1, NBB
  442. *
  443. * L1: column index of the first column in X( K, L )
  444. * L2: column index of the first column in X( K, L + 1)
  445. * so that L2 - L1 is the row count of the block X( K, L )
  446. *
  447. L1 = IWORK( PC + L )
  448. L2 = IWORK( PC + L + 1 )
  449. *
  450. CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
  451. $ A( K1, K1 ), LDA,
  452. $ B( L1, L1 ), LDB,
  453. $ C( K1, L1 ), LDC, SCALOC, IINFO )
  454. INFO = MAX( INFO, IINFO )
  455. *
  456. IF ( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
  457. IF( SCALOC .EQ. ZERO ) THEN
  458. * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
  459. * is larger than the product of BIGNUM**2 and cannot be
  460. * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
  461. * Mark the computation as pointless.
  462. BUF = ZERO
  463. ELSE
  464. * Use second scaling factor to prevent flushing to zero.
  465. BUF = BUF*2.D0**EXPONENT( SCALOC )
  466. END IF
  467. DO JJ = 1, NBB
  468. DO LL = 1, NBA
  469. * Bound by BIGNUM to not introduce Inf. The value
  470. * is irrelevant; corresponding entries of the
  471. * solution will be flushed in consistency scaling.
  472. SWORK( LL, JJ ) = MIN( BIGNUM,
  473. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  474. END DO
  475. END DO
  476. END IF
  477. SWORK( K, L ) = SCALOC * SWORK( K, L )
  478. XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
  479. $ WNRM )
  480. *
  481. DO I = K - 1, 1, -1
  482. *
  483. * C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
  484. *
  485. I1 = IWORK( I )
  486. I2 = IWORK( I + 1 )
  487. *
  488. * Compute scaling factor to survive the linear update
  489. * simulating consistent scaling.
  490. *
  491. CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
  492. $ LDC, WNRM )
  493. SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
  494. CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
  495. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  496. ANRM = SWORK( I, AWRK + K )
  497. SCALOC = DLARMM( ANRM, XNRM, CNRM )
  498. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  499. * Use second scaling factor to prevent flushing to zero.
  500. BUF = BUF*2.D0**EXPONENT( SCALOC )
  501. DO JJ = 1, NBB
  502. DO LL = 1, NBA
  503. SWORK( LL, JJ ) = MIN( BIGNUM,
  504. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  505. END DO
  506. END DO
  507. SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
  508. SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
  509. END IF
  510. CNRM = CNRM * SCALOC
  511. XNRM = XNRM * SCALOC
  512. *
  513. * Simultaneously apply the robust update factor and the
  514. * consistency scaling factor to C( I, L ) and C( K, L ).
  515. *
  516. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  517. IF (SCAL .NE. ONE) THEN
  518. DO JJ = L1, L2-1
  519. CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1)
  520. END DO
  521. ENDIF
  522. *
  523. SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
  524. IF (SCAL .NE. ONE) THEN
  525. DO LL = L1, L2-1
  526. CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1)
  527. END DO
  528. ENDIF
  529. *
  530. * Record current scaling factor
  531. *
  532. SWORK( K, L ) = SCAMIN * SCALOC
  533. SWORK( I, L ) = SCAMIN * SCALOC
  534. *
  535. CALL DGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE,
  536. $ A( I1, K1 ), LDA, C( K1, L1 ), LDC,
  537. $ ONE, C( I1, L1 ), LDC )
  538. *
  539. END DO
  540. *
  541. DO J = L + 1, NBB
  542. *
  543. * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
  544. *
  545. J1 = IWORK( PC + J )
  546. J2 = IWORK( PC + J + 1 )
  547. *
  548. * Compute scaling factor to survive the linear update
  549. * simulating consistent scaling.
  550. *
  551. CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
  552. $ LDC, WNRM )
  553. SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
  554. CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
  555. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  556. BNRM = SWORK(L, BWRK + J)
  557. SCALOC = DLARMM( BNRM, XNRM, CNRM )
  558. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  559. * Use second scaling factor to prevent flushing to zero.
  560. BUF = BUF*2.D0**EXPONENT( SCALOC )
  561. DO JJ = 1, NBB
  562. DO LL = 1, NBA
  563. SWORK( LL, JJ ) = MIN( BIGNUM,
  564. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  565. END DO
  566. END DO
  567. SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
  568. SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
  569. END IF
  570. CNRM = CNRM * SCALOC
  571. XNRM = XNRM * SCALOC
  572. *
  573. * Simultaneously apply the robust update factor and the
  574. * consistency scaling factor to C( K, J ) and C( K, L).
  575. *
  576. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  577. IF( SCAL .NE. ONE ) THEN
  578. DO LL = L1, L2-1
  579. CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  580. END DO
  581. ENDIF
  582. *
  583. SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
  584. IF( SCAL .NE. ONE ) THEN
  585. DO JJ = J1, J2-1
  586. CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  587. END DO
  588. ENDIF
  589. *
  590. * Record current scaling factor
  591. *
  592. SWORK( K, L ) = SCAMIN * SCALOC
  593. SWORK( K, J ) = SCAMIN * SCALOC
  594. *
  595. CALL DGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN,
  596. $ C( K1, L1 ), LDC, B( L1, J1 ), LDB,
  597. $ ONE, C( K1, J1 ), LDC )
  598. END DO
  599. END DO
  600. END DO
  601. ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN
  602. *
  603. * Solve A**T*X + ISGN*X*B = scale*C.
  604. *
  605. * The (K,L)th block of X is determined starting from
  606. * upper-left corner column by column by
  607. *
  608. * A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
  609. *
  610. * Where
  611. * K-1 L-1
  612. * R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
  613. * I=1 J=1
  614. *
  615. * Start loop over block rows (index = K) and block columns (index = L)
  616. *
  617. DO K = 1, NBA
  618. *
  619. * K1: row index of the first row in X( K, L )
  620. * K2: row index of the first row in X( K+1, L )
  621. * so the K2 - K1 is the column count of the block X( K, L )
  622. *
  623. K1 = IWORK( K )
  624. K2 = IWORK( K + 1 )
  625. DO L = 1, NBB
  626. *
  627. * L1: column index of the first column in X( K, L )
  628. * L2: column index of the first column in X( K, L + 1)
  629. * so that L2 - L1 is the row count of the block X( K, L )
  630. *
  631. L1 = IWORK( PC + L )
  632. L2 = IWORK( PC + L + 1 )
  633. *
  634. CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
  635. $ A( K1, K1 ), LDA,
  636. $ B( L1, L1 ), LDB,
  637. $ C( K1, L1 ), LDC, SCALOC, IINFO )
  638. INFO = MAX( INFO, IINFO )
  639. *
  640. IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
  641. IF( SCALOC .EQ. ZERO ) THEN
  642. * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
  643. * is larger than the product of BIGNUM**2 and cannot be
  644. * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
  645. * Mark the computation as pointless.
  646. BUF = ZERO
  647. ELSE
  648. * Use second scaling factor to prevent flushing to zero.
  649. BUF = BUF*2.D0**EXPONENT( SCALOC )
  650. END IF
  651. DO JJ = 1, NBB
  652. DO LL = 1, NBA
  653. * Bound by BIGNUM to not introduce Inf. The value
  654. * is irrelevant; corresponding entries of the
  655. * solution will be flushed in consistency scaling.
  656. SWORK( LL, JJ ) = MIN( BIGNUM,
  657. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  658. END DO
  659. END DO
  660. END IF
  661. SWORK( K, L ) = SCALOC * SWORK( K, L )
  662. XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
  663. $ WNRM )
  664. *
  665. DO I = K + 1, NBA
  666. *
  667. * C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L )
  668. *
  669. I1 = IWORK( I )
  670. I2 = IWORK( I + 1 )
  671. *
  672. * Compute scaling factor to survive the linear update
  673. * simulating consistent scaling.
  674. *
  675. CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
  676. $ LDC, WNRM )
  677. SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
  678. CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
  679. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  680. ANRM = SWORK( I, AWRK + K )
  681. SCALOC = DLARMM( ANRM, XNRM, CNRM )
  682. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  683. * Use second scaling factor to prevent flushing to zero.
  684. BUF = BUF*2.D0**EXPONENT( SCALOC )
  685. DO JJ = 1, NBB
  686. DO LL = 1, NBA
  687. SWORK( LL, JJ ) = MIN( BIGNUM,
  688. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  689. END DO
  690. END DO
  691. SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
  692. SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
  693. END IF
  694. CNRM = CNRM * SCALOC
  695. XNRM = XNRM * SCALOC
  696. *
  697. * Simultaneously apply the robust update factor and the
  698. * consistency scaling factor to to C( I, L ) and C( K, L ).
  699. *
  700. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  701. IF (SCAL .NE. ONE) THEN
  702. DO LL = L1, L2-1
  703. CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  704. END DO
  705. ENDIF
  706. *
  707. SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
  708. IF (SCAL .NE. ONE) THEN
  709. DO LL = L1, L2-1
  710. CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1 )
  711. END DO
  712. ENDIF
  713. *
  714. * Record current scaling factor
  715. *
  716. SWORK( K, L ) = SCAMIN * SCALOC
  717. SWORK( I, L ) = SCAMIN * SCALOC
  718. *
  719. CALL DGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE,
  720. $ A( K1, I1 ), LDA, C( K1, L1 ), LDC,
  721. $ ONE, C( I1, L1 ), LDC )
  722. END DO
  723. *
  724. DO J = L + 1, NBB
  725. *
  726. * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
  727. *
  728. J1 = IWORK( PC + J )
  729. J2 = IWORK( PC + J + 1 )
  730. *
  731. * Compute scaling factor to survive the linear update
  732. * simulating consistent scaling.
  733. *
  734. CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
  735. $ LDC, WNRM )
  736. SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
  737. CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
  738. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  739. BNRM = SWORK( L, BWRK + J )
  740. SCALOC = DLARMM( BNRM, XNRM, CNRM )
  741. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  742. * Use second scaling factor to prevent flushing to zero.
  743. BUF = BUF*2.D0**EXPONENT( SCALOC )
  744. DO JJ = 1, NBB
  745. DO LL = 1, NBA
  746. SWORK( LL, JJ ) = MIN( BIGNUM,
  747. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  748. END DO
  749. END DO
  750. SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
  751. SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
  752. END IF
  753. CNRM = CNRM * SCALOC
  754. XNRM = XNRM * SCALOC
  755. *
  756. * Simultaneously apply the robust update factor and the
  757. * consistency scaling factor to to C( K, J ) and C( K, L ).
  758. *
  759. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  760. IF( SCAL .NE. ONE ) THEN
  761. DO LL = L1, L2-1
  762. CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  763. END DO
  764. ENDIF
  765. *
  766. SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
  767. IF( SCAL .NE. ONE ) THEN
  768. DO JJ = J1, J2-1
  769. CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  770. END DO
  771. ENDIF
  772. *
  773. * Record current scaling factor
  774. *
  775. SWORK( K, L ) = SCAMIN * SCALOC
  776. SWORK( K, J ) = SCAMIN * SCALOC
  777. *
  778. CALL DGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN,
  779. $ C( K1, L1 ), LDC, B( L1, J1 ), LDB,
  780. $ ONE, C( K1, J1 ), LDC )
  781. END DO
  782. END DO
  783. END DO
  784. ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN
  785. *
  786. * Solve A**T*X + ISGN*X*B**T = scale*C.
  787. *
  788. * The (K,L)th block of X is determined starting from
  789. * top-right corner column by column by
  790. *
  791. * A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
  792. *
  793. * Where
  794. * K-1 N
  795. * R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
  796. * I=1 J=L+1
  797. *
  798. * Start loop over block rows (index = K) and block columns (index = L)
  799. *
  800. DO K = 1, NBA
  801. *
  802. * K1: row index of the first row in X( K, L )
  803. * K2: row index of the first row in X( K+1, L )
  804. * so the K2 - K1 is the column count of the block X( K, L )
  805. *
  806. K1 = IWORK( K )
  807. K2 = IWORK( K + 1 )
  808. DO L = NBB, 1, -1
  809. *
  810. * L1: column index of the first column in X( K, L )
  811. * L2: column index of the first column in X( K, L + 1)
  812. * so that L2 - L1 is the row count of the block X( K, L )
  813. *
  814. L1 = IWORK( PC + L )
  815. L2 = IWORK( PC + L + 1 )
  816. *
  817. CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
  818. $ A( K1, K1 ), LDA,
  819. $ B( L1, L1 ), LDB,
  820. $ C( K1, L1 ), LDC, SCALOC, IINFO )
  821. INFO = MAX( INFO, IINFO )
  822. *
  823. SWORK( K, L ) = SCALOC * SWORK( K, L )
  824. IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
  825. IF( SCALOC .EQ. ZERO ) THEN
  826. * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
  827. * is larger than the product of BIGNUM**2 and cannot be
  828. * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
  829. * Mark the computation as pointless.
  830. BUF = ZERO
  831. ELSE
  832. * Use second scaling factor to prevent flushing to zero.
  833. BUF = BUF*2.D0**EXPONENT( SCALOC )
  834. END IF
  835. DO JJ = 1, NBB
  836. DO LL = 1, NBA
  837. * Bound by BIGNUM to not introduce Inf. The value
  838. * is irrelevant; corresponding entries of the
  839. * solution will be flushed in consistency scaling.
  840. SWORK( LL, JJ ) = MIN( BIGNUM,
  841. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  842. END DO
  843. END DO
  844. END IF
  845. XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
  846. $ WNRM )
  847. *
  848. DO I = K + 1, NBA
  849. *
  850. * C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L )
  851. *
  852. I1 = IWORK( I )
  853. I2 = IWORK( I + 1 )
  854. *
  855. * Compute scaling factor to survive the linear update
  856. * simulating consistent scaling.
  857. *
  858. CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
  859. $ LDC, WNRM )
  860. SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
  861. CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
  862. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  863. ANRM = SWORK( I, AWRK + K )
  864. SCALOC = DLARMM( ANRM, XNRM, CNRM )
  865. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  866. * Use second scaling factor to prevent flushing to zero.
  867. BUF = BUF*2.D0**EXPONENT( SCALOC )
  868. DO JJ = 1, NBB
  869. DO LL = 1, NBA
  870. SWORK( LL, JJ ) = MIN( BIGNUM,
  871. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  872. END DO
  873. END DO
  874. SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
  875. SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
  876. END IF
  877. CNRM = CNRM * SCALOC
  878. XNRM = XNRM * SCALOC
  879. *
  880. * Simultaneously apply the robust update factor and the
  881. * consistency scaling factor to C( I, L ) and C( K, L ).
  882. *
  883. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  884. IF (SCAL .NE. ONE) THEN
  885. DO LL = L1, L2-1
  886. CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  887. END DO
  888. ENDIF
  889. *
  890. SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
  891. IF (SCAL .NE. ONE) THEN
  892. DO LL = L1, L2-1
  893. CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1 )
  894. END DO
  895. ENDIF
  896. *
  897. * Record current scaling factor
  898. *
  899. SWORK( K, L ) = SCAMIN * SCALOC
  900. SWORK( I, L ) = SCAMIN * SCALOC
  901. *
  902. CALL DGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE,
  903. $ A( K1, I1 ), LDA, C( K1, L1 ), LDC,
  904. $ ONE, C( I1, L1 ), LDC )
  905. END DO
  906. *
  907. DO J = 1, L - 1
  908. *
  909. * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T
  910. *
  911. J1 = IWORK( PC + J )
  912. J2 = IWORK( PC + J + 1 )
  913. *
  914. * Compute scaling factor to survive the linear update
  915. * simulating consistent scaling.
  916. *
  917. CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
  918. $ LDC, WNRM )
  919. SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
  920. CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
  921. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  922. BNRM = SWORK( L, BWRK + J )
  923. SCALOC = DLARMM( BNRM, XNRM, CNRM )
  924. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  925. * Use second scaling factor to prevent flushing to zero.
  926. BUF = BUF*2.D0**EXPONENT( SCALOC )
  927. DO JJ = 1, NBB
  928. DO LL = 1, NBA
  929. SWORK( LL, JJ ) = MIN( BIGNUM,
  930. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  931. END DO
  932. END DO
  933. SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
  934. SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
  935. END IF
  936. CNRM = CNRM * SCALOC
  937. XNRM = XNRM * SCALOC
  938. *
  939. * Simultaneously apply the robust update factor and the
  940. * consistency scaling factor to C( K, J ) and C( K, L ).
  941. *
  942. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  943. IF( SCAL .NE. ONE ) THEN
  944. DO LL = L1, L2-1
  945. CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1)
  946. END DO
  947. ENDIF
  948. *
  949. SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
  950. IF( SCAL .NE. ONE ) THEN
  951. DO JJ = J1, J2-1
  952. CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  953. END DO
  954. ENDIF
  955. *
  956. * Record current scaling factor
  957. *
  958. SWORK( K, L ) = SCAMIN * SCALOC
  959. SWORK( K, J ) = SCAMIN * SCALOC
  960. *
  961. CALL DGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN,
  962. $ C( K1, L1 ), LDC, B( J1, L1 ), LDB,
  963. $ ONE, C( K1, J1 ), LDC )
  964. END DO
  965. END DO
  966. END DO
  967. ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN
  968. *
  969. * Solve A*X + ISGN*X*B**T = scale*C.
  970. *
  971. * The (K,L)th block of X is determined starting from
  972. * bottom-right corner column by column by
  973. *
  974. * A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
  975. *
  976. * Where
  977. * M N
  978. * R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
  979. * I=K+1 J=L+1
  980. *
  981. * Start loop over block rows (index = K) and block columns (index = L)
  982. *
  983. DO K = NBA, 1, -1
  984. *
  985. * K1: row index of the first row in X( K, L )
  986. * K2: row index of the first row in X( K+1, L )
  987. * so the K2 - K1 is the column count of the block X( K, L )
  988. *
  989. K1 = IWORK( K )
  990. K2 = IWORK( K + 1 )
  991. DO L = NBB, 1, -1
  992. *
  993. * L1: column index of the first column in X( K, L )
  994. * L2: column index of the first column in X( K, L + 1)
  995. * so that L2 - L1 is the row count of the block X( K, L )
  996. *
  997. L1 = IWORK( PC + L )
  998. L2 = IWORK( PC + L + 1 )
  999. *
  1000. CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
  1001. $ A( K1, K1 ), LDA,
  1002. $ B( L1, L1 ), LDB,
  1003. $ C( K1, L1 ), LDC, SCALOC, IINFO )
  1004. INFO = MAX( INFO, IINFO )
  1005. *
  1006. IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
  1007. IF( SCALOC .EQ. ZERO ) THEN
  1008. * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
  1009. * is larger than the product of BIGNUM**2 and cannot be
  1010. * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
  1011. * Mark the computation as pointless.
  1012. BUF = ZERO
  1013. ELSE
  1014. * Use second scaling factor to prevent flushing to zero.
  1015. BUF = BUF*2.D0**EXPONENT( SCALOC )
  1016. END IF
  1017. DO JJ = 1, NBB
  1018. DO LL = 1, NBA
  1019. * Bound by BIGNUM to not introduce Inf. The value
  1020. * is irrelevant; corresponding entries of the
  1021. * solution will be flushed in consistency scaling.
  1022. SWORK( LL, JJ ) = MIN( BIGNUM,
  1023. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  1024. END DO
  1025. END DO
  1026. END IF
  1027. SWORK( K, L ) = SCALOC * SWORK( K, L )
  1028. XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
  1029. $ WNRM )
  1030. *
  1031. DO I = 1, K - 1
  1032. *
  1033. * C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
  1034. *
  1035. I1 = IWORK( I )
  1036. I2 = IWORK( I + 1 )
  1037. *
  1038. * Compute scaling factor to survive the linear update
  1039. * simulating consistent scaling.
  1040. *
  1041. CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
  1042. $ LDC, WNRM )
  1043. SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
  1044. CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
  1045. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  1046. ANRM = SWORK( I, AWRK + K )
  1047. SCALOC = DLARMM( ANRM, XNRM, CNRM )
  1048. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  1049. * Use second scaling factor to prevent flushing to zero.
  1050. BUF = BUF*2.D0**EXPONENT( SCALOC )
  1051. DO JJ = 1, NBB
  1052. DO LL = 1, NBA
  1053. SWORK( LL, JJ ) = MIN( BIGNUM,
  1054. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  1055. END DO
  1056. END DO
  1057. SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
  1058. SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
  1059. END IF
  1060. CNRM = CNRM * SCALOC
  1061. XNRM = XNRM * SCALOC
  1062. *
  1063. * Simultaneously apply the robust update factor and the
  1064. * consistency scaling factor to C( I, L ) and C( K, L ).
  1065. *
  1066. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  1067. IF (SCAL .NE. ONE) THEN
  1068. DO LL = L1, L2-1
  1069. CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  1070. END DO
  1071. ENDIF
  1072. *
  1073. SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
  1074. IF (SCAL .NE. ONE) THEN
  1075. DO LL = L1, L2-1
  1076. CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1 )
  1077. END DO
  1078. ENDIF
  1079. *
  1080. * Record current scaling factor
  1081. *
  1082. SWORK( K, L ) = SCAMIN * SCALOC
  1083. SWORK( I, L ) = SCAMIN * SCALOC
  1084. *
  1085. CALL DGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE,
  1086. $ A( I1, K1 ), LDA, C( K1, L1 ), LDC,
  1087. $ ONE, C( I1, L1 ), LDC )
  1088. *
  1089. END DO
  1090. *
  1091. DO J = 1, L - 1
  1092. *
  1093. * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T
  1094. *
  1095. J1 = IWORK( PC + J )
  1096. J2 = IWORK( PC + J + 1 )
  1097. *
  1098. * Compute scaling factor to survive the linear update
  1099. * simulating consistent scaling.
  1100. *
  1101. CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
  1102. $ LDC, WNRM )
  1103. SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
  1104. CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
  1105. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  1106. BNRM = SWORK( L, BWRK + J )
  1107. SCALOC = DLARMM( BNRM, XNRM, CNRM )
  1108. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  1109. * Use second scaling factor to prevent flushing to zero.
  1110. BUF = BUF*2.D0**EXPONENT( SCALOC )
  1111. DO JJ = 1, NBB
  1112. DO LL = 1, NBA
  1113. SWORK( LL, JJ ) = MIN( BIGNUM,
  1114. $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
  1115. END DO
  1116. END DO
  1117. SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
  1118. SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
  1119. END IF
  1120. CNRM = CNRM * SCALOC
  1121. XNRM = XNRM * SCALOC
  1122. *
  1123. * Simultaneously apply the robust update factor and the
  1124. * consistency scaling factor to C( K, J ) and C( K, L ).
  1125. *
  1126. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  1127. IF( SCAL .NE. ONE ) THEN
  1128. DO JJ = L1, L2-1
  1129. CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  1130. END DO
  1131. ENDIF
  1132. *
  1133. SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
  1134. IF( SCAL .NE. ONE ) THEN
  1135. DO JJ = J1, J2-1
  1136. CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  1137. END DO
  1138. ENDIF
  1139. *
  1140. * Record current scaling factor
  1141. *
  1142. SWORK( K, L ) = SCAMIN * SCALOC
  1143. SWORK( K, J ) = SCAMIN * SCALOC
  1144. *
  1145. CALL DGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN,
  1146. $ C( K1, L1 ), LDC, B( J1, L1 ), LDB,
  1147. $ ONE, C( K1, J1 ), LDC )
  1148. END DO
  1149. END DO
  1150. END DO
  1151. *
  1152. END IF
  1153. *
  1154. * Reduce local scaling factors
  1155. *
  1156. SCALE = SWORK( 1, 1 )
  1157. DO K = 1, NBA
  1158. DO L = 1, NBB
  1159. SCALE = MIN( SCALE, SWORK( K, L ) )
  1160. END DO
  1161. END DO
  1162. *
  1163. IF( SCALE .EQ. ZERO ) THEN
  1164. *
  1165. * The magnitude of the largest entry of the solution is larger
  1166. * than the product of BIGNUM**2 and cannot be represented in the
  1167. * form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to
  1168. * zero and give up.
  1169. *
  1170. IWORK(1) = NBA + NBB + 2
  1171. SWORK(1,1) = MAX( NBA, NBB )
  1172. SWORK(2,1) = 2 * NBB + NBA
  1173. RETURN
  1174. END IF
  1175. *
  1176. * Realize consistent scaling
  1177. *
  1178. DO K = 1, NBA
  1179. K1 = IWORK( K )
  1180. K2 = IWORK( K + 1 )
  1181. DO L = 1, NBB
  1182. L1 = IWORK( PC + L )
  1183. L2 = IWORK( PC + L + 1 )
  1184. SCAL = SCALE / SWORK( K, L )
  1185. IF( SCAL .NE. ONE ) THEN
  1186. DO LL = L1, L2-1
  1187. CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  1188. END DO
  1189. ENDIF
  1190. END DO
  1191. END DO
  1192. *
  1193. IF( BUF .NE. ONE .AND. BUF.GT.ZERO ) THEN
  1194. *
  1195. * Decrease SCALE as much as possible.
  1196. *
  1197. SCALOC = MIN( SCALE / SMLNUM, ONE / BUF )
  1198. BUF = BUF * SCALOC
  1199. SCALE = SCALE / SCALOC
  1200. END IF
  1201. IF( BUF.NE.ONE .AND. BUF.GT.ZERO ) THEN
  1202. *
  1203. * In case of overly aggressive scaling during the computation,
  1204. * flushing of the global scale factor may be prevented by
  1205. * undoing some of the scaling. This step is to ensure that
  1206. * this routine flushes only scale factors that TRSYL also
  1207. * flushes and be usable as a drop-in replacement.
  1208. *
  1209. * How much can the normwise largest entry be upscaled?
  1210. *
  1211. SCAL = C( 1, 1 )
  1212. DO K = 1, M
  1213. DO L = 1, N
  1214. SCAL = MAX( SCAL, ABS( C( K, L ) ) )
  1215. END DO
  1216. END DO
  1217. *
  1218. * Increase BUF as close to 1 as possible and apply scaling.
  1219. *
  1220. SCALOC = MIN( BIGNUM / SCAL, ONE / BUF )
  1221. BUF = BUF * SCALOC
  1222. CALL DLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, IWORK(1) )
  1223. END IF
  1224. *
  1225. * Combine with buffer scaling factor. SCALE will be flushed if
  1226. * BUF is less than one here.
  1227. *
  1228. SCALE = SCALE * BUF
  1229. *
  1230. * Restore workspace dimensions
  1231. *
  1232. IWORK(1) = NBA + NBB + 2
  1233. SWORK(1,1) = MAX( NBA, NBB )
  1234. SWORK(2,1) = 2 * NBB + NBA
  1235. *
  1236. RETURN
  1237. *
  1238. * End of DTRSYL3
  1239. *
  1240. END