You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

strsyl3.f 45 kB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244
  1. *> \brief \b STRSYL3
  2. *
  3. * Definition:
  4. * ===========
  5. *
  6. *
  7. *> \par Purpose
  8. * =============
  9. *>
  10. *> \verbatim
  11. *>
  12. *> STRSYL3 solves the real Sylvester matrix equation:
  13. *>
  14. *> op(A)*X + X*op(B) = scale*C or
  15. *> op(A)*X - X*op(B) = scale*C,
  16. *>
  17. *> where op(A) = A or A**T, and A and B are both upper quasi-
  18. *> triangular. A is M-by-M and B is N-by-N; the right hand side C and
  19. *> the solution X are M-by-N; and scale is an output scale factor, set
  20. *> <= 1 to avoid overflow in X.
  21. *>
  22. *> A and B must be in Schur canonical form (as returned by SHSEQR), that
  23. *> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks;
  24. *> each 2-by-2 diagonal block has its diagonal elements equal and its
  25. *> off-diagonal elements of opposite sign.
  26. *>
  27. *> This is the block version of the algorithm.
  28. *> \endverbatim
  29. *
  30. * Arguments
  31. * =========
  32. *
  33. *> \param[in] TRANA
  34. *> \verbatim
  35. *> TRANA is CHARACTER*1
  36. *> Specifies the option op(A):
  37. *> = 'N': op(A) = A (No transpose)
  38. *> = 'T': op(A) = A**T (Transpose)
  39. *> = 'C': op(A) = A**H (Conjugate transpose = Transpose)
  40. *> \endverbatim
  41. *>
  42. *> \param[in] TRANB
  43. *> \verbatim
  44. *> TRANB is CHARACTER*1
  45. *> Specifies the option op(B):
  46. *> = 'N': op(B) = B (No transpose)
  47. *> = 'T': op(B) = B**T (Transpose)
  48. *> = 'C': op(B) = B**H (Conjugate transpose = Transpose)
  49. *> \endverbatim
  50. *>
  51. *> \param[in] ISGN
  52. *> \verbatim
  53. *> ISGN is INTEGER
  54. *> Specifies the sign in the equation:
  55. *> = +1: solve op(A)*X + X*op(B) = scale*C
  56. *> = -1: solve op(A)*X - X*op(B) = scale*C
  57. *> \endverbatim
  58. *>
  59. *> \param[in] M
  60. *> \verbatim
  61. *> M is INTEGER
  62. *> The order of the matrix A, and the number of rows in the
  63. *> matrices X and C. M >= 0.
  64. *> \endverbatim
  65. *>
  66. *> \param[in] N
  67. *> \verbatim
  68. *> N is INTEGER
  69. *> The order of the matrix B, and the number of columns in the
  70. *> matrices X and C. N >= 0.
  71. *> \endverbatim
  72. *>
  73. *> \param[in] A
  74. *> \verbatim
  75. *> A is REAL array, dimension (LDA,M)
  76. *> The upper quasi-triangular matrix A, in Schur canonical form.
  77. *> \endverbatim
  78. *>
  79. *> \param[in] LDA
  80. *> \verbatim
  81. *> LDA is INTEGER
  82. *> The leading dimension of the array A. LDA >= max(1,M).
  83. *> \endverbatim
  84. *>
  85. *> \param[in] B
  86. *> \verbatim
  87. *> B is REAL array, dimension (LDB,N)
  88. *> The upper quasi-triangular matrix B, in Schur canonical form.
  89. *> \endverbatim
  90. *>
  91. *> \param[in] LDB
  92. *> \verbatim
  93. *> LDB is INTEGER
  94. *> The leading dimension of the array B. LDB >= max(1,N).
  95. *> \endverbatim
  96. *>
  97. *> \param[in,out] C
  98. *> \verbatim
  99. *> C is REAL array, dimension (LDC,N)
  100. *> On entry, the M-by-N right hand side matrix C.
  101. *> On exit, C is overwritten by the solution matrix X.
  102. *> \endverbatim
  103. *>
  104. *> \param[in] LDC
  105. *> \verbatim
  106. *> LDC is INTEGER
  107. *> The leading dimension of the array C. LDC >= max(1,M)
  108. *> \endverbatim
  109. *>
  110. *> \param[out] SCALE
  111. *> \verbatim
  112. *> SCALE is REAL
  113. *> The scale factor, scale, set <= 1 to avoid overflow in X.
  114. *> \endverbatim
  115. *>
  116. *> \param[out] IWORK
  117. *> \verbatim
  118. *> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
  119. *> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
  120. *> \endverbatim
  121. *>
  122. *> \param[in] LIWORK
  123. *> \verbatim
  124. *> IWORK is INTEGER
  125. *> The dimension of the array IWORK. LIWORK >= ((M + NB - 1) / NB + 1)
  126. *> + ((N + NB - 1) / NB + 1), where NB is the optimal block size.
  127. *>
  128. *> If LIWORK = -1, then a workspace query is assumed; the routine
  129. *> only calculates the optimal dimension of the IWORK array,
  130. *> returns this value as the first entry of the IWORK array, and
  131. *> no error message related to LIWORK is issued by XERBLA.
  132. *> \endverbatim
  133. *>
  134. *> \param[out] SWORK
  135. *> \verbatim
  136. *> SWORK is REAL array, dimension (MAX(2, ROWS),
  137. *> MAX(1,COLS)).
  138. *> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS
  139. *> and SWORK(2) returns the optimal COLS.
  140. *> \endverbatim
  141. *>
  142. *> \param[in] LDSWORK
  143. *> \verbatim
  144. *> LDSWORK is INTEGER
  145. *> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1)
  146. *> and NB is the optimal block size.
  147. *>
  148. *> If LDSWORK = -1, then a workspace query is assumed; the routine
  149. *> only calculates the optimal dimensions of the SWORK matrix,
  150. *> returns these values as the first and second entry of the SWORK
  151. *> matrix, and no error message related LWORK is issued by XERBLA.
  152. *> \endverbatim
  153. *>
  154. *> \param[out] INFO
  155. *> \verbatim
  156. *> INFO is INTEGER
  157. *> = 0: successful exit
  158. *> < 0: if INFO = -i, the i-th argument had an illegal value
  159. *> = 1: A and B have common or very close eigenvalues; perturbed
  160. *> values were used to solve the equation (but the matrices
  161. *> A and B are unchanged).
  162. *> \endverbatim
  163. *
  164. * =====================================================================
  165. * References:
  166. * E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of
  167. * algorithms: The triangular Sylvester equation, ACM Transactions
  168. * on Mathematical Software (TOMS), volume 29, pages 218--243.
  169. *
  170. * A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel
  171. * Solution of the Triangular Sylvester Equation. Lecture Notes in
  172. * Computer Science, vol 12043, pages 82--92, Springer.
  173. *
  174. * Contributor:
  175. * Angelika Schwarz, Umea University, Sweden.
  176. *
  177. * =====================================================================
  178. SUBROUTINE STRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
  179. $ LDC, SCALE, IWORK, LIWORK, SWORK, LDSWORK,
  180. $ INFO )
  181. IMPLICIT NONE
  182. *
  183. * .. Scalar Arguments ..
  184. CHARACTER TRANA, TRANB
  185. INTEGER INFO, ISGN, LDA, LDB, LDC, M, N,
  186. $ LIWORK, LDSWORK
  187. REAL SCALE
  188. * ..
  189. * .. Array Arguments ..
  190. INTEGER IWORK( * )
  191. REAL A( LDA, * ), B( LDB, * ), C( LDC, * ),
  192. $ SWORK( LDSWORK, * )
  193. * ..
  194. * .. Parameters ..
  195. REAL ZERO, ONE
  196. PARAMETER ( ZERO = 0.0E+0, ONE = 1.0E+0 )
  197. * ..
  198. * .. Local Scalars ..
  199. LOGICAL NOTRNA, NOTRNB, LQUERY, SKIP
  200. INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ,
  201. $ K, K1, K2, L, L1, L2, LL, NBA, NB, NBB, PC
  202. REAL ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC,
  203. $ SCAMIN, SGN, XNRM, BUF, SMLNUM
  204. * ..
  205. * .. Local Arrays ..
  206. REAL WNRM( MAX( M, N ) )
  207. * ..
  208. * .. External Functions ..
  209. LOGICAL LSAME
  210. INTEGER ILAENV
  211. REAL SLANGE, SLAMCH, SLARMM
  212. EXTERNAL SLANGE, SLAMCH, SLARMM, ILAENV, LSAME
  213. * ..
  214. * .. External Subroutines ..
  215. EXTERNAL SGEMM, SLASCL, SSCAL, STRSYL, XERBLA
  216. * ..
  217. * .. Intrinsic Functions ..
  218. INTRINSIC ABS, EXPONENT, MAX, MIN, REAL
  219. * ..
  220. * .. Executable Statements ..
  221. *
  222. * Decode and Test input parameters
  223. *
  224. NOTRNA = LSAME( TRANA, 'N' )
  225. NOTRNB = LSAME( TRANB, 'N' )
  226. *
  227. * Use the same block size for all matrices.
  228. *
  229. NB = MAX(8, ILAENV( 1, 'STRSYL', '', M, N, -1, -1) )
  230. *
  231. * Compute number of blocks in A and B
  232. *
  233. NBA = MAX( 1, (M + NB - 1) / NB )
  234. NBB = MAX( 1, (N + NB - 1) / NB )
  235. *
  236. * Compute workspace
  237. *
  238. INFO = 0
  239. LQUERY = ( LIWORK.EQ.-1 .OR. LDSWORK.EQ.-1 )
  240. IWORK( 1 ) = NBA + NBB + 2
  241. IF( LQUERY ) THEN
  242. LDSWORK = 2
  243. SWORK( 1, 1 ) = MAX( NBA, NBB )
  244. SWORK( 2, 1 ) = 2 * NBB + NBA
  245. END IF
  246. *
  247. * Test the input arguments
  248. *
  249. IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT.
  250. $ LSAME( TRANA, 'C' ) ) THEN
  251. INFO = -1
  252. ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT.
  253. $ LSAME( TRANB, 'C' ) ) THEN
  254. INFO = -2
  255. ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN
  256. INFO = -3
  257. ELSE IF( M.LT.0 ) THEN
  258. INFO = -4
  259. ELSE IF( N.LT.0 ) THEN
  260. INFO = -5
  261. ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
  262. INFO = -7
  263. ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
  264. INFO = -9
  265. ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
  266. INFO = -11
  267. ELSE IF( .NOT.LQUERY .AND. LIWORK.LT.IWORK(1) ) THEN
  268. INFO = -14
  269. ELSE IF( .NOT.LQUERY .AND. LDSWORK.LT.MAX( NBA, NBB ) ) THEN
  270. INFO = -16
  271. END IF
  272. IF( INFO.NE.0 ) THEN
  273. CALL XERBLA( 'STRSYL3', -INFO )
  274. RETURN
  275. ELSE IF( LQUERY ) THEN
  276. RETURN
  277. END IF
  278. *
  279. * Quick return if possible
  280. *
  281. SCALE = ONE
  282. IF( M.EQ.0 .OR. N.EQ.0 )
  283. $ RETURN
  284. *
  285. * Use unblocked code for small problems or if insufficient
  286. * workspaces are provided
  287. *
  288. IF( MIN( NBA, NBB ).EQ.1 .OR. LDSWORK.LT.MAX( NBA, NBB ) .OR.
  289. $ LIWORK.LT.IWORK(1) ) THEN
  290. CALL STRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB,
  291. $ C, LDC, SCALE, INFO )
  292. RETURN
  293. END IF
  294. *
  295. * Set constants to control overflow
  296. *
  297. SMLNUM = SLAMCH( 'S' )
  298. BIGNUM = ONE / SMLNUM
  299. *
  300. * Partition A such that 2-by-2 blocks on the diagonal are not split
  301. *
  302. SKIP = .FALSE.
  303. DO I = 1, NBA
  304. IWORK( I ) = ( I - 1 ) * NB + 1
  305. END DO
  306. IWORK( NBA + 1 ) = M + 1
  307. DO K = 1, NBA
  308. L1 = IWORK( K )
  309. L2 = IWORK( K + 1 ) - 1
  310. DO L = L1, L2
  311. IF( SKIP ) THEN
  312. SKIP = .FALSE.
  313. CYCLE
  314. END IF
  315. IF( L.GE.M ) THEN
  316. * A( M, M ) is a 1-by-1 block
  317. CYCLE
  318. END IF
  319. IF( A( L, L+1 ).NE.ZERO .AND. A( L+1, L ).NE.ZERO ) THEN
  320. * Check if 2-by-2 block is split
  321. IF( L + 1 .EQ. IWORK( K + 1 ) ) THEN
  322. IWORK( K + 1 ) = IWORK( K + 1 ) + 1
  323. CYCLE
  324. END IF
  325. SKIP = .TRUE.
  326. END IF
  327. END DO
  328. END DO
  329. IWORK( NBA + 1 ) = M + 1
  330. IF( IWORK( NBA ).GE.IWORK( NBA + 1 ) ) THEN
  331. IWORK( NBA ) = IWORK( NBA + 1 )
  332. NBA = NBA - 1
  333. END IF
  334. *
  335. * Partition B such that 2-by-2 blocks on the diagonal are not split
  336. *
  337. PC = NBA + 1
  338. SKIP = .FALSE.
  339. DO I = 1, NBB
  340. IWORK( PC + I ) = ( I - 1 ) * NB + 1
  341. END DO
  342. IWORK( PC + NBB + 1 ) = N + 1
  343. DO K = 1, NBB
  344. L1 = IWORK( PC + K )
  345. L2 = IWORK( PC + K + 1 ) - 1
  346. DO L = L1, L2
  347. IF( SKIP ) THEN
  348. SKIP = .FALSE.
  349. CYCLE
  350. END IF
  351. IF( L.GE.N ) THEN
  352. * B( N, N ) is a 1-by-1 block
  353. CYCLE
  354. END IF
  355. IF( B( L, L+1 ).NE.ZERO .AND. B( L+1, L ).NE.ZERO ) THEN
  356. * Check if 2-by-2 block is split
  357. IF( L + 1 .EQ. IWORK( PC + K + 1 ) ) THEN
  358. IWORK( PC + K + 1 ) = IWORK( PC + K + 1 ) + 1
  359. CYCLE
  360. END IF
  361. SKIP = .TRUE.
  362. END IF
  363. END DO
  364. END DO
  365. IWORK( PC + NBB + 1 ) = N + 1
  366. IF( IWORK( PC + NBB ).GE.IWORK( PC + NBB + 1 ) ) THEN
  367. IWORK( PC + NBB ) = IWORK( PC + NBB + 1 )
  368. NBB = NBB - 1
  369. END IF
  370. *
  371. * Set local scaling factors - must never attain zero.
  372. *
  373. DO L = 1, NBB
  374. DO K = 1, NBA
  375. SWORK( K, L ) = ONE
  376. END DO
  377. END DO
  378. *
  379. * Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero.
  380. * This scaling is to ensure compatibility with TRSYL and may get flushed.
  381. *
  382. BUF = ONE
  383. *
  384. * Compute upper bounds of blocks of A and B
  385. *
  386. AWRK = NBB
  387. DO K = 1, NBA
  388. K1 = IWORK( K )
  389. K2 = IWORK( K + 1 )
  390. DO L = K, NBA
  391. L1 = IWORK( L )
  392. L2 = IWORK( L + 1 )
  393. IF( NOTRNA ) THEN
  394. SWORK( K, AWRK + L ) = SLANGE( 'I', K2-K1, L2-L1,
  395. $ A( K1, L1 ), LDA, WNRM )
  396. ELSE
  397. SWORK( L, AWRK + K ) = SLANGE( '1', K2-K1, L2-L1,
  398. $ A( K1, L1 ), LDA, WNRM )
  399. END IF
  400. END DO
  401. END DO
  402. BWRK = NBB + NBA
  403. DO K = 1, NBB
  404. K1 = IWORK( PC + K )
  405. K2 = IWORK( PC + K + 1 )
  406. DO L = K, NBB
  407. L1 = IWORK( PC + L )
  408. L2 = IWORK( PC + L + 1 )
  409. IF( NOTRNB ) THEN
  410. SWORK( K, BWRK + L ) = SLANGE( 'I', K2-K1, L2-L1,
  411. $ B( K1, L1 ), LDB, WNRM )
  412. ELSE
  413. SWORK( L, BWRK + K ) = SLANGE( '1', K2-K1, L2-L1,
  414. $ B( K1, L1 ), LDB, WNRM )
  415. END IF
  416. END DO
  417. END DO
  418. *
  419. SGN = REAL( ISGN )
  420. *
  421. IF( NOTRNA .AND. NOTRNB ) THEN
  422. *
  423. * Solve A*X + ISGN*X*B = scale*C.
  424. *
  425. * The (K,L)th block of X is determined starting from
  426. * bottom-left corner column by column by
  427. *
  428. * A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
  429. *
  430. * Where
  431. * M L-1
  432. * R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
  433. * I=K+1 J=1
  434. *
  435. * Start loop over block rows (index = K) and block columns (index = L)
  436. *
  437. DO K = NBA, 1, -1
  438. *
  439. * K1: row index of the first row in X( K, L )
  440. * K2: row index of the first row in X( K+1, L )
  441. * so the K2 - K1 is the column count of the block X( K, L )
  442. *
  443. K1 = IWORK( K )
  444. K2 = IWORK( K + 1 )
  445. DO L = 1, NBB
  446. *
  447. * L1: column index of the first column in X( K, L )
  448. * L2: column index of the first column in X( K, L + 1)
  449. * so that L2 - L1 is the row count of the block X( K, L )
  450. *
  451. L1 = IWORK( PC + L )
  452. L2 = IWORK( PC + L + 1 )
  453. *
  454. CALL STRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
  455. $ A( K1, K1 ), LDA,
  456. $ B( L1, L1 ), LDB,
  457. $ C( K1, L1 ), LDC, SCALOC, IINFO )
  458. INFO = MAX( INFO, IINFO )
  459. *
  460. IF ( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
  461. IF( SCALOC .EQ. ZERO ) THEN
  462. * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
  463. * is larger than the product of BIGNUM**2 and cannot be
  464. * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
  465. * Mark the computation as pointless.
  466. BUF = ZERO
  467. ELSE
  468. * Use second scaling factor to prevent flushing to zero.
  469. BUF = BUF*2.E0**EXPONENT( SCALOC )
  470. END IF
  471. DO JJ = 1, NBB
  472. DO LL = 1, NBA
  473. * Bound by BIGNUM to not introduce Inf. The value
  474. * is irrelevant; corresponding entries of the
  475. * solution will be flushed in consistency scaling.
  476. SWORK( LL, JJ ) = MIN( BIGNUM,
  477. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  478. END DO
  479. END DO
  480. END IF
  481. SWORK( K, L ) = SCALOC * SWORK( K, L )
  482. XNRM = SLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
  483. $ WNRM )
  484. *
  485. DO I = K - 1, 1, -1
  486. *
  487. * C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
  488. *
  489. I1 = IWORK( I )
  490. I2 = IWORK( I + 1 )
  491. *
  492. * Compute scaling factor to survive the linear update
  493. * simulating consistent scaling.
  494. *
  495. CNRM = SLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
  496. $ LDC, WNRM )
  497. SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
  498. CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
  499. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  500. ANRM = SWORK( I, AWRK + K )
  501. SCALOC = SLARMM( ANRM, XNRM, CNRM )
  502. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  503. * Use second scaling factor to prevent flushing to zero.
  504. BUF = BUF*2.E0**EXPONENT( SCALOC )
  505. DO JJ = 1, NBB
  506. DO LL = 1, NBA
  507. SWORK( LL, JJ ) = MIN( BIGNUM,
  508. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  509. END DO
  510. END DO
  511. SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC )
  512. SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC )
  513. END IF
  514. CNRM = CNRM * SCALOC
  515. XNRM = XNRM * SCALOC
  516. *
  517. * Simultaneously apply the robust update factor and the
  518. * consistency scaling factor to C( I, L ) and C( K, L ).
  519. *
  520. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  521. IF (SCAL .NE. ONE) THEN
  522. DO JJ = L1, L2-1
  523. CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1)
  524. END DO
  525. ENDIF
  526. *
  527. SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
  528. IF (SCAL .NE. ONE) THEN
  529. DO LL = L1, L2-1
  530. CALL SSCAL( I2-I1, SCAL, C( I1, LL ), 1)
  531. END DO
  532. ENDIF
  533. *
  534. * Record current scaling factor
  535. *
  536. SWORK( K, L ) = SCAMIN * SCALOC
  537. SWORK( I, L ) = SCAMIN * SCALOC
  538. *
  539. CALL SGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE,
  540. $ A( I1, K1 ), LDA, C( K1, L1 ), LDC,
  541. $ ONE, C( I1, L1 ), LDC )
  542. *
  543. END DO
  544. *
  545. DO J = L + 1, NBB
  546. *
  547. * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
  548. *
  549. J1 = IWORK( PC + J )
  550. J2 = IWORK( PC + J + 1 )
  551. *
  552. * Compute scaling factor to survive the linear update
  553. * simulating consistent scaling.
  554. *
  555. CNRM = SLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
  556. $ LDC, WNRM )
  557. SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
  558. CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
  559. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  560. BNRM = SWORK(L, BWRK + J)
  561. SCALOC = SLARMM( BNRM, XNRM, CNRM )
  562. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  563. * Use second scaling factor to prevent flushing to zero.
  564. BUF = BUF*2.E0**EXPONENT( SCALOC )
  565. DO JJ = 1, NBB
  566. DO LL = 1, NBA
  567. SWORK( LL, JJ ) = MIN( BIGNUM,
  568. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  569. END DO
  570. END DO
  571. SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC )
  572. SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC )
  573. END IF
  574. CNRM = CNRM * SCALOC
  575. XNRM = XNRM * SCALOC
  576. *
  577. * Simultaneously apply the robust update factor and the
  578. * consistency scaling factor to C( K, J ) and C( K, L).
  579. *
  580. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  581. IF( SCAL .NE. ONE ) THEN
  582. DO LL = L1, L2-1
  583. CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  584. END DO
  585. ENDIF
  586. *
  587. SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
  588. IF( SCAL .NE. ONE ) THEN
  589. DO JJ = J1, J2-1
  590. CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  591. END DO
  592. ENDIF
  593. *
  594. * Record current scaling factor
  595. *
  596. SWORK( K, L ) = SCAMIN * SCALOC
  597. SWORK( K, J ) = SCAMIN * SCALOC
  598. *
  599. CALL SGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN,
  600. $ C( K1, L1 ), LDC, B( L1, J1 ), LDB,
  601. $ ONE, C( K1, J1 ), LDC )
  602. END DO
  603. END DO
  604. END DO
  605. ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN
  606. *
  607. * Solve A**T*X + ISGN*X*B = scale*C.
  608. *
  609. * The (K,L)th block of X is determined starting from
  610. * upper-left corner column by column by
  611. *
  612. * A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
  613. *
  614. * Where
  615. * K-1 L-1
  616. * R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
  617. * I=1 J=1
  618. *
  619. * Start loop over block rows (index = K) and block columns (index = L)
  620. *
  621. DO K = 1, NBA
  622. *
  623. * K1: row index of the first row in X( K, L )
  624. * K2: row index of the first row in X( K+1, L )
  625. * so the K2 - K1 is the column count of the block X( K, L )
  626. *
  627. K1 = IWORK( K )
  628. K2 = IWORK( K + 1 )
  629. DO L = 1, NBB
  630. *
  631. * L1: column index of the first column in X( K, L )
  632. * L2: column index of the first column in X( K, L + 1)
  633. * so that L2 - L1 is the row count of the block X( K, L )
  634. *
  635. L1 = IWORK( PC + L )
  636. L2 = IWORK( PC + L + 1 )
  637. *
  638. CALL STRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
  639. $ A( K1, K1 ), LDA,
  640. $ B( L1, L1 ), LDB,
  641. $ C( K1, L1 ), LDC, SCALOC, IINFO )
  642. INFO = MAX( INFO, IINFO )
  643. *
  644. IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
  645. IF( SCALOC .EQ. ZERO ) THEN
  646. * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
  647. * is larger than the product of BIGNUM**2 and cannot be
  648. * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
  649. * Mark the computation as pointless.
  650. BUF = ZERO
  651. ELSE
  652. * Use second scaling factor to prevent flushing to zero.
  653. BUF = BUF*2.E0**EXPONENT( SCALOC )
  654. END IF
  655. DO JJ = 1, NBB
  656. DO LL = 1, NBA
  657. * Bound by BIGNUM to not introduce Inf. The value
  658. * is irrelevant; corresponding entries of the
  659. * solution will be flushed in consistency scaling.
  660. SWORK( LL, JJ ) = MIN( BIGNUM,
  661. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  662. END DO
  663. END DO
  664. END IF
  665. SWORK( K, L ) = SCALOC * SWORK( K, L )
  666. XNRM = SLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
  667. $ WNRM )
  668. *
  669. DO I = K + 1, NBA
  670. *
  671. * C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L )
  672. *
  673. I1 = IWORK( I )
  674. I2 = IWORK( I + 1 )
  675. *
  676. * Compute scaling factor to survive the linear update
  677. * simulating consistent scaling.
  678. *
  679. CNRM = SLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
  680. $ LDC, WNRM )
  681. SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
  682. CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
  683. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  684. ANRM = SWORK( I, AWRK + K )
  685. SCALOC = SLARMM( ANRM, XNRM, CNRM )
  686. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  687. * Use second scaling factor to prevent flushing to zero.
  688. BUF = BUF*2.E0**EXPONENT( SCALOC )
  689. DO JJ = 1, NBB
  690. DO LL = 1, NBA
  691. SWORK( LL, JJ ) = MIN( BIGNUM,
  692. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  693. END DO
  694. END DO
  695. SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC )
  696. SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC )
  697. END IF
  698. CNRM = CNRM * SCALOC
  699. XNRM = XNRM * SCALOC
  700. *
  701. * Simultaneously apply the robust update factor and the
  702. * consistency scaling factor to to C( I, L ) and C( K, L ).
  703. *
  704. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  705. IF (SCAL .NE. ONE) THEN
  706. DO LL = L1, L2-1
  707. CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  708. END DO
  709. ENDIF
  710. *
  711. SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
  712. IF (SCAL .NE. ONE) THEN
  713. DO LL = L1, L2-1
  714. CALL SSCAL( I2-I1, SCAL, C( I1, LL ), 1 )
  715. END DO
  716. ENDIF
  717. *
  718. * Record current scaling factor
  719. *
  720. SWORK( K, L ) = SCAMIN * SCALOC
  721. SWORK( I, L ) = SCAMIN * SCALOC
  722. *
  723. CALL SGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE,
  724. $ A( K1, I1 ), LDA, C( K1, L1 ), LDC,
  725. $ ONE, C( I1, L1 ), LDC )
  726. END DO
  727. *
  728. DO J = L + 1, NBB
  729. *
  730. * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
  731. *
  732. J1 = IWORK( PC + J )
  733. J2 = IWORK( PC + J + 1 )
  734. *
  735. * Compute scaling factor to survive the linear update
  736. * simulating consistent scaling.
  737. *
  738. CNRM = SLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
  739. $ LDC, WNRM )
  740. SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
  741. CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
  742. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  743. BNRM = SWORK( L, BWRK + J )
  744. SCALOC = SLARMM( BNRM, XNRM, CNRM )
  745. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  746. * Use second scaling factor to prevent flushing to zero.
  747. BUF = BUF*2.E0**EXPONENT( SCALOC )
  748. DO JJ = 1, NBB
  749. DO LL = 1, NBA
  750. SWORK( LL, JJ ) = MIN( BIGNUM,
  751. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  752. END DO
  753. END DO
  754. SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC )
  755. SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC )
  756. END IF
  757. CNRM = CNRM * SCALOC
  758. XNRM = XNRM * SCALOC
  759. *
  760. * Simultaneously apply the robust update factor and the
  761. * consistency scaling factor to to C( K, J ) and C( K, L ).
  762. *
  763. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  764. IF( SCAL .NE. ONE ) THEN
  765. DO LL = L1, L2-1
  766. CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  767. END DO
  768. ENDIF
  769. *
  770. SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
  771. IF( SCAL .NE. ONE ) THEN
  772. DO JJ = J1, J2-1
  773. CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  774. END DO
  775. ENDIF
  776. *
  777. * Record current scaling factor
  778. *
  779. SWORK( K, L ) = SCAMIN * SCALOC
  780. SWORK( K, J ) = SCAMIN * SCALOC
  781. *
  782. CALL SGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN,
  783. $ C( K1, L1 ), LDC, B( L1, J1 ), LDB,
  784. $ ONE, C( K1, J1 ), LDC )
  785. END DO
  786. END DO
  787. END DO
  788. ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN
  789. *
  790. * Solve A**T*X + ISGN*X*B**T = scale*C.
  791. *
  792. * The (K,L)th block of X is determined starting from
  793. * top-right corner column by column by
  794. *
  795. * A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
  796. *
  797. * Where
  798. * K-1 N
  799. * R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
  800. * I=1 J=L+1
  801. *
  802. * Start loop over block rows (index = K) and block columns (index = L)
  803. *
  804. DO K = 1, NBA
  805. *
  806. * K1: row index of the first row in X( K, L )
  807. * K2: row index of the first row in X( K+1, L )
  808. * so the K2 - K1 is the column count of the block X( K, L )
  809. *
  810. K1 = IWORK( K )
  811. K2 = IWORK( K + 1 )
  812. DO L = NBB, 1, -1
  813. *
  814. * L1: column index of the first column in X( K, L )
  815. * L2: column index of the first column in X( K, L + 1)
  816. * so that L2 - L1 is the row count of the block X( K, L )
  817. *
  818. L1 = IWORK( PC + L )
  819. L2 = IWORK( PC + L + 1 )
  820. *
  821. CALL STRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
  822. $ A( K1, K1 ), LDA,
  823. $ B( L1, L1 ), LDB,
  824. $ C( K1, L1 ), LDC, SCALOC, IINFO )
  825. INFO = MAX( INFO, IINFO )
  826. *
  827. IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
  828. IF( SCALOC .EQ. ZERO ) THEN
  829. * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
  830. * is larger than the product of BIGNUM**2 and cannot be
  831. * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
  832. * Mark the computation as pointless.
  833. BUF = ZERO
  834. ELSE
  835. * Use second scaling factor to prevent flushing to zero.
  836. BUF = BUF*2.E0**EXPONENT( SCALOC )
  837. END IF
  838. DO JJ = 1, NBB
  839. DO LL = 1, NBA
  840. * Bound by BIGNUM to not introduce Inf. The value
  841. * is irrelevant; corresponding entries of the
  842. * solution will be flushed in consistency scaling.
  843. SWORK( LL, JJ ) = MIN( BIGNUM,
  844. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  845. END DO
  846. END DO
  847. END IF
  848. SWORK( K, L ) = SCALOC * SWORK( K, L )
  849. XNRM = SLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
  850. $ WNRM )
  851. *
  852. DO I = K + 1, NBA
  853. *
  854. * C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L )
  855. *
  856. I1 = IWORK( I )
  857. I2 = IWORK( I + 1 )
  858. *
  859. * Compute scaling factor to survive the linear update
  860. * simulating consistent scaling.
  861. *
  862. CNRM = SLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
  863. $ LDC, WNRM )
  864. SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
  865. CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
  866. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  867. ANRM = SWORK( I, AWRK + K )
  868. SCALOC = SLARMM( ANRM, XNRM, CNRM )
  869. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  870. * Use second scaling factor to prevent flushing to zero.
  871. BUF = BUF*2.E0**EXPONENT( SCALOC )
  872. DO JJ = 1, NBB
  873. DO LL = 1, NBA
  874. SWORK( LL, JJ ) = MIN( BIGNUM,
  875. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  876. END DO
  877. END DO
  878. SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC )
  879. SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC )
  880. END IF
  881. CNRM = CNRM * SCALOC
  882. XNRM = XNRM * SCALOC
  883. *
  884. * Simultaneously apply the robust update factor and the
  885. * consistency scaling factor to C( I, L ) and C( K, L ).
  886. *
  887. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  888. IF (SCAL .NE. ONE) THEN
  889. DO LL = L1, L2-1
  890. CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  891. END DO
  892. ENDIF
  893. *
  894. SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
  895. IF (SCAL .NE. ONE) THEN
  896. DO LL = L1, L2-1
  897. CALL SSCAL( I2-I1, SCAL, C( I1, LL ), 1 )
  898. END DO
  899. ENDIF
  900. *
  901. * Record current scaling factor
  902. *
  903. SWORK( K, L ) = SCAMIN * SCALOC
  904. SWORK( I, L ) = SCAMIN * SCALOC
  905. *
  906. CALL SGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE,
  907. $ A( K1, I1 ), LDA, C( K1, L1 ), LDC,
  908. $ ONE, C( I1, L1 ), LDC )
  909. END DO
  910. *
  911. DO J = 1, L - 1
  912. *
  913. * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T
  914. *
  915. J1 = IWORK( PC + J )
  916. J2 = IWORK( PC + J + 1 )
  917. *
  918. * Compute scaling factor to survive the linear update
  919. * simulating consistent scaling.
  920. *
  921. CNRM = SLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
  922. $ LDC, WNRM )
  923. SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
  924. CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
  925. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  926. BNRM = SWORK( L, BWRK + J )
  927. SCALOC = SLARMM( BNRM, XNRM, CNRM )
  928. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  929. * Use second scaling factor to prevent flushing to zero.
  930. BUF = BUF*2.E0**EXPONENT( SCALOC )
  931. DO JJ = 1, NBB
  932. DO LL = 1, NBA
  933. SWORK( LL, JJ ) = MIN( BIGNUM,
  934. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  935. END DO
  936. END DO
  937. SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC )
  938. SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC )
  939. END IF
  940. CNRM = CNRM * SCALOC
  941. XNRM = XNRM * SCALOC
  942. *
  943. * Simultaneously apply the robust update factor and the
  944. * consistency scaling factor to C( K, J ) and C( K, L ).
  945. *
  946. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  947. IF( SCAL .NE. ONE ) THEN
  948. DO LL = L1, L2-1
  949. CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1)
  950. END DO
  951. ENDIF
  952. *
  953. SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
  954. IF( SCAL .NE. ONE ) THEN
  955. DO JJ = J1, J2-1
  956. CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  957. END DO
  958. ENDIF
  959. *
  960. * Record current scaling factor
  961. *
  962. SWORK( K, L ) = SCAMIN * SCALOC
  963. SWORK( K, J ) = SCAMIN * SCALOC
  964. *
  965. CALL SGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN,
  966. $ C( K1, L1 ), LDC, B( J1, L1 ), LDB,
  967. $ ONE, C( K1, J1 ), LDC )
  968. END DO
  969. END DO
  970. END DO
  971. ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN
  972. *
  973. * Solve A*X + ISGN*X*B**T = scale*C.
  974. *
  975. * The (K,L)th block of X is determined starting from
  976. * bottom-right corner column by column by
  977. *
  978. * A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
  979. *
  980. * Where
  981. * M N
  982. * R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
  983. * I=K+1 J=L+1
  984. *
  985. * Start loop over block rows (index = K) and block columns (index = L)
  986. *
  987. DO K = NBA, 1, -1
  988. *
  989. * K1: row index of the first row in X( K, L )
  990. * K2: row index of the first row in X( K+1, L )
  991. * so the K2 - K1 is the column count of the block X( K, L )
  992. *
  993. K1 = IWORK( K )
  994. K2 = IWORK( K + 1 )
  995. DO L = NBB, 1, -1
  996. *
  997. * L1: column index of the first column in X( K, L )
  998. * L2: column index of the first column in X( K, L + 1)
  999. * so that L2 - L1 is the row count of the block X( K, L )
  1000. *
  1001. L1 = IWORK( PC + L )
  1002. L2 = IWORK( PC + L + 1 )
  1003. *
  1004. CALL STRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
  1005. $ A( K1, K1 ), LDA,
  1006. $ B( L1, L1 ), LDB,
  1007. $ C( K1, L1 ), LDC, SCALOC, IINFO )
  1008. INFO = MAX( INFO, IINFO )
  1009. *
  1010. IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
  1011. IF( SCALOC .EQ. ZERO ) THEN
  1012. * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
  1013. * is larger than the product of BIGNUM**2 and cannot be
  1014. * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
  1015. * Mark the computation as pointless.
  1016. BUF = ZERO
  1017. ELSE
  1018. * Use second scaling factor to prevent flushing to zero.
  1019. BUF = BUF*2.E0**EXPONENT( SCALOC )
  1020. END IF
  1021. DO JJ = 1, NBB
  1022. DO LL = 1, NBA
  1023. * Bound by BIGNUM to not introduce Inf. The value
  1024. * is irrelevant; corresponding entries of the
  1025. * solution will be flushed in consistency scaling.
  1026. SWORK( LL, JJ ) = MIN( BIGNUM,
  1027. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  1028. END DO
  1029. END DO
  1030. END IF
  1031. SWORK( K, L ) = SCALOC * SWORK( K, L )
  1032. XNRM = SLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
  1033. $ WNRM )
  1034. *
  1035. DO I = 1, K - 1
  1036. *
  1037. * C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
  1038. *
  1039. I1 = IWORK( I )
  1040. I2 = IWORK( I + 1 )
  1041. *
  1042. * Compute scaling factor to survive the linear update
  1043. * simulating consistent scaling.
  1044. *
  1045. CNRM = SLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
  1046. $ LDC, WNRM )
  1047. SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
  1048. CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
  1049. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  1050. ANRM = SWORK( I, AWRK + K )
  1051. SCALOC = SLARMM( ANRM, XNRM, CNRM )
  1052. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  1053. * Use second scaling factor to prevent flushing to zero.
  1054. BUF = BUF*2.E0**EXPONENT( SCALOC )
  1055. DO JJ = 1, NBB
  1056. DO LL = 1, NBA
  1057. SWORK( LL, JJ ) = MIN( BIGNUM,
  1058. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  1059. END DO
  1060. END DO
  1061. SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC )
  1062. SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC )
  1063. END IF
  1064. CNRM = CNRM * SCALOC
  1065. XNRM = XNRM * SCALOC
  1066. *
  1067. * Simultaneously apply the robust update factor and the
  1068. * consistency scaling factor to C( I, L ) and C( K, L ).
  1069. *
  1070. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  1071. IF (SCAL .NE. ONE) THEN
  1072. DO LL = L1, L2-1
  1073. CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  1074. END DO
  1075. ENDIF
  1076. *
  1077. SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
  1078. IF (SCAL .NE. ONE) THEN
  1079. DO LL = L1, L2-1
  1080. CALL SSCAL( I2-I1, SCAL, C( I1, LL ), 1 )
  1081. END DO
  1082. ENDIF
  1083. *
  1084. * Record current scaling factor
  1085. *
  1086. SWORK( K, L ) = SCAMIN * SCALOC
  1087. SWORK( I, L ) = SCAMIN * SCALOC
  1088. *
  1089. CALL SGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE,
  1090. $ A( I1, K1 ), LDA, C( K1, L1 ), LDC,
  1091. $ ONE, C( I1, L1 ), LDC )
  1092. *
  1093. END DO
  1094. *
  1095. DO J = 1, L - 1
  1096. *
  1097. * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T
  1098. *
  1099. J1 = IWORK( PC + J )
  1100. J2 = IWORK( PC + J + 1 )
  1101. *
  1102. * Compute scaling factor to survive the linear update
  1103. * simulating consistent scaling.
  1104. *
  1105. CNRM = SLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
  1106. $ LDC, WNRM )
  1107. SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
  1108. CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
  1109. XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
  1110. BNRM = SWORK( L, BWRK + J )
  1111. SCALOC = SLARMM( BNRM, XNRM, CNRM )
  1112. IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
  1113. * Use second scaling factor to prevent flushing to zero.
  1114. BUF = BUF*2.E0**EXPONENT( SCALOC )
  1115. DO JJ = 1, NBB
  1116. DO LL = 1, NBA
  1117. SWORK( LL, JJ ) = MIN( BIGNUM,
  1118. $ SWORK( LL, JJ ) / 2.E0**EXPONENT( SCALOC ) )
  1119. END DO
  1120. END DO
  1121. SCAMIN = SCAMIN / 2.E0**EXPONENT( SCALOC )
  1122. SCALOC = SCALOC / 2.E0**EXPONENT( SCALOC )
  1123. END IF
  1124. CNRM = CNRM * SCALOC
  1125. XNRM = XNRM * SCALOC
  1126. *
  1127. * Simultaneously apply the robust update factor and the
  1128. * consistency scaling factor to C( K, J ) and C( K, L ).
  1129. *
  1130. SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
  1131. IF( SCAL .NE. ONE ) THEN
  1132. DO JJ = L1, L2-1
  1133. CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  1134. END DO
  1135. ENDIF
  1136. *
  1137. SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
  1138. IF( SCAL .NE. ONE ) THEN
  1139. DO JJ = J1, J2-1
  1140. CALL SSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
  1141. END DO
  1142. ENDIF
  1143. *
  1144. * Record current scaling factor
  1145. *
  1146. SWORK( K, L ) = SCAMIN * SCALOC
  1147. SWORK( K, J ) = SCAMIN * SCALOC
  1148. *
  1149. CALL SGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN,
  1150. $ C( K1, L1 ), LDC, B( J1, L1 ), LDB,
  1151. $ ONE, C( K1, J1 ), LDC )
  1152. END DO
  1153. END DO
  1154. END DO
  1155. *
  1156. END IF
  1157. *
  1158. * Reduce local scaling factors
  1159. *
  1160. SCALE = SWORK( 1, 1 )
  1161. DO K = 1, NBA
  1162. DO L = 1, NBB
  1163. SCALE = MIN( SCALE, SWORK( K, L ) )
  1164. END DO
  1165. END DO
  1166. *
  1167. IF( SCALE .EQ. ZERO ) THEN
  1168. *
  1169. * The magnitude of the largest entry of the solution is larger
  1170. * than the product of BIGNUM**2 and cannot be represented in the
  1171. * form (1/SCALE)*X if SCALE is REAL. Set SCALE to zero and give up.
  1172. *
  1173. IWORK(1) = NBA + NBB + 2
  1174. SWORK(1,1) = MAX( NBA, NBB )
  1175. SWORK(2,1) = 2 * NBB + NBA
  1176. RETURN
  1177. END IF
  1178. *
  1179. * Realize consistent scaling
  1180. *
  1181. DO K = 1, NBA
  1182. K1 = IWORK( K )
  1183. K2 = IWORK( K + 1 )
  1184. DO L = 1, NBB
  1185. L1 = IWORK( PC + L )
  1186. L2 = IWORK( PC + L + 1 )
  1187. SCAL = SCALE / SWORK( K, L )
  1188. IF( SCAL .NE. ONE ) THEN
  1189. DO LL = L1, L2-1
  1190. CALL SSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
  1191. END DO
  1192. ENDIF
  1193. END DO
  1194. END DO
  1195. *
  1196. IF( BUF .NE. ONE .AND. BUF.GT.ZERO ) THEN
  1197. *
  1198. * Decrease SCALE as much as possible.
  1199. *
  1200. SCALOC = MIN( SCALE / SMLNUM, ONE / BUF )
  1201. BUF = BUF * SCALOC
  1202. SCALE = SCALE / SCALOC
  1203. END IF
  1204. IF( BUF.NE.ONE .AND. BUF.GT.ZERO ) THEN
  1205. *
  1206. * In case of overly aggressive scaling during the computation,
  1207. * flushing of the global scale factor may be prevented by
  1208. * undoing some of the scaling. This step is to ensure that
  1209. * this routine flushes only scale factors that TRSYL also
  1210. * flushes and be usable as a drop-in replacement.
  1211. *
  1212. * How much can the normwise largest entry be upscaled?
  1213. *
  1214. SCAL = C( 1, 1 )
  1215. DO K = 1, M
  1216. DO L = 1, N
  1217. SCAL = MAX( SCAL, ABS( C( K, L ) ) )
  1218. END DO
  1219. END DO
  1220. *
  1221. * Increase BUF as close to 1 as possible and apply scaling.
  1222. *
  1223. SCALOC = MIN( BIGNUM / SCAL, ONE / BUF )
  1224. BUF = BUF * SCALOC
  1225. CALL SLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, IWORK(1) )
  1226. END IF
  1227. *
  1228. * Combine with buffer scaling factor. SCALE will be flushed if
  1229. * BUF is less than one here.
  1230. *
  1231. SCALE = SCALE * BUF
  1232. *
  1233. * Restore workspace dimensions
  1234. *
  1235. IWORK(1) = NBA + NBB + 2
  1236. SWORK(1,1) = MAX( NBA, NBB )
  1237. SWORK(2,1) = 2 * NBB + NBA
  1238. *
  1239. RETURN
  1240. *
  1241. * End of STRSYL3
  1242. *
  1243. END