You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dsgesv.f 13 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. *> \brief <b> DSGESV computes the solution to system of linear equations A * X = B for GE matrices</b> (mixed precision with iterative refinement)
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download DSGESV + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsgesv.f">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsgesv.f">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsgesv.f">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
  22. * SWORK, ITER, INFO )
  23. *
  24. * .. Scalar Arguments ..
  25. * INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS
  26. * ..
  27. * .. Array Arguments ..
  28. * INTEGER IPIV( * )
  29. * REAL SWORK( * )
  30. * DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( N, * ),
  31. * $ X( LDX, * )
  32. * ..
  33. *
  34. *
  35. *> \par Purpose:
  36. * =============
  37. *>
  38. *> \verbatim
  39. *>
  40. *> DSGESV computes the solution to a real system of linear equations
  41. *> A * X = B,
  42. *> where A is an N-by-N matrix and X and B are N-by-NRHS matrices.
  43. *>
  44. *> DSGESV first attempts to factorize the matrix in SINGLE PRECISION
  45. *> and use this factorization within an iterative refinement procedure
  46. *> to produce a solution with DOUBLE PRECISION normwise backward error
  47. *> quality (see below). If the approach fails the method switches to a
  48. *> DOUBLE PRECISION factorization and solve.
  49. *>
  50. *> The iterative refinement is not going to be a winning strategy if
  51. *> the ratio SINGLE PRECISION performance over DOUBLE PRECISION
  52. *> performance is too small. A reasonable strategy should take the
  53. *> number of right-hand sides and the size of the matrix into account.
  54. *> This might be done with a call to ILAENV in the future. Up to now, we
  55. *> always try iterative refinement.
  56. *>
  57. *> The iterative refinement process is stopped if
  58. *> ITER > ITERMAX
  59. *> or for all the RHS we have:
  60. *> RNRM < SQRT(N)*XNRM*ANRM*EPS*BWDMAX
  61. *> where
  62. *> o ITER is the number of the current iteration in the iterative
  63. *> refinement process
  64. *> o RNRM is the infinity-norm of the residual
  65. *> o XNRM is the infinity-norm of the solution
  66. *> o ANRM is the infinity-operator-norm of the matrix A
  67. *> o EPS is the machine epsilon returned by DLAMCH('Epsilon')
  68. *> The value ITERMAX and BWDMAX are fixed to 30 and 1.0D+00
  69. *> respectively.
  70. *> \endverbatim
  71. *
  72. * Arguments:
  73. * ==========
  74. *
  75. *> \param[in] N
  76. *> \verbatim
  77. *> N is INTEGER
  78. *> The number of linear equations, i.e., the order of the
  79. *> matrix A. N >= 0.
  80. *> \endverbatim
  81. *>
  82. *> \param[in] NRHS
  83. *> \verbatim
  84. *> NRHS is INTEGER
  85. *> The number of right hand sides, i.e., the number of columns
  86. *> of the matrix B. NRHS >= 0.
  87. *> \endverbatim
  88. *>
  89. *> \param[in,out] A
  90. *> \verbatim
  91. *> A is DOUBLE PRECISION array,
  92. *> dimension (LDA,N)
  93. *> On entry, the N-by-N coefficient matrix A.
  94. *> On exit, if iterative refinement has been successfully used
  95. *> (INFO.EQ.0 and ITER.GE.0, see description below), then A is
  96. *> unchanged, if double precision factorization has been used
  97. *> (INFO.EQ.0 and ITER.LT.0, see description below), then the
  98. *> array A contains the factors L and U from the factorization
  99. *> A = P*L*U; the unit diagonal elements of L are not stored.
  100. *> \endverbatim
  101. *>
  102. *> \param[in] LDA
  103. *> \verbatim
  104. *> LDA is INTEGER
  105. *> The leading dimension of the array A. LDA >= max(1,N).
  106. *> \endverbatim
  107. *>
  108. *> \param[out] IPIV
  109. *> \verbatim
  110. *> IPIV is INTEGER array, dimension (N)
  111. *> The pivot indices that define the permutation matrix P;
  112. *> row i of the matrix was interchanged with row IPIV(i).
  113. *> Corresponds either to the single precision factorization
  114. *> (if INFO.EQ.0 and ITER.GE.0) or the double precision
  115. *> factorization (if INFO.EQ.0 and ITER.LT.0).
  116. *> \endverbatim
  117. *>
  118. *> \param[in] B
  119. *> \verbatim
  120. *> B is DOUBLE PRECISION array, dimension (LDB,NRHS)
  121. *> The N-by-NRHS right hand side matrix B.
  122. *> \endverbatim
  123. *>
  124. *> \param[in] LDB
  125. *> \verbatim
  126. *> LDB is INTEGER
  127. *> The leading dimension of the array B. LDB >= max(1,N).
  128. *> \endverbatim
  129. *>
  130. *> \param[out] X
  131. *> \verbatim
  132. *> X is DOUBLE PRECISION array, dimension (LDX,NRHS)
  133. *> If INFO = 0, the N-by-NRHS solution matrix X.
  134. *> \endverbatim
  135. *>
  136. *> \param[in] LDX
  137. *> \verbatim
  138. *> LDX is INTEGER
  139. *> The leading dimension of the array X. LDX >= max(1,N).
  140. *> \endverbatim
  141. *>
  142. *> \param[out] WORK
  143. *> \verbatim
  144. *> WORK is DOUBLE PRECISION array, dimension (N,NRHS)
  145. *> This array is used to hold the residual vectors.
  146. *> \endverbatim
  147. *>
  148. *> \param[out] SWORK
  149. *> \verbatim
  150. *> SWORK is REAL array, dimension (N*(N+NRHS))
  151. *> This array is used to use the single precision matrix and the
  152. *> right-hand sides or solutions in single precision.
  153. *> \endverbatim
  154. *>
  155. *> \param[out] ITER
  156. *> \verbatim
  157. *> ITER is INTEGER
  158. *> < 0: iterative refinement has failed, double precision
  159. *> factorization has been performed
  160. *> -1 : the routine fell back to full precision for
  161. *> implementation- or machine-specific reasons
  162. *> -2 : narrowing the precision induced an overflow,
  163. *> the routine fell back to full precision
  164. *> -3 : failure of SGETRF
  165. *> -31: stop the iterative refinement after the 30th
  166. *> iterations
  167. *> > 0: iterative refinement has been successfully used.
  168. *> Returns the number of iterations
  169. *> \endverbatim
  170. *>
  171. *> \param[out] INFO
  172. *> \verbatim
  173. *> INFO is INTEGER
  174. *> = 0: successful exit
  175. *> < 0: if INFO = -i, the i-th argument had an illegal value
  176. *> > 0: if INFO = i, U(i,i) computed in DOUBLE PRECISION is
  177. *> exactly zero. The factorization has been completed,
  178. *> but the factor U is exactly singular, so the solution
  179. *> could not be computed.
  180. *> \endverbatim
  181. *
  182. * Authors:
  183. * ========
  184. *
  185. *> \author Univ. of Tennessee
  186. *> \author Univ. of California Berkeley
  187. *> \author Univ. of Colorado Denver
  188. *> \author NAG Ltd.
  189. *
  190. *> \date June 2016
  191. *
  192. *> \ingroup doubleGEsolve
  193. *
  194. * =====================================================================
  195. SUBROUTINE DSGESV( N, NRHS, A, LDA, IPIV, B, LDB, X, LDX, WORK,
  196. $ SWORK, ITER, INFO )
  197. *
  198. * -- LAPACK driver routine (version 3.8.0) --
  199. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  200. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  201. * June 2016
  202. *
  203. * .. Scalar Arguments ..
  204. INTEGER INFO, ITER, LDA, LDB, LDX, N, NRHS
  205. * ..
  206. * .. Array Arguments ..
  207. INTEGER IPIV( * )
  208. REAL SWORK( * )
  209. DOUBLE PRECISION A( LDA, * ), B( LDB, * ), WORK( N, * ),
  210. $ X( LDX, * )
  211. * ..
  212. *
  213. * =====================================================================
  214. *
  215. * .. Parameters ..
  216. LOGICAL DOITREF
  217. PARAMETER ( DOITREF = .TRUE. )
  218. *
  219. INTEGER ITERMAX
  220. PARAMETER ( ITERMAX = 30 )
  221. *
  222. DOUBLE PRECISION BWDMAX
  223. PARAMETER ( BWDMAX = 1.0E+00 )
  224. *
  225. DOUBLE PRECISION NEGONE, ONE
  226. PARAMETER ( NEGONE = -1.0D+0, ONE = 1.0D+0 )
  227. *
  228. * .. Local Scalars ..
  229. INTEGER I, IITER, PTSA, PTSX
  230. DOUBLE PRECISION ANRM, CTE, EPS, RNRM, XNRM
  231. *
  232. * .. External Subroutines ..
  233. EXTERNAL DAXPY, DGEMM, DLACPY, DLAG2S, DGETRF, DGETRS,
  234. $ SGETRF, SGETRS, SLAG2D, XERBLA
  235. * ..
  236. * .. External Functions ..
  237. INTEGER IDAMAX
  238. DOUBLE PRECISION DLAMCH, DLANGE
  239. EXTERNAL IDAMAX, DLAMCH, DLANGE
  240. * ..
  241. * .. Intrinsic Functions ..
  242. INTRINSIC ABS, DBLE, MAX, SQRT
  243. * ..
  244. * .. Executable Statements ..
  245. *
  246. INFO = 0
  247. ITER = 0
  248. *
  249. * Test the input parameters.
  250. *
  251. IF( N.LT.0 ) THEN
  252. INFO = -1
  253. ELSE IF( NRHS.LT.0 ) THEN
  254. INFO = -2
  255. ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  256. INFO = -4
  257. ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
  258. INFO = -7
  259. ELSE IF( LDX.LT.MAX( 1, N ) ) THEN
  260. INFO = -9
  261. END IF
  262. IF( INFO.NE.0 ) THEN
  263. CALL XERBLA( 'DSGESV', -INFO )
  264. RETURN
  265. END IF
  266. *
  267. * Quick return if (N.EQ.0).
  268. *
  269. IF( N.EQ.0 )
  270. $ RETURN
  271. *
  272. * Skip single precision iterative refinement if a priori slower
  273. * than double precision factorization.
  274. *
  275. IF( .NOT.DOITREF ) THEN
  276. ITER = -1
  277. GO TO 40
  278. END IF
  279. *
  280. * Compute some constants.
  281. *
  282. ANRM = DLANGE( 'I', N, N, A, LDA, WORK )
  283. EPS = DLAMCH( 'Epsilon' )
  284. CTE = ANRM*EPS*SQRT( DBLE( N ) )*BWDMAX
  285. *
  286. * Set the indices PTSA, PTSX for referencing SA and SX in SWORK.
  287. *
  288. PTSA = 1
  289. PTSX = PTSA + N*N
  290. *
  291. * Convert B from double precision to single precision and store the
  292. * result in SX.
  293. *
  294. CALL DLAG2S( N, NRHS, B, LDB, SWORK( PTSX ), N, INFO )
  295. *
  296. IF( INFO.NE.0 ) THEN
  297. ITER = -2
  298. GO TO 40
  299. END IF
  300. *
  301. * Convert A from double precision to single precision and store the
  302. * result in SA.
  303. *
  304. CALL DLAG2S( N, N, A, LDA, SWORK( PTSA ), N, INFO )
  305. *
  306. IF( INFO.NE.0 ) THEN
  307. ITER = -2
  308. GO TO 40
  309. END IF
  310. *
  311. * Compute the LU factorization of SA.
  312. *
  313. CALL SGETRF( N, N, SWORK( PTSA ), N, IPIV, INFO )
  314. *
  315. IF( INFO.NE.0 ) THEN
  316. ITER = -3
  317. GO TO 40
  318. END IF
  319. *
  320. * Solve the system SA*SX = SB.
  321. *
  322. CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
  323. $ SWORK( PTSX ), N, INFO )
  324. *
  325. * Convert SX back to double precision
  326. *
  327. CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, X, LDX, INFO )
  328. *
  329. * Compute R = B - AX (R is WORK).
  330. *
  331. CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
  332. *
  333. CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE, A,
  334. $ LDA, X, LDX, ONE, WORK, N )
  335. *
  336. * Check whether the NRHS normwise backward errors satisfy the
  337. * stopping criterion. If yes, set ITER=0 and return.
  338. *
  339. DO I = 1, NRHS
  340. XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
  341. RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
  342. IF( RNRM.GT.XNRM*CTE )
  343. $ GO TO 10
  344. END DO
  345. *
  346. * If we are here, the NRHS normwise backward errors satisfy the
  347. * stopping criterion. We are good to exit.
  348. *
  349. ITER = 0
  350. RETURN
  351. *
  352. 10 CONTINUE
  353. *
  354. DO 30 IITER = 1, ITERMAX
  355. *
  356. * Convert R (in WORK) from double precision to single precision
  357. * and store the result in SX.
  358. *
  359. CALL DLAG2S( N, NRHS, WORK, N, SWORK( PTSX ), N, INFO )
  360. *
  361. IF( INFO.NE.0 ) THEN
  362. ITER = -2
  363. GO TO 40
  364. END IF
  365. *
  366. * Solve the system SA*SX = SR.
  367. *
  368. CALL SGETRS( 'No transpose', N, NRHS, SWORK( PTSA ), N, IPIV,
  369. $ SWORK( PTSX ), N, INFO )
  370. *
  371. * Convert SX back to double precision and update the current
  372. * iterate.
  373. *
  374. CALL SLAG2D( N, NRHS, SWORK( PTSX ), N, WORK, N, INFO )
  375. *
  376. DO I = 1, NRHS
  377. CALL DAXPY( N, ONE, WORK( 1, I ), 1, X( 1, I ), 1 )
  378. END DO
  379. *
  380. * Compute R = B - AX (R is WORK).
  381. *
  382. CALL DLACPY( 'All', N, NRHS, B, LDB, WORK, N )
  383. *
  384. CALL DGEMM( 'No Transpose', 'No Transpose', N, NRHS, N, NEGONE,
  385. $ A, LDA, X, LDX, ONE, WORK, N )
  386. *
  387. * Check whether the NRHS normwise backward errors satisfy the
  388. * stopping criterion. If yes, set ITER=IITER>0 and return.
  389. *
  390. DO I = 1, NRHS
  391. XNRM = ABS( X( IDAMAX( N, X( 1, I ), 1 ), I ) )
  392. RNRM = ABS( WORK( IDAMAX( N, WORK( 1, I ), 1 ), I ) )
  393. IF( RNRM.GT.XNRM*CTE )
  394. $ GO TO 20
  395. END DO
  396. *
  397. * If we are here, the NRHS normwise backward errors satisfy the
  398. * stopping criterion, we are good to exit.
  399. *
  400. ITER = IITER
  401. *
  402. RETURN
  403. *
  404. 20 CONTINUE
  405. *
  406. 30 CONTINUE
  407. *
  408. * If we are at this place of the code, this is because we have
  409. * performed ITER=ITERMAX iterations and never satisified the
  410. * stopping criterion, set up the ITER flag accordingly and follow up
  411. * on double precision routine.
  412. *
  413. ITER = -ITERMAX - 1
  414. *
  415. 40 CONTINUE
  416. *
  417. * Single-precision iterative refinement failed to converge to a
  418. * satisfactory solution, so we resort to double precision.
  419. *
  420. CALL DGETRF( N, N, A, LDA, IPIV, INFO )
  421. *
  422. IF( INFO.NE.0 )
  423. $ RETURN
  424. *
  425. CALL DLACPY( 'All', N, NRHS, B, LDB, X, LDX )
  426. CALL DGETRS( 'No transpose', N, NRHS, A, LDA, IPIV, X, LDX,
  427. $ INFO )
  428. *
  429. RETURN
  430. *
  431. * End of DSGESV.
  432. *
  433. END