You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dgbtrf.f 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513
  1. *> \brief \b DGBTRF
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download DGBTRF + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtrf.f">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtrf.f">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtrf.f">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  22. *
  23. * .. Scalar Arguments ..
  24. * INTEGER INFO, KL, KU, LDAB, M, N
  25. * ..
  26. * .. Array Arguments ..
  27. * INTEGER IPIV( * )
  28. * DOUBLE PRECISION AB( LDAB, * )
  29. * ..
  30. *
  31. *
  32. *> \par Purpose:
  33. * =============
  34. *>
  35. *> \verbatim
  36. *>
  37. *> DGBTRF computes an LU factorization of a real m-by-n band matrix A
  38. *> using partial pivoting with row interchanges.
  39. *>
  40. *> This is the blocked version of the algorithm, calling Level 3 BLAS.
  41. *> \endverbatim
  42. *
  43. * Arguments:
  44. * ==========
  45. *
  46. *> \param[in] M
  47. *> \verbatim
  48. *> M is INTEGER
  49. *> The number of rows of the matrix A. M >= 0.
  50. *> \endverbatim
  51. *>
  52. *> \param[in] N
  53. *> \verbatim
  54. *> N is INTEGER
  55. *> The number of columns of the matrix A. N >= 0.
  56. *> \endverbatim
  57. *>
  58. *> \param[in] KL
  59. *> \verbatim
  60. *> KL is INTEGER
  61. *> The number of subdiagonals within the band of A. KL >= 0.
  62. *> \endverbatim
  63. *>
  64. *> \param[in] KU
  65. *> \verbatim
  66. *> KU is INTEGER
  67. *> The number of superdiagonals within the band of A. KU >= 0.
  68. *> \endverbatim
  69. *>
  70. *> \param[in,out] AB
  71. *> \verbatim
  72. *> AB is DOUBLE PRECISION array, dimension (LDAB,N)
  73. *> On entry, the matrix A in band storage, in rows KL+1 to
  74. *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
  75. *> The j-th column of A is stored in the j-th column of the
  76. *> array AB as follows:
  77. *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
  78. *>
  79. *> On exit, details of the factorization: U is stored as an
  80. *> upper triangular band matrix with KL+KU superdiagonals in
  81. *> rows 1 to KL+KU+1, and the multipliers used during the
  82. *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
  83. *> See below for further details.
  84. *> \endverbatim
  85. *>
  86. *> \param[in] LDAB
  87. *> \verbatim
  88. *> LDAB is INTEGER
  89. *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
  90. *> \endverbatim
  91. *>
  92. *> \param[out] IPIV
  93. *> \verbatim
  94. *> IPIV is INTEGER array, dimension (min(M,N))
  95. *> The pivot indices; for 1 <= i <= min(M,N), row i of the
  96. *> matrix was interchanged with row IPIV(i).
  97. *> \endverbatim
  98. *>
  99. *> \param[out] INFO
  100. *> \verbatim
  101. *> INFO is INTEGER
  102. *> = 0: successful exit
  103. *> < 0: if INFO = -i, the i-th argument had an illegal value
  104. *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
  105. *> has been completed, but the factor U is exactly
  106. *> singular, and division by zero will occur if it is used
  107. *> to solve a system of equations.
  108. *> \endverbatim
  109. *
  110. * Authors:
  111. * ========
  112. *
  113. *> \author Univ. of Tennessee
  114. *> \author Univ. of California Berkeley
  115. *> \author Univ. of Colorado Denver
  116. *> \author NAG Ltd.
  117. *
  118. *> \ingroup doubleGBcomputational
  119. *
  120. *> \par Further Details:
  121. * =====================
  122. *>
  123. *> \verbatim
  124. *>
  125. *> The band storage scheme is illustrated by the following example, when
  126. *> M = N = 6, KL = 2, KU = 1:
  127. *>
  128. *> On entry: On exit:
  129. *>
  130. *> * * * + + + * * * u14 u25 u36
  131. *> * * + + + + * * u13 u24 u35 u46
  132. *> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
  133. *> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
  134. *> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
  135. *> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
  136. *>
  137. *> Array elements marked * are not used by the routine; elements marked
  138. *> + need not be set on entry, but are required by the routine to store
  139. *> elements of U because of fill-in resulting from the row interchanges.
  140. *> \endverbatim
  141. *>
  142. * =====================================================================
  143. SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  144. *
  145. * -- LAPACK computational routine --
  146. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  147. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  148. *
  149. * .. Scalar Arguments ..
  150. INTEGER INFO, KL, KU, LDAB, M, N
  151. * ..
  152. * .. Array Arguments ..
  153. INTEGER IPIV( * )
  154. DOUBLE PRECISION AB( LDAB, * )
  155. * ..
  156. *
  157. * =====================================================================
  158. *
  159. * .. Parameters ..
  160. DOUBLE PRECISION ONE, ZERO
  161. PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
  162. INTEGER NBMAX, LDWORK
  163. PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
  164. * ..
  165. * .. Local Scalars ..
  166. INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
  167. $ JU, K2, KM, KV, NB, NW
  168. DOUBLE PRECISION TEMP
  169. * ..
  170. * .. Local Arrays ..
  171. DOUBLE PRECISION WORK13( LDWORK, NBMAX ),
  172. $ WORK31( LDWORK, NBMAX )
  173. * ..
  174. * .. External Functions ..
  175. INTEGER IDAMAX, ILAENV
  176. EXTERNAL IDAMAX, ILAENV
  177. * ..
  178. * .. External Subroutines ..
  179. EXTERNAL DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
  180. $ DSWAP, DTRSM, XERBLA
  181. * ..
  182. * .. Intrinsic Functions ..
  183. INTRINSIC MAX, MIN
  184. * ..
  185. * .. Executable Statements ..
  186. *
  187. * KV is the number of superdiagonals in the factor U, allowing for
  188. * fill-in
  189. *
  190. KV = KU + KL
  191. *
  192. * Test the input parameters.
  193. *
  194. INFO = 0
  195. IF( M.LT.0 ) THEN
  196. INFO = -1
  197. ELSE IF( N.LT.0 ) THEN
  198. INFO = -2
  199. ELSE IF( KL.LT.0 ) THEN
  200. INFO = -3
  201. ELSE IF( KU.LT.0 ) THEN
  202. INFO = -4
  203. ELSE IF( LDAB.LT.KL+KV+1 ) THEN
  204. INFO = -6
  205. END IF
  206. IF( INFO.NE.0 ) THEN
  207. CALL XERBLA( 'DGBTRF', -INFO )
  208. RETURN
  209. END IF
  210. *
  211. * Quick return if possible
  212. *
  213. IF( M.EQ.0 .OR. N.EQ.0 )
  214. $ RETURN
  215. *
  216. * Determine the block size for this environment
  217. *
  218. NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )
  219. *
  220. * The block size must not exceed the limit set by the size of the
  221. * local arrays WORK13 and WORK31.
  222. *
  223. NB = MIN( NB, NBMAX )
  224. *
  225. IF( NB.LE.1 .OR. NB.GT.KL ) THEN
  226. *
  227. * Use unblocked code
  228. *
  229. CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  230. ELSE
  231. *
  232. * Use blocked code
  233. *
  234. * Zero the superdiagonal elements of the work array WORK13
  235. *
  236. DO 20 J = 1, NB
  237. DO 10 I = 1, J - 1
  238. WORK13( I, J ) = ZERO
  239. 10 CONTINUE
  240. 20 CONTINUE
  241. *
  242. * Zero the subdiagonal elements of the work array WORK31
  243. *
  244. DO 40 J = 1, NB
  245. DO 30 I = J + 1, NB
  246. WORK31( I, J ) = ZERO
  247. 30 CONTINUE
  248. 40 CONTINUE
  249. *
  250. * Gaussian elimination with partial pivoting
  251. *
  252. * Set fill-in elements in columns KU+2 to KV to zero
  253. *
  254. DO 60 J = KU + 2, MIN( KV, N )
  255. DO 50 I = KV - J + 2, KL
  256. AB( I, J ) = ZERO
  257. 50 CONTINUE
  258. 60 CONTINUE
  259. *
  260. * JU is the index of the last column affected by the current
  261. * stage of the factorization
  262. *
  263. JU = 1
  264. *
  265. DO 180 J = 1, MIN( M, N ), NB
  266. JB = MIN( NB, MIN( M, N )-J+1 )
  267. *
  268. * The active part of the matrix is partitioned
  269. *
  270. * A11 A12 A13
  271. * A21 A22 A23
  272. * A31 A32 A33
  273. *
  274. * Here A11, A21 and A31 denote the current block of JB columns
  275. * which is about to be factorized. The number of rows in the
  276. * partitioning are JB, I2, I3 respectively, and the numbers
  277. * of columns are JB, J2, J3. The superdiagonal elements of A13
  278. * and the subdiagonal elements of A31 lie outside the band.
  279. *
  280. I2 = MIN( KL-JB, M-J-JB+1 )
  281. I3 = MIN( JB, M-J-KL+1 )
  282. *
  283. * J2 and J3 are computed after JU has been updated.
  284. *
  285. * Factorize the current block of JB columns
  286. *
  287. DO 80 JJ = J, J + JB - 1
  288. *
  289. * Set fill-in elements in column JJ+KV to zero
  290. *
  291. IF( JJ+KV.LE.N ) THEN
  292. DO 70 I = 1, KL
  293. AB( I, JJ+KV ) = ZERO
  294. 70 CONTINUE
  295. END IF
  296. *
  297. * Find pivot and test for singularity. KM is the number of
  298. * subdiagonal elements in the current column.
  299. *
  300. KM = MIN( KL, M-JJ )
  301. JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
  302. IPIV( JJ ) = JP + JJ - J
  303. IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
  304. JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
  305. IF( JP.NE.1 ) THEN
  306. *
  307. * Apply interchange to columns J to J+JB-1
  308. *
  309. IF( JP+JJ-1.LT.J+KL ) THEN
  310. *
  311. CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
  312. $ AB( KV+JP+JJ-J, J ), LDAB-1 )
  313. ELSE
  314. *
  315. * The interchange affects columns J to JJ-1 of A31
  316. * which are stored in the work array WORK31
  317. *
  318. CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  319. $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
  320. CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
  321. $ AB( KV+JP, JJ ), LDAB-1 )
  322. END IF
  323. END IF
  324. *
  325. * Compute multipliers
  326. *
  327. CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
  328. $ 1 )
  329. *
  330. * Update trailing submatrix within the band and within
  331. * the current block. JM is the index of the last column
  332. * which needs to be updated.
  333. *
  334. JM = MIN( JU, J+JB-1 )
  335. IF( JM.GT.JJ )
  336. $ CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
  337. $ AB( KV, JJ+1 ), LDAB-1,
  338. $ AB( KV+1, JJ+1 ), LDAB-1 )
  339. ELSE
  340. *
  341. * If pivot is zero, set INFO to the index of the pivot
  342. * unless a zero pivot has already been found.
  343. *
  344. IF( INFO.EQ.0 )
  345. $ INFO = JJ
  346. END IF
  347. *
  348. * Copy current column of A31 into the work array WORK31
  349. *
  350. NW = MIN( JJ-J+1, I3 )
  351. IF( NW.GT.0 )
  352. $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
  353. $ WORK31( 1, JJ-J+1 ), 1 )
  354. 80 CONTINUE
  355. IF( J+JB.LE.N ) THEN
  356. *
  357. * Apply the row interchanges to the other blocks.
  358. *
  359. J2 = MIN( JU-J+1, KV ) - JB
  360. J3 = MAX( 0, JU-J-KV+1 )
  361. *
  362. * Use DLASWP to apply the row interchanges to A12, A22, and
  363. * A32.
  364. *
  365. CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
  366. $ IPIV( J ), 1 )
  367. *
  368. * Adjust the pivot indices.
  369. *
  370. DO 90 I = J, J + JB - 1
  371. IPIV( I ) = IPIV( I ) + J - 1
  372. 90 CONTINUE
  373. *
  374. * Apply the row interchanges to A13, A23, and A33
  375. * columnwise.
  376. *
  377. K2 = J - 1 + JB + J2
  378. DO 110 I = 1, J3
  379. JJ = K2 + I
  380. DO 100 II = J + I - 1, J + JB - 1
  381. IP = IPIV( II )
  382. IF( IP.NE.II ) THEN
  383. TEMP = AB( KV+1+II-JJ, JJ )
  384. AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
  385. AB( KV+1+IP-JJ, JJ ) = TEMP
  386. END IF
  387. 100 CONTINUE
  388. 110 CONTINUE
  389. *
  390. * Update the relevant part of the trailing submatrix
  391. *
  392. IF( J2.GT.0 ) THEN
  393. *
  394. * Update A12
  395. *
  396. CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
  397. $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
  398. $ AB( KV+1-JB, J+JB ), LDAB-1 )
  399. *
  400. IF( I2.GT.0 ) THEN
  401. *
  402. * Update A22
  403. *
  404. CALL DGEMM( 'No transpose', 'No transpose', I2, J2,
  405. $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
  406. $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
  407. $ AB( KV+1, J+JB ), LDAB-1 )
  408. END IF
  409. *
  410. IF( I3.GT.0 ) THEN
  411. *
  412. * Update A32
  413. *
  414. CALL DGEMM( 'No transpose', 'No transpose', I3, J2,
  415. $ JB, -ONE, WORK31, LDWORK,
  416. $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
  417. $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
  418. END IF
  419. END IF
  420. *
  421. IF( J3.GT.0 ) THEN
  422. *
  423. * Copy the lower triangle of A13 into the work array
  424. * WORK13
  425. *
  426. DO 130 JJ = 1, J3
  427. DO 120 II = JJ, JB
  428. WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
  429. 120 CONTINUE
  430. 130 CONTINUE
  431. *
  432. * Update A13 in the work array
  433. *
  434. CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
  435. $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
  436. $ WORK13, LDWORK )
  437. *
  438. IF( I2.GT.0 ) THEN
  439. *
  440. * Update A23
  441. *
  442. CALL DGEMM( 'No transpose', 'No transpose', I2, J3,
  443. $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
  444. $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
  445. $ LDAB-1 )
  446. END IF
  447. *
  448. IF( I3.GT.0 ) THEN
  449. *
  450. * Update A33
  451. *
  452. CALL DGEMM( 'No transpose', 'No transpose', I3, J3,
  453. $ JB, -ONE, WORK31, LDWORK, WORK13,
  454. $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
  455. END IF
  456. *
  457. * Copy the lower triangle of A13 back into place
  458. *
  459. DO 150 JJ = 1, J3
  460. DO 140 II = JJ, JB
  461. AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
  462. 140 CONTINUE
  463. 150 CONTINUE
  464. END IF
  465. ELSE
  466. *
  467. * Adjust the pivot indices.
  468. *
  469. DO 160 I = J, J + JB - 1
  470. IPIV( I ) = IPIV( I ) + J - 1
  471. 160 CONTINUE
  472. END IF
  473. *
  474. * Partially undo the interchanges in the current block to
  475. * restore the upper triangular form of A31 and copy the upper
  476. * triangle of A31 back into place
  477. *
  478. DO 170 JJ = J + JB - 1, J, -1
  479. JP = IPIV( JJ ) - JJ + 1
  480. IF( JP.NE.1 ) THEN
  481. *
  482. * Apply interchange to columns J to JJ-1
  483. *
  484. IF( JP+JJ-1.LT.J+KL ) THEN
  485. *
  486. * The interchange does not affect A31
  487. *
  488. CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  489. $ AB( KV+JP+JJ-J, J ), LDAB-1 )
  490. ELSE
  491. *
  492. * The interchange does affect A31
  493. *
  494. CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  495. $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
  496. END IF
  497. END IF
  498. *
  499. * Copy the current column of A31 back into place
  500. *
  501. NW = MIN( I3, JJ-J+1 )
  502. IF( NW.GT.0 )
  503. $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
  504. $ AB( KV+KL+1-JJ+J, JJ ), 1 )
  505. 170 CONTINUE
  506. 180 CONTINUE
  507. END IF
  508. *
  509. RETURN
  510. *
  511. * End of DGBTRF
  512. *
  513. END