You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dgbtrf.f 16 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516
  1. *> \brief \b DGBTRF
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download DGBTRF + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgbtrf.f">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgbtrf.f">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgbtrf.f">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  22. *
  23. * .. Scalar Arguments ..
  24. * INTEGER INFO, KL, KU, LDAB, M, N
  25. * ..
  26. * .. Array Arguments ..
  27. * INTEGER IPIV( * )
  28. * DOUBLE PRECISION AB( LDAB, * )
  29. * ..
  30. *
  31. *
  32. *> \par Purpose:
  33. * =============
  34. *>
  35. *> \verbatim
  36. *>
  37. *> DGBTRF computes an LU factorization of a real m-by-n band matrix A
  38. *> using partial pivoting with row interchanges.
  39. *>
  40. *> This is the blocked version of the algorithm, calling Level 3 BLAS.
  41. *> \endverbatim
  42. *
  43. * Arguments:
  44. * ==========
  45. *
  46. *> \param[in] M
  47. *> \verbatim
  48. *> M is INTEGER
  49. *> The number of rows of the matrix A. M >= 0.
  50. *> \endverbatim
  51. *>
  52. *> \param[in] N
  53. *> \verbatim
  54. *> N is INTEGER
  55. *> The number of columns of the matrix A. N >= 0.
  56. *> \endverbatim
  57. *>
  58. *> \param[in] KL
  59. *> \verbatim
  60. *> KL is INTEGER
  61. *> The number of subdiagonals within the band of A. KL >= 0.
  62. *> \endverbatim
  63. *>
  64. *> \param[in] KU
  65. *> \verbatim
  66. *> KU is INTEGER
  67. *> The number of superdiagonals within the band of A. KU >= 0.
  68. *> \endverbatim
  69. *>
  70. *> \param[in,out] AB
  71. *> \verbatim
  72. *> AB is DOUBLE PRECISION array, dimension (LDAB,N)
  73. *> On entry, the matrix A in band storage, in rows KL+1 to
  74. *> 2*KL+KU+1; rows 1 to KL of the array need not be set.
  75. *> The j-th column of A is stored in the j-th column of the
  76. *> array AB as follows:
  77. *> AB(kl+ku+1+i-j,j) = A(i,j) for max(1,j-ku)<=i<=min(m,j+kl)
  78. *>
  79. *> On exit, details of the factorization: U is stored as an
  80. *> upper triangular band matrix with KL+KU superdiagonals in
  81. *> rows 1 to KL+KU+1, and the multipliers used during the
  82. *> factorization are stored in rows KL+KU+2 to 2*KL+KU+1.
  83. *> See below for further details.
  84. *> \endverbatim
  85. *>
  86. *> \param[in] LDAB
  87. *> \verbatim
  88. *> LDAB is INTEGER
  89. *> The leading dimension of the array AB. LDAB >= 2*KL+KU+1.
  90. *> \endverbatim
  91. *>
  92. *> \param[out] IPIV
  93. *> \verbatim
  94. *> IPIV is INTEGER array, dimension (min(M,N))
  95. *> The pivot indices; for 1 <= i <= min(M,N), row i of the
  96. *> matrix was interchanged with row IPIV(i).
  97. *> \endverbatim
  98. *>
  99. *> \param[out] INFO
  100. *> \verbatim
  101. *> INFO is INTEGER
  102. *> = 0: successful exit
  103. *> < 0: if INFO = -i, the i-th argument had an illegal value
  104. *> > 0: if INFO = +i, U(i,i) is exactly zero. The factorization
  105. *> has been completed, but the factor U is exactly
  106. *> singular, and division by zero will occur if it is used
  107. *> to solve a system of equations.
  108. *> \endverbatim
  109. *
  110. * Authors:
  111. * ========
  112. *
  113. *> \author Univ. of Tennessee
  114. *> \author Univ. of California Berkeley
  115. *> \author Univ. of Colorado Denver
  116. *> \author NAG Ltd.
  117. *
  118. *> \date December 2016
  119. *
  120. *> \ingroup doubleGBcomputational
  121. *
  122. *> \par Further Details:
  123. * =====================
  124. *>
  125. *> \verbatim
  126. *>
  127. *> The band storage scheme is illustrated by the following example, when
  128. *> M = N = 6, KL = 2, KU = 1:
  129. *>
  130. *> On entry: On exit:
  131. *>
  132. *> * * * + + + * * * u14 u25 u36
  133. *> * * + + + + * * u13 u24 u35 u46
  134. *> * a12 a23 a34 a45 a56 * u12 u23 u34 u45 u56
  135. *> a11 a22 a33 a44 a55 a66 u11 u22 u33 u44 u55 u66
  136. *> a21 a32 a43 a54 a65 * m21 m32 m43 m54 m65 *
  137. *> a31 a42 a53 a64 * * m31 m42 m53 m64 * *
  138. *>
  139. *> Array elements marked * are not used by the routine; elements marked
  140. *> + need not be set on entry, but are required by the routine to store
  141. *> elements of U because of fill-in resulting from the row interchanges.
  142. *> \endverbatim
  143. *>
  144. * =====================================================================
  145. SUBROUTINE DGBTRF( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  146. *
  147. * -- LAPACK computational routine (version 3.7.0) --
  148. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  149. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  150. * December 2016
  151. *
  152. * .. Scalar Arguments ..
  153. INTEGER INFO, KL, KU, LDAB, M, N
  154. * ..
  155. * .. Array Arguments ..
  156. INTEGER IPIV( * )
  157. DOUBLE PRECISION AB( LDAB, * )
  158. * ..
  159. *
  160. * =====================================================================
  161. *
  162. * .. Parameters ..
  163. DOUBLE PRECISION ONE, ZERO
  164. PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
  165. INTEGER NBMAX, LDWORK
  166. PARAMETER ( NBMAX = 64, LDWORK = NBMAX+1 )
  167. * ..
  168. * .. Local Scalars ..
  169. INTEGER I, I2, I3, II, IP, J, J2, J3, JB, JJ, JM, JP,
  170. $ JU, K2, KM, KV, NB, NW
  171. DOUBLE PRECISION TEMP
  172. * ..
  173. * .. Local Arrays ..
  174. DOUBLE PRECISION WORK13( LDWORK, NBMAX ),
  175. $ WORK31( LDWORK, NBMAX )
  176. * ..
  177. * .. External Functions ..
  178. INTEGER IDAMAX, ILAENV
  179. EXTERNAL IDAMAX, ILAENV
  180. * ..
  181. * .. External Subroutines ..
  182. EXTERNAL DCOPY, DGBTF2, DGEMM, DGER, DLASWP, DSCAL,
  183. $ DSWAP, DTRSM, XERBLA
  184. * ..
  185. * .. Intrinsic Functions ..
  186. INTRINSIC MAX, MIN
  187. * ..
  188. * .. Executable Statements ..
  189. *
  190. * KV is the number of superdiagonals in the factor U, allowing for
  191. * fill-in
  192. *
  193. KV = KU + KL
  194. *
  195. * Test the input parameters.
  196. *
  197. INFO = 0
  198. IF( M.LT.0 ) THEN
  199. INFO = -1
  200. ELSE IF( N.LT.0 ) THEN
  201. INFO = -2
  202. ELSE IF( KL.LT.0 ) THEN
  203. INFO = -3
  204. ELSE IF( KU.LT.0 ) THEN
  205. INFO = -4
  206. ELSE IF( LDAB.LT.KL+KV+1 ) THEN
  207. INFO = -6
  208. END IF
  209. IF( INFO.NE.0 ) THEN
  210. CALL XERBLA( 'DGBTRF', -INFO )
  211. RETURN
  212. END IF
  213. *
  214. * Quick return if possible
  215. *
  216. IF( M.EQ.0 .OR. N.EQ.0 )
  217. $ RETURN
  218. *
  219. * Determine the block size for this environment
  220. *
  221. NB = ILAENV( 1, 'DGBTRF', ' ', M, N, KL, KU )
  222. *
  223. * The block size must not exceed the limit set by the size of the
  224. * local arrays WORK13 and WORK31.
  225. *
  226. NB = MIN( NB, NBMAX )
  227. *
  228. IF( NB.LE.1 .OR. NB.GT.KL ) THEN
  229. *
  230. * Use unblocked code
  231. *
  232. CALL DGBTF2( M, N, KL, KU, AB, LDAB, IPIV, INFO )
  233. ELSE
  234. *
  235. * Use blocked code
  236. *
  237. * Zero the superdiagonal elements of the work array WORK13
  238. *
  239. DO 20 J = 1, NB
  240. DO 10 I = 1, J - 1
  241. WORK13( I, J ) = ZERO
  242. 10 CONTINUE
  243. 20 CONTINUE
  244. *
  245. * Zero the subdiagonal elements of the work array WORK31
  246. *
  247. DO 40 J = 1, NB
  248. DO 30 I = J + 1, NB
  249. WORK31( I, J ) = ZERO
  250. 30 CONTINUE
  251. 40 CONTINUE
  252. *
  253. * Gaussian elimination with partial pivoting
  254. *
  255. * Set fill-in elements in columns KU+2 to KV to zero
  256. *
  257. DO 60 J = KU + 2, MIN( KV, N )
  258. DO 50 I = KV - J + 2, KL
  259. AB( I, J ) = ZERO
  260. 50 CONTINUE
  261. 60 CONTINUE
  262. *
  263. * JU is the index of the last column affected by the current
  264. * stage of the factorization
  265. *
  266. JU = 1
  267. *
  268. DO 180 J = 1, MIN( M, N ), NB
  269. JB = MIN( NB, MIN( M, N )-J+1 )
  270. *
  271. * The active part of the matrix is partitioned
  272. *
  273. * A11 A12 A13
  274. * A21 A22 A23
  275. * A31 A32 A33
  276. *
  277. * Here A11, A21 and A31 denote the current block of JB columns
  278. * which is about to be factorized. The number of rows in the
  279. * partitioning are JB, I2, I3 respectively, and the numbers
  280. * of columns are JB, J2, J3. The superdiagonal elements of A13
  281. * and the subdiagonal elements of A31 lie outside the band.
  282. *
  283. I2 = MIN( KL-JB, M-J-JB+1 )
  284. I3 = MIN( JB, M-J-KL+1 )
  285. *
  286. * J2 and J3 are computed after JU has been updated.
  287. *
  288. * Factorize the current block of JB columns
  289. *
  290. DO 80 JJ = J, J + JB - 1
  291. *
  292. * Set fill-in elements in column JJ+KV to zero
  293. *
  294. IF( JJ+KV.LE.N ) THEN
  295. DO 70 I = 1, KL
  296. AB( I, JJ+KV ) = ZERO
  297. 70 CONTINUE
  298. END IF
  299. *
  300. * Find pivot and test for singularity. KM is the number of
  301. * subdiagonal elements in the current column.
  302. *
  303. KM = MIN( KL, M-JJ )
  304. JP = IDAMAX( KM+1, AB( KV+1, JJ ), 1 )
  305. IPIV( JJ ) = JP + JJ - J
  306. IF( AB( KV+JP, JJ ).NE.ZERO ) THEN
  307. JU = MAX( JU, MIN( JJ+KU+JP-1, N ) )
  308. IF( JP.NE.1 ) THEN
  309. *
  310. * Apply interchange to columns J to J+JB-1
  311. *
  312. IF( JP+JJ-1.LT.J+KL ) THEN
  313. *
  314. CALL DSWAP( JB, AB( KV+1+JJ-J, J ), LDAB-1,
  315. $ AB( KV+JP+JJ-J, J ), LDAB-1 )
  316. ELSE
  317. *
  318. * The interchange affects columns J to JJ-1 of A31
  319. * which are stored in the work array WORK31
  320. *
  321. CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  322. $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
  323. CALL DSWAP( J+JB-JJ, AB( KV+1, JJ ), LDAB-1,
  324. $ AB( KV+JP, JJ ), LDAB-1 )
  325. END IF
  326. END IF
  327. *
  328. * Compute multipliers
  329. *
  330. CALL DSCAL( KM, ONE / AB( KV+1, JJ ), AB( KV+2, JJ ),
  331. $ 1 )
  332. *
  333. * Update trailing submatrix within the band and within
  334. * the current block. JM is the index of the last column
  335. * which needs to be updated.
  336. *
  337. JM = MIN( JU, J+JB-1 )
  338. IF( JM.GT.JJ )
  339. $ CALL DGER( KM, JM-JJ, -ONE, AB( KV+2, JJ ), 1,
  340. $ AB( KV, JJ+1 ), LDAB-1,
  341. $ AB( KV+1, JJ+1 ), LDAB-1 )
  342. ELSE
  343. *
  344. * If pivot is zero, set INFO to the index of the pivot
  345. * unless a zero pivot has already been found.
  346. *
  347. IF( INFO.EQ.0 )
  348. $ INFO = JJ
  349. END IF
  350. *
  351. * Copy current column of A31 into the work array WORK31
  352. *
  353. NW = MIN( JJ-J+1, I3 )
  354. IF( NW.GT.0 )
  355. $ CALL DCOPY( NW, AB( KV+KL+1-JJ+J, JJ ), 1,
  356. $ WORK31( 1, JJ-J+1 ), 1 )
  357. 80 CONTINUE
  358. IF( J+JB.LE.N ) THEN
  359. *
  360. * Apply the row interchanges to the other blocks.
  361. *
  362. J2 = MIN( JU-J+1, KV ) - JB
  363. J3 = MAX( 0, JU-J-KV+1 )
  364. *
  365. * Use DLASWP to apply the row interchanges to A12, A22, and
  366. * A32.
  367. *
  368. CALL DLASWP( J2, AB( KV+1-JB, J+JB ), LDAB-1, 1, JB,
  369. $ IPIV( J ), 1 )
  370. *
  371. * Adjust the pivot indices.
  372. *
  373. DO 90 I = J, J + JB - 1
  374. IPIV( I ) = IPIV( I ) + J - 1
  375. 90 CONTINUE
  376. *
  377. * Apply the row interchanges to A13, A23, and A33
  378. * columnwise.
  379. *
  380. K2 = J - 1 + JB + J2
  381. DO 110 I = 1, J3
  382. JJ = K2 + I
  383. DO 100 II = J + I - 1, J + JB - 1
  384. IP = IPIV( II )
  385. IF( IP.NE.II ) THEN
  386. TEMP = AB( KV+1+II-JJ, JJ )
  387. AB( KV+1+II-JJ, JJ ) = AB( KV+1+IP-JJ, JJ )
  388. AB( KV+1+IP-JJ, JJ ) = TEMP
  389. END IF
  390. 100 CONTINUE
  391. 110 CONTINUE
  392. *
  393. * Update the relevant part of the trailing submatrix
  394. *
  395. IF( J2.GT.0 ) THEN
  396. *
  397. * Update A12
  398. *
  399. CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
  400. $ JB, J2, ONE, AB( KV+1, J ), LDAB-1,
  401. $ AB( KV+1-JB, J+JB ), LDAB-1 )
  402. *
  403. IF( I2.GT.0 ) THEN
  404. *
  405. * Update A22
  406. *
  407. CALL DGEMM( 'No transpose', 'No transpose', I2, J2,
  408. $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
  409. $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
  410. $ AB( KV+1, J+JB ), LDAB-1 )
  411. END IF
  412. *
  413. IF( I3.GT.0 ) THEN
  414. *
  415. * Update A32
  416. *
  417. CALL DGEMM( 'No transpose', 'No transpose', I3, J2,
  418. $ JB, -ONE, WORK31, LDWORK,
  419. $ AB( KV+1-JB, J+JB ), LDAB-1, ONE,
  420. $ AB( KV+KL+1-JB, J+JB ), LDAB-1 )
  421. END IF
  422. END IF
  423. *
  424. IF( J3.GT.0 ) THEN
  425. *
  426. * Copy the lower triangle of A13 into the work array
  427. * WORK13
  428. *
  429. DO 130 JJ = 1, J3
  430. DO 120 II = JJ, JB
  431. WORK13( II, JJ ) = AB( II-JJ+1, JJ+J+KV-1 )
  432. 120 CONTINUE
  433. 130 CONTINUE
  434. *
  435. * Update A13 in the work array
  436. *
  437. CALL DTRSM( 'Left', 'Lower', 'No transpose', 'Unit',
  438. $ JB, J3, ONE, AB( KV+1, J ), LDAB-1,
  439. $ WORK13, LDWORK )
  440. *
  441. IF( I2.GT.0 ) THEN
  442. *
  443. * Update A23
  444. *
  445. CALL DGEMM( 'No transpose', 'No transpose', I2, J3,
  446. $ JB, -ONE, AB( KV+1+JB, J ), LDAB-1,
  447. $ WORK13, LDWORK, ONE, AB( 1+JB, J+KV ),
  448. $ LDAB-1 )
  449. END IF
  450. *
  451. IF( I3.GT.0 ) THEN
  452. *
  453. * Update A33
  454. *
  455. CALL DGEMM( 'No transpose', 'No transpose', I3, J3,
  456. $ JB, -ONE, WORK31, LDWORK, WORK13,
  457. $ LDWORK, ONE, AB( 1+KL, J+KV ), LDAB-1 )
  458. END IF
  459. *
  460. * Copy the lower triangle of A13 back into place
  461. *
  462. DO 150 JJ = 1, J3
  463. DO 140 II = JJ, JB
  464. AB( II-JJ+1, JJ+J+KV-1 ) = WORK13( II, JJ )
  465. 140 CONTINUE
  466. 150 CONTINUE
  467. END IF
  468. ELSE
  469. *
  470. * Adjust the pivot indices.
  471. *
  472. DO 160 I = J, J + JB - 1
  473. IPIV( I ) = IPIV( I ) + J - 1
  474. 160 CONTINUE
  475. END IF
  476. *
  477. * Partially undo the interchanges in the current block to
  478. * restore the upper triangular form of A31 and copy the upper
  479. * triangle of A31 back into place
  480. *
  481. DO 170 JJ = J + JB - 1, J, -1
  482. JP = IPIV( JJ ) - JJ + 1
  483. IF( JP.NE.1 ) THEN
  484. *
  485. * Apply interchange to columns J to JJ-1
  486. *
  487. IF( JP+JJ-1.LT.J+KL ) THEN
  488. *
  489. * The interchange does not affect A31
  490. *
  491. CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  492. $ AB( KV+JP+JJ-J, J ), LDAB-1 )
  493. ELSE
  494. *
  495. * The interchange does affect A31
  496. *
  497. CALL DSWAP( JJ-J, AB( KV+1+JJ-J, J ), LDAB-1,
  498. $ WORK31( JP+JJ-J-KL, 1 ), LDWORK )
  499. END IF
  500. END IF
  501. *
  502. * Copy the current column of A31 back into place
  503. *
  504. NW = MIN( I3, JJ-J+1 )
  505. IF( NW.GT.0 )
  506. $ CALL DCOPY( NW, WORK31( 1, JJ-J+1 ), 1,
  507. $ AB( KV+KL+1-JJ+J, JJ ), 1 )
  508. 170 CONTINUE
  509. 180 CONTINUE
  510. END IF
  511. *
  512. RETURN
  513. *
  514. * End of DGBTRF
  515. *
  516. END