You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

cgetrf.f 8.3 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. C> \brief \b CGETRF VARIANT: iterative version of Sivan Toledo's recursive LU algorithm
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. * Definition:
  9. * ===========
  10. *
  11. * SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO )
  12. *
  13. * .. Scalar Arguments ..
  14. * INTEGER INFO, LDA, M, N
  15. * ..
  16. * .. Array Arguments ..
  17. * INTEGER IPIV( * )
  18. * COMPLEX A( LDA, * )
  19. * ..
  20. *
  21. * Purpose
  22. * =======
  23. *
  24. C>\details \b Purpose:
  25. C>\verbatim
  26. C>
  27. C> CGETRF computes an LU factorization of a general M-by-N matrix A
  28. C> using partial pivoting with row interchanges.
  29. C>
  30. C> The factorization has the form
  31. C> A = P * L * U
  32. C> where P is a permutation matrix, L is lower triangular with unit
  33. C> diagonal elements (lower trapezoidal if m > n), and U is upper
  34. C> triangular (upper trapezoidal if m < n).
  35. C>
  36. C> This code implements an iterative version of Sivan Toledo's recursive
  37. C> LU algorithm[1]. For square matrices, this iterative versions should
  38. C> be within a factor of two of the optimum number of memory transfers.
  39. C>
  40. C> The pattern is as follows, with the large blocks of U being updated
  41. C> in one call to DTRSM, and the dotted lines denoting sections that
  42. C> have had all pending permutations applied:
  43. C>
  44. C> 1 2 3 4 5 6 7 8
  45. C> +-+-+---+-------+------
  46. C> | |1| | |
  47. C> |.+-+ 2 | |
  48. C> | | | | |
  49. C> |.|.+-+-+ 4 |
  50. C> | | | |1| |
  51. C> | | |.+-+ |
  52. C> | | | | | |
  53. C> |.|.|.|.+-+-+---+ 8
  54. C> | | | | | |1| |
  55. C> | | | | |.+-+ 2 |
  56. C> | | | | | | | |
  57. C> | | | | |.|.+-+-+
  58. C> | | | | | | | |1|
  59. C> | | | | | | |.+-+
  60. C> | | | | | | | | |
  61. C> |.|.|.|.|.|.|.|.+-----
  62. C> | | | | | | | | |
  63. C>
  64. C> The 1-2-1-4-1-2-1-8-... pattern is the position of the last 1 bit in
  65. C> the binary expansion of the current column. Each Schur update is
  66. C> applied as soon as the necessary portion of U is available.
  67. C>
  68. C> [1] Toledo, S. 1997. Locality of Reference in LU Decomposition with
  69. C> Partial Pivoting. SIAM J. Matrix Anal. Appl. 18, 4 (Oct. 1997),
  70. C> 1065-1081. http://dx.doi.org/10.1137/S0895479896297744
  71. C>
  72. C>\endverbatim
  73. *
  74. * Arguments:
  75. * ==========
  76. *
  77. C> \param[in] M
  78. C> \verbatim
  79. C> M is INTEGER
  80. C> The number of rows of the matrix A. M >= 0.
  81. C> \endverbatim
  82. C>
  83. C> \param[in] N
  84. C> \verbatim
  85. C> N is INTEGER
  86. C> The number of columns of the matrix A. N >= 0.
  87. C> \endverbatim
  88. C>
  89. C> \param[in,out] A
  90. C> \verbatim
  91. C> A is COMPLEX array, dimension (LDA,N)
  92. C> On entry, the M-by-N matrix to be factored.
  93. C> On exit, the factors L and U from the factorization
  94. C> A = P*L*U; the unit diagonal elements of L are not stored.
  95. C> \endverbatim
  96. C>
  97. C> \param[in] LDA
  98. C> \verbatim
  99. C> LDA is INTEGER
  100. C> The leading dimension of the array A. LDA >= max(1,M).
  101. C> \endverbatim
  102. C>
  103. C> \param[out] IPIV
  104. C> \verbatim
  105. C> IPIV is INTEGER array, dimension (min(M,N))
  106. C> The pivot indices; for 1 <= i <= min(M,N), row i of the
  107. C> matrix was interchanged with row IPIV(i).
  108. C> \endverbatim
  109. C>
  110. C> \param[out] INFO
  111. C> \verbatim
  112. C> INFO is INTEGER
  113. C> = 0: successful exit
  114. C> < 0: if INFO = -i, the i-th argument had an illegal value
  115. C> > 0: if INFO = i, U(i,i) is exactly zero. The factorization
  116. C> has been completed, but the factor U is exactly
  117. C> singular, and division by zero will occur if it is used
  118. C> to solve a system of equations.
  119. C> \endverbatim
  120. C>
  121. *
  122. * Authors:
  123. * ========
  124. *
  125. C> \author Univ. of Tennessee
  126. C> \author Univ. of California Berkeley
  127. C> \author Univ. of Colorado Denver
  128. C> \author NAG Ltd.
  129. *
  130. C> \date November 2011
  131. *
  132. C> \ingroup variantsGEcomputational
  133. *
  134. * =====================================================================
  135. SUBROUTINE CGETRF( M, N, A, LDA, IPIV, INFO )
  136. *
  137. * -- LAPACK computational routine (version 3.X) --
  138. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  139. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  140. * November 2011
  141. *
  142. * .. Scalar Arguments ..
  143. INTEGER INFO, LDA, M, N
  144. * ..
  145. * .. Array Arguments ..
  146. INTEGER IPIV( * )
  147. COMPLEX A( LDA, * )
  148. * ..
  149. *
  150. * =====================================================================
  151. *
  152. * .. Parameters ..
  153. COMPLEX ONE, NEGONE
  154. REAL ZERO
  155. PARAMETER ( ONE = (1.0E+0, 0.0E+0) )
  156. PARAMETER ( NEGONE = (-1.0E+0, 0.0E+0) )
  157. PARAMETER ( ZERO = 0.0E+0 )
  158. * ..
  159. * .. Local Scalars ..
  160. REAL SFMIN, PIVMAG
  161. COMPLEX TMP
  162. INTEGER I, J, JP, NSTEP, NTOPIV, NPIVED, KAHEAD
  163. INTEGER KSTART, IPIVSTART, JPIVSTART, KCOLS
  164. * ..
  165. * .. External Functions ..
  166. REAL SLAMCH
  167. INTEGER ICAMAX
  168. LOGICAL SISNAN
  169. EXTERNAL SLAMCH, ICAMAX, SISNAN
  170. * ..
  171. * .. External Subroutines ..
  172. EXTERNAL CTRSM, CSCAL, XERBLA, CLASWP
  173. * ..
  174. * .. Intrinsic Functions ..
  175. INTRINSIC MAX, MIN, IAND, ABS
  176. * ..
  177. * .. Executable Statements ..
  178. *
  179. * Test the input parameters.
  180. *
  181. INFO = 0
  182. IF( M.LT.0 ) THEN
  183. INFO = -1
  184. ELSE IF( N.LT.0 ) THEN
  185. INFO = -2
  186. ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
  187. INFO = -4
  188. END IF
  189. IF( INFO.NE.0 ) THEN
  190. CALL XERBLA( 'CGETRF', -INFO )
  191. RETURN
  192. END IF
  193. *
  194. * Quick return if possible
  195. *
  196. IF( M.EQ.0 .OR. N.EQ.0 )
  197. $ RETURN
  198. *
  199. * Compute machine safe minimum
  200. *
  201. SFMIN = SLAMCH( 'S' )
  202. *
  203. NSTEP = MIN( M, N )
  204. DO J = 1, NSTEP
  205. KAHEAD = IAND( J, -J )
  206. KSTART = J + 1 - KAHEAD
  207. KCOLS = MIN( KAHEAD, M-J )
  208. *
  209. * Find pivot.
  210. *
  211. JP = J - 1 + ICAMAX( M-J+1, A( J, J ), 1 )
  212. IPIV( J ) = JP
  213. * Permute just this column.
  214. IF (JP .NE. J) THEN
  215. TMP = A( J, J )
  216. A( J, J ) = A( JP, J )
  217. A( JP, J ) = TMP
  218. END IF
  219. * Apply pending permutations to L
  220. NTOPIV = 1
  221. IPIVSTART = J
  222. JPIVSTART = J - NTOPIV
  223. DO WHILE ( NTOPIV .LT. KAHEAD )
  224. CALL CLASWP( NTOPIV, A( 1, JPIVSTART ), LDA, IPIVSTART, J,
  225. $ IPIV, 1 )
  226. IPIVSTART = IPIVSTART - NTOPIV;
  227. NTOPIV = NTOPIV * 2;
  228. JPIVSTART = JPIVSTART - NTOPIV;
  229. END DO
  230. * Permute U block to match L
  231. CALL CLASWP( KCOLS, A( 1,J+1 ), LDA, KSTART, J, IPIV, 1 )
  232. * Factor the current column
  233. PIVMAG = ABS( A( J, J ) )
  234. IF( PIVMAG.NE.ZERO .AND. .NOT.SISNAN( PIVMAG ) ) THEN
  235. IF( PIVMAG .GE. SFMIN ) THEN
  236. CALL CSCAL( M-J, ONE / A( J, J ), A( J+1, J ), 1 )
  237. ELSE
  238. DO I = 1, M-J
  239. A( J+I, J ) = A( J+I, J ) / A( J, J )
  240. END DO
  241. END IF
  242. ELSE IF( PIVMAG .EQ. ZERO .AND. INFO .EQ. 0 ) THEN
  243. INFO = J
  244. END IF
  245. * Solve for U block.
  246. CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', KAHEAD,
  247. $ KCOLS, ONE, A( KSTART, KSTART ), LDA,
  248. $ A( KSTART, J+1 ), LDA )
  249. * Schur complement.
  250. CALL CGEMM( 'No transpose', 'No transpose', M-J,
  251. $ KCOLS, KAHEAD, NEGONE, A( J+1, KSTART ), LDA,
  252. $ A( KSTART, J+1 ), LDA, ONE, A( J+1, J+1 ), LDA )
  253. END DO
  254. * Handle pivot permutations on the way out of the recursion
  255. NPIVED = IAND( NSTEP, -NSTEP )
  256. J = NSTEP - NPIVED
  257. DO WHILE ( J .GT. 0 )
  258. NTOPIV = IAND( J, -J )
  259. CALL CLASWP( NTOPIV, A( 1, J-NTOPIV+1 ), LDA, J+1, NSTEP,
  260. $ IPIV, 1 )
  261. J = J - NTOPIV
  262. END DO
  263. * If short and wide, handle the rest of the columns.
  264. IF ( M .LT. N ) THEN
  265. CALL CLASWP( N-M, A( 1, M+KCOLS+1 ), LDA, 1, M, IPIV, 1 )
  266. CALL CTRSM( 'Left', 'Lower', 'No transpose', 'Unit', M,
  267. $ N-M, ONE, A, LDA, A( 1,M+KCOLS+1 ), LDA )
  268. END IF
  269. RETURN
  270. *
  271. * End of CGETRF
  272. *
  273. END