You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dlaorhr_col_getrfnp2.f 8.9 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. *> \brief \b DLAORHR_COL_GETRFNP2
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download DLAORHR_COL_GETRFNP2 + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp2.f">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp2.f">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dlaorhr_col_getrfnp2.f">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * RECURSIVE SUBROUTINE DLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
  22. *
  23. * .. Scalar Arguments ..
  24. * INTEGER INFO, LDA, M, N
  25. * ..
  26. * .. Array Arguments ..
  27. * DOUBLE PRECISION A( LDA, * ), D( * )
  28. * ..
  29. *
  30. *
  31. *> \par Purpose:
  32. * =============
  33. *>
  34. *> \verbatim
  35. *>
  36. *> DLAORHR_COL_GETRFNP2 computes the modified LU factorization without
  37. *> pivoting of a real general M-by-N matrix A. The factorization has
  38. *> the form:
  39. *>
  40. *> A - S = L * U,
  41. *>
  42. *> where:
  43. *> S is an M-by-N diagonal sign matrix with the diagonal D, so that
  44. *> D(i) = S(i,i), 1 <= i <= min(M,N). The diagonal D is constructed
  45. *> as D(i)=-SIGN(A(i,i)), where A(i,i) is the value after performing
  46. *> i-1 steps of Gaussian elimination. This means that the diagonal
  47. *> element at each step of "modified" Gaussian elimination is at
  48. *> least one in absolute value (so that division-by-zero is not
  49. *> possible during the division by the diagonal element);
  50. *>
  51. *> L is an M-by-N lower triangular matrix with unit diagonal elements
  52. *> (lower trapezoidal if M > N);
  53. *>
  54. *> and U is an M-by-N upper triangular matrix
  55. *> (upper trapezoidal if M < N).
  56. *>
  57. *> This routine is an auxiliary routine used in the Householder
  58. *> reconstruction routine DORHR_COL. In DORHR_COL, this routine is
  59. *> applied to an M-by-N matrix A with orthonormal columns, where each
  60. *> element is bounded by one in absolute value. With the choice of
  61. *> the matrix S above, one can show that the diagonal element at each
  62. *> step of Gaussian elimination is the largest (in absolute value) in
  63. *> the column on or below the diagonal, so that no pivoting is required
  64. *> for numerical stability [1].
  65. *>
  66. *> For more details on the Householder reconstruction algorithm,
  67. *> including the modified LU factorization, see [1].
  68. *>
  69. *> This is the recursive version of the LU factorization algorithm.
  70. *> Denote A - S by B. The algorithm divides the matrix B into four
  71. *> submatrices:
  72. *>
  73. *>        [  B11 | B12  ]   where  B11 is n1 by n1,
  74. *>    B = [ -----|----- ]          B21 is (m-n1) by n1,
  75. *>        [  B21 | B22  ]          B12 is n1 by n2,
  76. *>                                 B22 is (m-n1) by n2,
  77. *>                          with n1 = min(m,n)/2, n2 = n-n1.
  78. *>
  79. *>
  80. *> The subroutine calls itself to factor B11, solves for B21,
  81. *> solves for B12, updates B22, then calls itself to factor B22.
  82. *>
  83. *> For more details on the recursive LU algorithm, see [2].
  84. *>
  85. *> DLAORHR_COL_GETRFNP2 is called to factorize a block by the blocked
  86. *> routine DLAORHR_COL_GETRFNP, which uses blocked code calling
  87. *> Level 3 BLAS to update the submatrix. However, DLAORHR_COL_GETRFNP2
  88. *> is self-sufficient and can be used without DLAORHR_COL_GETRFNP.
  89. *>
  90. *> [1] "Reconstructing Householder vectors from tall-skinny QR",
  91. *> G. Ballard, J. Demmel, L. Grigori, M. Jacquelin, H.D. Nguyen,
  92. *> E. Solomonik, J. Parallel Distrib. Comput.,
  93. *> vol. 85, pp. 3-31, 2015.
  94. *>
  95. *> [2] "Recursion leads to automatic variable blocking for dense linear
  96. *> algebra algorithms", F. Gustavson, IBM J. of Res. and Dev.,
  97. *> vol. 41, no. 6, pp. 737-755, 1997.
  98. *> \endverbatim
  99. *
  100. * Arguments:
  101. * ==========
  102. *
  103. *> \param[in] M
  104. *> \verbatim
  105. *> M is INTEGER
  106. *> The number of rows of the matrix A. M >= 0.
  107. *> \endverbatim
  108. *>
  109. *> \param[in] N
  110. *> \verbatim
  111. *> N is INTEGER
  112. *> The number of columns of the matrix A. N >= 0.
  113. *> \endverbatim
  114. *>
  115. *> \param[in,out] A
  116. *> \verbatim
  117. *> A is DOUBLE PRECISION array, dimension (LDA,N)
  118. *> On entry, the M-by-N matrix to be factored.
  119. *> On exit, the factors L and U from the factorization
  120. *> A-S=L*U; the unit diagonal elements of L are not stored.
  121. *> \endverbatim
  122. *>
  123. *> \param[in] LDA
  124. *> \verbatim
  125. *> LDA is INTEGER
  126. *> The leading dimension of the array A. LDA >= max(1,M).
  127. *> \endverbatim
  128. *>
  129. *> \param[out] D
  130. *> \verbatim
  131. *> D is DOUBLE PRECISION array, dimension min(M,N)
  132. *> The diagonal elements of the diagonal M-by-N sign matrix S,
  133. *> D(i) = S(i,i), where 1 <= i <= min(M,N). The elements can
  134. *> be only plus or minus one.
  135. *> \endverbatim
  136. *>
  137. *> \param[out] INFO
  138. *> \verbatim
  139. *> INFO is INTEGER
  140. *> = 0: successful exit
  141. *> < 0: if INFO = -i, the i-th argument had an illegal value
  142. *> \endverbatim
  143. *>
  144. * Authors:
  145. * ========
  146. *
  147. *> \author Univ. of Tennessee
  148. *> \author Univ. of California Berkeley
  149. *> \author Univ. of Colorado Denver
  150. *> \author NAG Ltd.
  151. *
  152. *> \date November 2019
  153. *
  154. *> \ingroup doubleGEcomputational
  155. *
  156. *> \par Contributors:
  157. * ==================
  158. *>
  159. *> \verbatim
  160. *>
  161. *> November 2019, Igor Kozachenko,
  162. *> Computer Science Division,
  163. *> University of California, Berkeley
  164. *>
  165. *> \endverbatim
  166. *
  167. * =====================================================================
  168. RECURSIVE SUBROUTINE DLAORHR_COL_GETRFNP2( M, N, A, LDA, D, INFO )
  169. IMPLICIT NONE
  170. *
  171. * -- LAPACK computational routine (version 3.9.0) --
  172. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  173. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  174. * November 2019
  175. *
  176. * .. Scalar Arguments ..
  177. INTEGER INFO, LDA, M, N
  178. * ..
  179. * .. Array Arguments ..
  180. DOUBLE PRECISION A( LDA, * ), D( * )
  181. * ..
  182. *
  183. * =====================================================================
  184. *
  185. * .. Parameters ..
  186. DOUBLE PRECISION ONE
  187. PARAMETER ( ONE = 1.0D+0 )
  188. * ..
  189. * .. Local Scalars ..
  190. DOUBLE PRECISION SFMIN
  191. INTEGER I, IINFO, N1, N2
  192. * ..
  193. * .. External Functions ..
  194. DOUBLE PRECISION DLAMCH
  195. EXTERNAL DLAMCH
  196. * ..
  197. * .. External Subroutines ..
  198. EXTERNAL DGEMM, DSCAL, DTRSM, XERBLA
  199. * ..
  200. * .. Intrinsic Functions ..
  201. INTRINSIC ABS, DSIGN, MAX, MIN
  202. * ..
  203. * .. Executable Statements ..
  204. *
  205. * Test the input parameters
  206. *
  207. INFO = 0
  208. IF( M.LT.0 ) THEN
  209. INFO = -1
  210. ELSE IF( N.LT.0 ) THEN
  211. INFO = -2
  212. ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
  213. INFO = -4
  214. END IF
  215. IF( INFO.NE.0 ) THEN
  216. CALL XERBLA( 'DLAORHR_COL_GETRFNP2', -INFO )
  217. RETURN
  218. END IF
  219. *
  220. * Quick return if possible
  221. *
  222. IF( MIN( M, N ).EQ.0 )
  223. $ RETURN
  224. IF ( M.EQ.1 ) THEN
  225. *
  226. * One row case, (also recursion termination case),
  227. * use unblocked code
  228. *
  229. * Transfer the sign
  230. *
  231. D( 1 ) = -DSIGN( ONE, A( 1, 1 ) )
  232. *
  233. * Construct the row of U
  234. *
  235. A( 1, 1 ) = A( 1, 1 ) - D( 1 )
  236. *
  237. ELSE IF( N.EQ.1 ) THEN
  238. *
  239. * One column case, (also recursion termination case),
  240. * use unblocked code
  241. *
  242. * Transfer the sign
  243. *
  244. D( 1 ) = -DSIGN( ONE, A( 1, 1 ) )
  245. *
  246. * Construct the row of U
  247. *
  248. A( 1, 1 ) = A( 1, 1 ) - D( 1 )
  249. *
  250. * Scale the elements 2:M of the column
  251. *
  252. * Determine machine safe minimum
  253. *
  254. SFMIN = DLAMCH('S')
  255. *
  256. * Construct the subdiagonal elements of L
  257. *
  258. IF( ABS( A( 1, 1 ) ) .GE. SFMIN ) THEN
  259. CALL DSCAL( M-1, ONE / A( 1, 1 ), A( 2, 1 ), 1 )
  260. ELSE
  261. DO I = 2, M
  262. A( I, 1 ) = A( I, 1 ) / A( 1, 1 )
  263. END DO
  264. END IF
  265. *
  266. ELSE
  267. *
  268. * Divide the matrix B into four submatrices
  269. *
  270. N1 = MIN( M, N ) / 2
  271. N2 = N-N1
  272. *
  273. * Factor B11, recursive call
  274. *
  275. CALL DLAORHR_COL_GETRFNP2( N1, N1, A, LDA, D, IINFO )
  276. *
  277. * Solve for B21
  278. *
  279. CALL DTRSM( 'R', 'U', 'N', 'N', M-N1, N1, ONE, A, LDA,
  280. $ A( N1+1, 1 ), LDA )
  281. *
  282. * Solve for B12
  283. *
  284. CALL DTRSM( 'L', 'L', 'N', 'U', N1, N2, ONE, A, LDA,
  285. $ A( 1, N1+1 ), LDA )
  286. *
  287. * Update B22, i.e. compute the Schur complement
  288. * B22 := B22 - B21*B12
  289. *
  290. CALL DGEMM( 'N', 'N', M-N1, N2, N1, -ONE, A( N1+1, 1 ), LDA,
  291. $ A( 1, N1+1 ), LDA, ONE, A( N1+1, N1+1 ), LDA )
  292. *
  293. * Factor B22, recursive call
  294. *
  295. CALL DLAORHR_COL_GETRFNP2( M-N1, N2, A( N1+1, N1+1 ), LDA,
  296. $ D( N1+1 ), IINFO )
  297. *
  298. END IF
  299. RETURN
  300. *
  301. * End of DLAORHR_COL_GETRFNP2
  302. *
  303. END