You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

sgeqrf.f 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. C> \brief \b SGEQRF VARIANT: left-looking Level 3 BLAS version of the algorithm.
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. * Definition:
  9. * ===========
  10. *
  11. * SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
  12. *
  13. * .. Scalar Arguments ..
  14. * INTEGER INFO, LDA, LWORK, M, N
  15. * ..
  16. * .. Array Arguments ..
  17. * REAL A( LDA, * ), TAU( * ), WORK( * )
  18. * ..
  19. *
  20. * Purpose
  21. * =======
  22. *
  23. C>\details \b Purpose:
  24. C>\verbatim
  25. C>
  26. C> SGEQRF computes a QR factorization of a real M-by-N matrix A:
  27. C> A = Q * R.
  28. C>
  29. C> This is the left-looking Level 3 BLAS version of the algorithm.
  30. C>
  31. C>\endverbatim
  32. *
  33. * Arguments:
  34. * ==========
  35. *
  36. C> \param[in] M
  37. C> \verbatim
  38. C> M is INTEGER
  39. C> The number of rows of the matrix A. M >= 0.
  40. C> \endverbatim
  41. C>
  42. C> \param[in] N
  43. C> \verbatim
  44. C> N is INTEGER
  45. C> The number of columns of the matrix A. N >= 0.
  46. C> \endverbatim
  47. C>
  48. C> \param[in,out] A
  49. C> \verbatim
  50. C> A is REAL array, dimension (LDA,N)
  51. C> On entry, the M-by-N matrix A.
  52. C> On exit, the elements on and above the diagonal of the array
  53. C> contain the min(M,N)-by-N upper trapezoidal matrix R (R is
  54. C> upper triangular if m >= n); the elements below the diagonal,
  55. C> with the array TAU, represent the orthogonal matrix Q as a
  56. C> product of min(m,n) elementary reflectors (see Further
  57. C> Details).
  58. C> \endverbatim
  59. C>
  60. C> \param[in] LDA
  61. C> \verbatim
  62. C> LDA is INTEGER
  63. C> The leading dimension of the array A. LDA >= max(1,M).
  64. C> \endverbatim
  65. C>
  66. C> \param[out] TAU
  67. C> \verbatim
  68. C> TAU is REAL array, dimension (min(M,N))
  69. C> The scalar factors of the elementary reflectors (see Further
  70. C> Details).
  71. C> \endverbatim
  72. C>
  73. C> \param[out] WORK
  74. C> \verbatim
  75. C> WORK is REAL array, dimension (MAX(1,LWORK))
  76. C> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  77. C> \endverbatim
  78. C>
  79. C> \param[in] LWORK
  80. C> \verbatim
  81. C> LWORK is INTEGER
  82. C> \endverbatim
  83. C> \verbatim
  84. C> The dimension of the array WORK. LWORK >= 1 if MIN(M,N) = 0,
  85. C> otherwise the dimension can be divided into three parts.
  86. C> \endverbatim
  87. C> \verbatim
  88. C> 1) The part for the triangular factor T. If the very last T is not bigger
  89. C> than any of the rest, then this part is NB x ceiling(K/NB), otherwise,
  90. C> NB x (K-NT), where K = min(M,N) and NT is the dimension of the very last T
  91. C> \endverbatim
  92. C> \verbatim
  93. C> 2) The part for the very last T when T is bigger than any of the rest T.
  94. C> The size of this part is NT x NT, where NT = K - ceiling ((K-NX)/NB) x NB,
  95. C> where K = min(M,N), NX is calculated by
  96. C> NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) )
  97. C> \endverbatim
  98. C> \verbatim
  99. C> 3) The part for slarfb is of size max((N-M)*K, (N-M)*NB, K*NB, NB*NB)
  100. C> \endverbatim
  101. C> \verbatim
  102. C> So LWORK = part1 + part2 + part3
  103. C> \endverbatim
  104. C> \verbatim
  105. C> If LWORK = -1, then a workspace query is assumed; the routine
  106. C> only calculates the optimal size of the WORK array, returns
  107. C> this value as the first entry of the WORK array, and no error
  108. C> message related to LWORK is issued by XERBLA.
  109. C> \endverbatim
  110. C>
  111. C> \param[out] INFO
  112. C> \verbatim
  113. C> INFO is INTEGER
  114. C> = 0: successful exit
  115. C> < 0: if INFO = -i, the i-th argument had an illegal value
  116. C> \endverbatim
  117. C>
  118. *
  119. * Authors:
  120. * ========
  121. *
  122. C> \author Univ. of Tennessee
  123. C> \author Univ. of California Berkeley
  124. C> \author Univ. of Colorado Denver
  125. C> \author NAG Ltd.
  126. *
  127. C> \date December 2016
  128. *
  129. C> \ingroup variantsGEcomputational
  130. *
  131. * Further Details
  132. * ===============
  133. C>\details \b Further \b Details
  134. C> \verbatim
  135. C>
  136. C> The matrix Q is represented as a product of elementary reflectors
  137. C>
  138. C> Q = H(1) H(2) . . . H(k), where k = min(m,n).
  139. C>
  140. C> Each H(i) has the form
  141. C>
  142. C> H(i) = I - tau * v * v'
  143. C>
  144. C> where tau is a real scalar, and v is a real vector with
  145. C> v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
  146. C> and tau in TAU(i).
  147. C>
  148. C> \endverbatim
  149. C>
  150. * =====================================================================
  ! SGEQRF, left-looking variant: blocked QR factorization A = Q * R.
      SUBROUTINE SGEQRF ( M, N, A, LDA, TAU, WORK, LWORK, INFO )
*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*  Outline of the implementation (the argument contract is given in
*  the documentation header that precedes this SUBROUTINE statement):
*
*    1. NB (block size) and NX (crossover point) are obtained from
*       ILAENV; NT is the number of columns left for the last,
*       unblocked panel.
*    2. A workspace size is computed and stored in WORK(1) before the
*       arguments are validated.  With LWORK = -1 the routine returns
*       right after validation (workspace query).
*    3. Blocked phase: for each panel of NB columns, the reflectors of
*       all previous panels are applied to it (SLARFB), the panel is
*       factored (SGEQR2) and its triangular factor T is built
*       (SLARFT).
*    4. The remaining columns I:K are updated and factored the same
*       way, and when M < N the columns M+1:N are updated last.
*
*  How WORK is addressed by the calls below:
*
*    WORK(I), leading dimension LBWORK
*        triangular factor T of the panel that starts at column I;
*    WORK(LBWORK*NB+1), leading dimension NT
*        triangular factor of the last panel when NT > NB;
*    WORK(LBWORK*NB+NT*NT+1)
*        scratch space handed to SLARFB and SGEQR2.
*
*     .. Scalar Arguments ..
      INTEGER            INFO, LDA, LWORK, M, N
*     ..
*     .. Array Arguments ..
      REAL               A( LDA, * ), TAU( * ), WORK( * )
*     ..
*
*  =====================================================================
*
*     .. Local Scalars ..
      LOGICAL            LQUERY
      INTEGER            I, IB, IINFO, IWS, J, K, LWKOPT, NB,
     $                   NBMIN, NX, LBWORK, NT, LLWORK
*     ..
*     .. External Subroutines ..
      EXTERNAL           SGEQR2, SLARFB, SLARFT, XERBLA
*     ..
*     .. Intrinsic Functions ..
      INTRINSIC          CEILING, MAX, MIN, REAL
*     ..
*     .. External Functions ..
*
*     SROUNDUP_LWORK is the single precision workspace-size helper of
*     LAPACK.  WORK is a REAL array, so the size written to WORK(1)
*     has to be converted with the REAL-valued helper; the double
*     precision DROUNDUP_LWORK result would be narrowed to REAL again
*     on assignment.
*
      INTEGER            ILAENV
      REAL               SROUNDUP_LWORK
      EXTERNAL           ILAENV, SROUNDUP_LWORK
*     ..
*     .. Executable Statements ..
*
*     IWS is the size reported in WORK(1) on a normal exit.  It starts
*     at N and is replaced by the blocked-code requirement further
*     down when the blocked path is considered.
*
      INFO = 0
      NBMIN = 2
      NX = 0
      IWS = N
      K = MIN( M, N )
      NB = ILAENV( 1, 'SGEQRF', ' ', M, N, -1, -1 )
      IF( NB.GT.1 .AND. NB.LT.K ) THEN
*
*        Determine when to cross over from blocked to unblocked code.
*
         NX = MAX( 0, ILAENV( 3, 'SGEQRF', ' ', M, N, -1, -1 ) )
      END IF
*
*     Get NT, the size of the very last T: the number of columns that
*     are left over once the blocked loop has covered the columns
*     1, ..., K-NX in steps of NB, e.g.:
*
*            NB=3     2NB=6       K=10
*            |        |           |
*      1--2--3--4--5--6--7--8--9--10
*                  |     \________/
*               K-NX=5      NT=4
*
*     So here 4 x 4 is the last T stored in the workspace
*
      NT = K-CEILING(REAL(K-NX)/REAL(NB))*NB
*
*     optimal workspace = space for SLARFB + space for normal T's
*                         + space for the last T
*
*     LLWORK is first the SLARFB scratch size in elements and is then
*     converted to a count of NB-sized chunks (rounded up).
*
      LLWORK = MAX (MAX((N-M)*K, (N-M)*NB), MAX(K*NB, NB*NB))
      LLWORK = CEILING(REAL(LLWORK)/REAL(NB))
      IF( K.EQ.0 ) THEN
*
*        Empty matrix: a single workspace element is reported.
*
         LBWORK = 0
         LWKOPT = 1
         WORK( 1 ) = LWKOPT
      ELSE IF ( NT.GT.NB ) THEN
*
*        The last T is larger than the regular ones and gets its own
*        NT x NT area behind the regular T's.
*
         LBWORK = K-NT
*
*        Optimal workspace for SLARFB = MAX(1,N)*NT
*
         LWKOPT = (LBWORK+LLWORK)*NB
         WORK( 1 ) = SROUNDUP_LWORK(LWKOPT+NT*NT)
      ELSE
*
*        The last T is stored together with the regular T's.
*
         LBWORK = CEILING(REAL(K)/REAL(NB))*NB
         LWKOPT = (LBWORK+LLWORK-NB)*NB
         WORK( 1 ) = SROUNDUP_LWORK(LWKOPT)
      END IF
*
*     Test the input arguments
*
      LQUERY = ( LWORK.EQ.-1 )
      IF( M.LT.0 ) THEN
         INFO = -1
      ELSE IF( N.LT.0 ) THEN
         INFO = -2
      ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
         INFO = -4
      ELSE IF ( .NOT.LQUERY ) THEN
         IF( LWORK.LE.0 .OR. ( M.GT.0 .AND. LWORK.LT.MAX( 1, N ) ) )
     $      INFO = -7
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'SGEQRF', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
*
*        Workspace query: WORK(1) has already been set above.
*
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( K.EQ.0 ) THEN
         RETURN
      END IF
*
      IF( NB.GT.1 .AND. NB.LT.K ) THEN
         IF( NX.LT.K ) THEN
*
*           Determine if workspace is large enough for blocked code.
*
            IF ( NT.LE.NB ) THEN
               IWS = (LBWORK+LLWORK-NB)*NB
            ELSE
               IWS = (LBWORK+LLWORK)*NB+NT*NT
            END IF
            IF( LWORK.LT.IWS ) THEN
*
*              Not enough workspace to use optimal NB:  reduce NB and
*              determine the minimum value of NB.
*
               IF ( NT.LE.NB ) THEN
                  NB = LWORK / (LLWORK+(LBWORK-NB))
               ELSE
                  NB = (LWORK-NT*NT)/(LBWORK+LLWORK)
               END IF
               NBMIN = MAX( 2, ILAENV( 2, 'SGEQRF', ' ', M, N, -1,
     $                 -1 ) )
            END IF
         END IF
      END IF
*
      IF( NB.GE.NBMIN .AND. NB.LT.K .AND. NX.LT.K ) THEN
*
*        Use blocked code initially
*
         DO 10 I = 1, K - NX, NB
            IB = MIN( K-I+1, NB )
*
*           Update the current column using old T's
*
            DO 20 J = 1, I - NB, NB
*
*              Apply H' to A(J:M,I:I+IB-1) from the left
*
               CALL SLARFB( 'Left', 'Transpose', 'Forward',
     $                      'Columnwise', M-J+1, IB, NB,
     $                      A( J, J ), LDA, WORK(J), LBWORK,
     $                      A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
     $                      IB)
   20       CONTINUE
*
*           Compute the QR factorization of the current block
*           A(I:M,I:I+IB-1)
*
            CALL SGEQR2( M-I+1, IB, A( I, I ), LDA, TAU( I ),
     $                   WORK(LBWORK*NB+NT*NT+1), IINFO )
            IF( I+IB.LE.N ) THEN
*
*              Form the triangular factor of the block reflector
*              H = H(i) H(i+1) . . . H(i+ib-1)
*
               CALL SLARFT( 'Forward', 'Columnwise', M-I+1, IB,
     $                      A( I, I ), LDA, TAU( I ),
     $                      WORK(I), LBWORK )
*
            END IF
   10    CONTINUE
      ELSE
*
*        No blocked phase: everything is left to the unblocked code.
*
         I = 1
      END IF
*
*     Use unblocked code to factor the last or only block.
*
      IF( I.LE.K ) THEN
         IF ( I .NE. 1 ) THEN
*
*           The blocked phase ran: first bring columns I:K up to date
*           with the reflectors of all earlier panels.
*
            DO 30 J = 1, I - NB, NB
*
*              Apply H' to A(J:M,I:K) from the left
*
               CALL SLARFB( 'Left', 'Transpose', 'Forward',
     $                      'Columnwise', M-J+1, K-I+1, NB,
     $                      A( J, J ), LDA, WORK(J), LBWORK,
     $                      A( J, I ), LDA, WORK(LBWORK*NB+NT*NT+1),
     $                      K-I+1)
   30       CONTINUE
            CALL SGEQR2( M-I+1, K-I+1, A( I, I ), LDA, TAU( I ),
     $                   WORK(LBWORK*NB+NT*NT+1),IINFO )
         ELSE
*
*           Use unblocked code to factor the last or only block.
*
            CALL SGEQR2( M-I+1, N-I+1, A( I, I ), LDA, TAU( I ),
     $                   WORK,IINFO )
         END IF
      END IF
*
*     Apply update to the column M+1:N when N > M
*
      IF ( M.LT.N .AND. I.NE.1) THEN
*
*        Form the last triangular factor of the block reflector
*        H = H(i) H(i+1) . . . H(i+ib-1)
*
         IF ( NT .LE. NB ) THEN
            CALL SLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
     $                   A( I, I ), LDA, TAU( I ), WORK(I), LBWORK )
         ELSE
            CALL SLARFT( 'Forward', 'Columnwise', M-I+1, K-I+1,
     $                   A( I, I ), LDA, TAU( I ),
     $                   WORK(LBWORK*NB+1), NT )
         END IF
*
*        Apply H' to A(1:M,M+1:N) from the left
*
         DO 40 J = 1, K-NX, NB
            IB = MIN( K-J+1, NB )
            CALL SLARFB( 'Left', 'Transpose', 'Forward',
     $                   'Columnwise', M-J+1, N-M, IB,
     $                   A( J, J ), LDA, WORK(J), LBWORK,
     $                   A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
     $                   N-M)
   40    CONTINUE
*
*        J keeps the value it had when loop 40 terminated, i.e. the
*        first column of the last panel; apply that panel's reflectors
*        using whichever storage its T was written to above.
*
         IF ( NT.LE.NB ) THEN
            CALL SLARFB( 'Left', 'Transpose', 'Forward',
     $                   'Columnwise', M-J+1, N-M, K-J+1,
     $                   A( J, J ), LDA, WORK(J), LBWORK,
     $                   A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
     $                   N-M)
         ELSE
            CALL SLARFB( 'Left', 'Transpose', 'Forward',
     $                   'Columnwise', M-J+1, N-M, K-J+1,
     $                   A( J, J ), LDA,
     $                   WORK(LBWORK*NB+1),
     $                   NT, A( J, M+1 ), LDA, WORK(LBWORK*NB+NT*NT+1),
     $                   N-M)
         END IF
      END IF
*
*     Report the workspace size that was actually considered.
*
      WORK( 1 ) = SROUNDUP_LWORK(IWS)
      RETURN
*
*     End of SGEQRF
*
      END