You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

slamswlq.f 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426
  1. *> \brief \b SLAMSWLQ
  2. *
  3. * Definition:
  4. * ===========
  5. *
  6. * SUBROUTINE SLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T,
  7. * $ LDT, C, LDC, WORK, LWORK, INFO )
  8. *
  9. *
  10. * .. Scalar Arguments ..
  11. * CHARACTER SIDE, TRANS
  12. * INTEGER INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC
  13. * ..
  14. * .. Array Arguments ..
  15. * REAL A( LDA, * ), WORK( * ), C(LDC, * ),
  16. * $ T( LDT, * )
  17. *> \par Purpose:
  18. * =============
  19. *>
  20. *> \verbatim
  21. *>
  22. *> SLAMSWLQ overwrites the general real M-by-N matrix C with
  23. *>
  24. *>
  25. *> SIDE = 'L' SIDE = 'R'
  26. *> TRANS = 'N': Q * C C * Q
  27. *> TRANS = 'T': Q**T * C C * Q**T
  28. *> where Q is a real orthogonal matrix defined as the product of blocked
  29. *> elementary reflectors computed by short wide LQ
  30. *> factorization (SLASWLQ)
  31. *> \endverbatim
  32. *
  33. * Arguments:
  34. * ==========
  35. *
  36. *> \param[in] SIDE
  37. *> \verbatim
  38. *> SIDE is CHARACTER*1
  39. *> = 'L': apply Q or Q**T from the Left;
  40. *> = 'R': apply Q or Q**T from the Right.
  41. *> \endverbatim
  42. *>
  43. *> \param[in] TRANS
  44. *> \verbatim
  45. *> TRANS is CHARACTER*1
  46. *> = 'N': No transpose, apply Q;
  47. *> = 'T': Transpose, apply Q**T.
  48. *> \endverbatim
  49. *>
  50. *> \param[in] M
  51. *> \verbatim
  52. *> M is INTEGER
  53. *> The number of rows of the matrix C. M >=0.
  54. *> \endverbatim
  55. *>
  56. *> \param[in] N
  57. *> \verbatim
  58. *> N is INTEGER
  59. *> The number of columns of the matrix C. N >= 0.
  60. *> \endverbatim
  61. *>
  62. *> \param[in] K
  63. *> \verbatim
  64. *> K is INTEGER
  65. *> The number of elementary reflectors whose product defines
  66. *> the matrix Q.
  67. *> M >= K >= 0;
  68. *>
  69. *> \endverbatim
  70. *> \param[in] MB
  71. *> \verbatim
  72. *> MB is INTEGER
  73. *> The row block size to be used in the blocked LQ.
  74. *> K >= MB >= 1
  75. *> \endverbatim
  76. *>
  77. *> \param[in] NB
  78. *> \verbatim
  79. *> NB is INTEGER
  80. *> The column block size to be used in the blocked LQ.
  81. *> NB > K; if NB <= K the routine falls back to SGEMLQT.
  82. *> \endverbatim
  83. *>
  84. *> \param[in] A
  85. *> \verbatim
  86. *> A is REAL array, dimension
  87. *> (LDA,M) if SIDE = 'L',
  88. *> (LDA,N) if SIDE = 'R'
  89. *> The i-th row must contain the vector which defines the blocked
  90. *> elementary reflector H(i), for i = 1,2,...,k, as returned by
  91. *> SLASWLQ in the first k rows of its array argument A.
  92. *> \endverbatim
  93. *>
  94. *> \param[in] LDA
  95. *> \verbatim
  96. *> LDA is INTEGER
  97. *> The leading dimension of the array A. LDA >= max(1,K).
  98. *> \endverbatim
  99. *>
  100. *> \param[in] T
  101. *> \verbatim
  102. *> T is REAL array, dimension
  103. *> ( M * Number of blocks(CEIL(N-K/NB-K)),
  104. *> The blocked upper triangular block reflectors stored in compact form
  105. *> as a sequence of upper triangular blocks. See below
  106. *> for further details.
  107. *> \endverbatim
  108. *>
  109. *> \param[in] LDT
  110. *> \verbatim
  111. *> LDT is INTEGER
  112. *> The leading dimension of the array T. LDT >= MB.
  113. *> \endverbatim
  114. *>
  115. *> \param[in,out] C
  116. *> \verbatim
  117. *> C is REAL array, dimension (LDC,N)
  118. *> On entry, the M-by-N matrix C.
  119. *> On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
  120. *> \endverbatim
  121. *>
  122. *> \param[in] LDC
  123. *> \verbatim
  124. *> LDC is INTEGER
  125. *> The leading dimension of the array C. LDC >= max(1,M).
  126. *> \endverbatim
  127. *>
  128. *> \param[out] WORK
  129. *> \verbatim
  130. *> (workspace) REAL array, dimension (MAX(1,LWORK))
  131. *> On exit, if INFO = 0, WORK(1) returns the minimal LWORK.
  132. *> \endverbatim
  133. *>
  134. *> \param[in] LWORK
  135. *> \verbatim
  136. *> LWORK is INTEGER
  137. *> The dimension of the array WORK.
  138. *>
  139. *> If MIN(M,N,K) = 0, LWORK >= 1.
  140. *> If SIDE = 'L', LWORK >= max(1,N*MB).
  141. *> If SIDE = 'R', LWORK >= max(1,M*MB).
  142. *> If LWORK = -1, then a workspace query is assumed; the routine
  143. *> only calculates the minimal size of the WORK array, returns
  144. *> this value as the first entry of the WORK array, and no error
  145. *> message related to LWORK is issued by XERBLA.
  146. *> \endverbatim
  147. *>
  148. *> \param[out] INFO
  149. *> \verbatim
  150. *> INFO is INTEGER
  151. *> = 0: successful exit
  152. *> < 0: if INFO = -i, the i-th argument had an illegal value
  153. *> \endverbatim
  154. *
  155. * Authors:
  156. * ========
  157. *
  158. *> \author Univ. of Tennessee
  159. *> \author Univ. of California Berkeley
  160. *> \author Univ. of Colorado Denver
  161. *> \author NAG Ltd.
  162. *
  163. *> \par Further Details:
  164. * =====================
  165. *>
  166. *> \verbatim
  167. *> Short-Wide LQ (SWLQ) performs LQ by a sequence of orthogonal transformations,
  168. *> representing Q as a product of other orthogonal matrices
  169. *> Q = Q(1) * Q(2) * . . . * Q(k)
  170. *> where each Q(i) zeros out upper diagonal entries of a block of NB rows of A:
  171. *> Q(1) zeros out the upper diagonal entries of rows 1:NB of A
  172. *> Q(2) zeros out the bottom MB-N rows of rows [1:M,NB+1:2*NB-M] of A
  173. *> Q(3) zeros out the bottom MB-N rows of rows [1:M,2*NB-M+1:3*NB-2*M] of A
  174. *> . . .
  175. *>
  176. *> Q(1) is computed by GELQT, which represents Q(1) by Householder vectors
  177. *> stored under the diagonal of rows 1:MB of A, and by upper triangular
  178. *> block reflectors, stored in array T(1:LDT,1:N).
  179. *> For more information see Further Details in GELQT.
  180. *>
  181. *> Q(i) for i>1 is computed by TPLQT, which represents Q(i) by Householder vectors
  182. *> stored in columns [(i-1)*(NB-M)+M+1:i*(NB-M)+M] of A, and by upper triangular
  183. *> block reflectors, stored in array T(1:LDT,(i-1)*M+1:i*M).
  184. *> The last Q(k) may use fewer rows.
  185. *> For more information see Further Details in TPLQT.
  186. *>
  187. *> For more details of the overall algorithm, see the description of
  188. *> Sequential TSQR in Section 2.2 of [1].
  189. *>
  190. *> [1] “Communication-Optimal Parallel and Sequential QR and LU Factorizations,”
  191. *> J. Demmel, L. Grigori, M. Hoemmen, J. Langou,
  192. *> SIAM J. Sci. Comput, vol. 34, no. 1, 2012
  193. *> \endverbatim
  194. *>
  195. *> \ingroup lamswlq
  196. *>
  197. * =====================================================================
  ! SLAMSWLQ: apply Q or Q**T from a short-wide LQ factorization to C
*
*     Overwrites the M-by-N matrix C with
*
*                     SIDE = 'L'     SIDE = 'R'
*        TRANS = 'N':   Q * C          C * Q
*        TRANS = 'T':   Q**T * C       C * Q**T
*
*     Q is applied block by block: the first block (NB rows of C for
*     SIDE = 'L', NB columns for SIDE = 'R') goes through SGEMLQT,
*     every following block of NB-K rows/columns through STPMLQT, and
*     a shorter trailing block of MOD(NQ-K,NB-K) rows/columns is
*     handled separately.  NQ denotes the order of Q: M for
*     SIDE = 'L' and N for SIDE = 'R'.
*
*     When NB <= K or NB >= NQ there is only one block and the whole
*     update is delegated to a single SGEMLQT call.
*
*     Arguments
*       SIDE   (in)     'L' or 'R'.
*       TRANS  (in)     'N' or 'T'.
*       M, N   (in)     row and column counts of C; M >= K, N >= 0.
*       K      (in)     number of elementary reflectors, K >= 0.
*       MB     (in)     row block size, K >= MB >= 1.
*       NB     (in)     column block size of the factorization.
*       A, LDA (in)     reflector storage, LDA >= max(1,K).
*       T, LDT (in)     block reflector factors, LDT >= max(1,MB);
*                       block number CTR starts at column CTR*K+1.
*       C, LDC (in/out) matrix to update, LDC >= max(1,M).
*       WORK   (out)    workspace; WORK(1) holds the minimal LWORK
*                       after a workspace query and after the
*                       blocked path.
*       LWORK  (in)     length of WORK, or -1 for a workspace query.
*                       Minimal value is 1 if MIN(M,N,K) = 0, else
*                       max(1,N*MB) for SIDE = 'L' and max(1,M*MB)
*                       for SIDE = 'R'.
*       INFO   (out)    0 on success, -i if argument i is illegal.
*
*  =====================================================================
      SUBROUTINE SLAMSWLQ( SIDE, TRANS, M, N, K, MB, NB, A, LDA, T,
     $                     LDT, C, LDC, WORK, LWORK, INFO )
*
*  -- LAPACK computational routine --
*  -- LAPACK is a software package provided by Univ. of Tennessee,    --
*  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
*
*     .. Scalar Arguments ..
      CHARACTER          SIDE, TRANS
      INTEGER            INFO, LDA, M, N, K, MB, NB, LDT, LWORK, LDC
*     ..
*     .. Array Arguments ..
      REAL               A( LDA, * ), WORK( * ), C( LDC, * ),
     $                   T( LDT, * )
*     ..
*
*  =====================================================================
*
*     ..
*     .. Local Scalars ..
      LOGICAL            LEFT, RIGHT, TRAN, NOTRAN, LQUERY
      INTEGER            I, II, KK, LW, CTR, MINMNK, LWMIN, NQ
*     ..
*     .. External Functions ..
      LOGICAL            LSAME
      EXTERNAL           LSAME
      REAL               SROUNDUP_LWORK
      EXTERNAL           SROUNDUP_LWORK
*     ..
*     .. External Subroutines ..
      EXTERNAL           STPMLQT, SGEMLQT, XERBLA
*     ..
*     .. Executable Statements ..
*
*     Test the input arguments
*
      LQUERY = ( LWORK.EQ.-1 )
      NOTRAN = LSAME( TRANS, 'N' )
      TRAN   = LSAME( TRANS, 'T' )
      LEFT   = LSAME( SIDE, 'L' )
      RIGHT  = LSAME( SIDE, 'R' )
*
*     NQ is the order of Q, LW the workspace needed by the kernels
*
      IF( LEFT ) THEN
         NQ = M
         LW = N * MB
      ELSE
         NQ = N
         LW = M * MB
      END IF
*
      MINMNK = MIN( M, N, K )
      IF( MINMNK.EQ.0 ) THEN
         LWMIN = 1
      ELSE
         LWMIN = MAX( 1, LW )
      END IF
*
*     NOTE(review): M >= K is enforced for both sides although Q has
*     order N when SIDE = 'R' -- confirm this is intended.
*
      INFO = 0
      IF( .NOT.LEFT .AND. .NOT.RIGHT ) THEN
         INFO = -1
      ELSE IF( .NOT.TRAN .AND. .NOT.NOTRAN ) THEN
         INFO = -2
      ELSE IF( K.LT.0 ) THEN
         INFO = -5
      ELSE IF( M.LT.K ) THEN
         INFO = -3
      ELSE IF( N.LT.0 ) THEN
         INFO = -4
      ELSE IF( K.LT.MB .OR. MB.LT.1 ) THEN
         INFO = -6
      ELSE IF( LDA.LT.MAX( 1, K ) ) THEN
         INFO = -9
      ELSE IF( LDT.LT.MAX( 1, MB ) ) THEN
         INFO = -11
      ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
         INFO = -13
      ELSE IF( LWORK.LT.LWMIN .AND. ( .NOT.LQUERY ) ) THEN
         INFO = -15
      END IF
*
      IF( INFO.EQ.0 ) THEN
         WORK( 1 ) = SROUNDUP_LWORK( LWMIN )
      END IF
      IF( INFO.NE.0 ) THEN
         CALL XERBLA( 'SLAMSWLQ', -INFO )
         RETURN
      ELSE IF( LQUERY ) THEN
         RETURN
      END IF
*
*     Quick return if possible
*
      IF( MINMNK.EQ.0 ) THEN
         RETURN
      END IF
*
*     Single-block case.  The test uses the order of Q (NQ), not
*     MAX(M,N,K): the blocked code below addresses NB rows (SIDE='L')
*     or NB columns (SIDE='R') of C in its first block, so it must
*     only run when NB < NQ.
*
      IF( ( NB.LE.K ) .OR. ( NB.GE.NQ ) ) THEN
         CALL SGEMLQT( SIDE, TRANS, M, N, K, MB, A, LDA,
     $                 T, LDT, C, LDC, WORK, INFO )
         RETURN
      END IF
*
      IF( LEFT .AND. TRAN ) THEN
*
*        Q**T * C: walk the blocks from last to first.
*        Start with the (possibly absent) short trailing block.
*
         KK  = MOD( ( M-K ), ( NB-K ) )
         CTR = ( M-K ) / ( NB-K )
*
         IF( KK.GT.0 ) THEN
            II = M - KK + 1
            CALL STPMLQT( 'L', 'T', KK, N, K, 0, MB,
     $                    A( 1, II ), LDA, T( 1, CTR*K+1 ), LDT,
     $                    C( 1, 1 ), LDC, C( II, 1 ), LDC,
     $                    WORK, INFO )
         ELSE
            II = M + 1
         END IF
*
         DO I = II-( NB-K ), NB+1, -( NB-K )
*
*           Full block of NB-K rows of C starting at row I
*
            CTR = CTR - 1
            CALL STPMLQT( 'L', 'T', NB-K, N, K, 0, MB,
     $                    A( 1, I ), LDA, T( 1, CTR*K+1 ), LDT,
     $                    C( 1, 1 ), LDC, C( I, 1 ), LDC,
     $                    WORK, INFO )
         END DO
*
*        First block: rows 1:NB of C
*
         CALL SGEMLQT( 'L', 'T', NB, N, K, MB, A( 1, 1 ), LDA,
     $                 T, LDT, C( 1, 1 ), LDC, WORK, INFO )
*
      ELSE IF( LEFT .AND. NOTRAN ) THEN
*
*        Q * C: walk the blocks from first to last.
*        First block: rows 1:NB of C
*
         KK  = MOD( ( M-K ), ( NB-K ) )
         II  = M - KK + 1
         CTR = 1
         CALL SGEMLQT( 'L', 'N', NB, N, K, MB, A( 1, 1 ), LDA,
     $                 T, LDT, C( 1, 1 ), LDC, WORK, INFO )
*
         DO I = NB+1, II-NB+K, ( NB-K )
*
*           Full block of NB-K rows of C starting at row I
*
            CALL STPMLQT( 'L', 'N', NB-K, N, K, 0, MB,
     $                    A( 1, I ), LDA, T( 1, CTR*K+1 ), LDT,
     $                    C( 1, 1 ), LDC, C( I, 1 ), LDC,
     $                    WORK, INFO )
            CTR = CTR + 1
*
         END DO
         IF( II.LE.M ) THEN
*
*           Short trailing block of KK rows
*
            CALL STPMLQT( 'L', 'N', KK, N, K, 0, MB,
     $                    A( 1, II ), LDA, T( 1, CTR*K+1 ), LDT,
     $                    C( 1, 1 ), LDC, C( II, 1 ), LDC,
     $                    WORK, INFO )
*
         END IF
*
      ELSE IF( RIGHT .AND. NOTRAN ) THEN
*
*        C * Q: walk the blocks from last to first.
*        Start with the (possibly absent) short trailing block.
*
         KK  = MOD( ( N-K ), ( NB-K ) )
         CTR = ( N-K ) / ( NB-K )
         IF( KK.GT.0 ) THEN
            II = N - KK + 1
            CALL STPMLQT( 'R', 'N', M, KK, K, 0, MB,
     $                    A( 1, II ), LDA, T( 1, CTR*K+1 ), LDT,
     $                    C( 1, 1 ), LDC, C( 1, II ), LDC,
     $                    WORK, INFO )
         ELSE
            II = N + 1
         END IF
*
         DO I = II-( NB-K ), NB+1, -( NB-K )
*
*           Full block of NB-K columns of C starting at column I
*
            CTR = CTR - 1
            CALL STPMLQT( 'R', 'N', M, NB-K, K, 0, MB,
     $                    A( 1, I ), LDA, T( 1, CTR*K+1 ), LDT,
     $                    C( 1, 1 ), LDC, C( 1, I ), LDC,
     $                    WORK, INFO )
         END DO
*
*        First block: columns 1:NB of C
*
         CALL SGEMLQT( 'R', 'N', M, NB, K, MB, A( 1, 1 ), LDA,
     $                 T, LDT, C( 1, 1 ), LDC, WORK, INFO )
*
      ELSE IF( RIGHT .AND. TRAN ) THEN
*
*        C * Q**T: walk the blocks from first to last.
*        First block: columns 1:NB of C
*
         KK  = MOD( ( N-K ), ( NB-K ) )
         II  = N - KK + 1
         CTR = 1
         CALL SGEMLQT( 'R', 'T', M, NB, K, MB, A( 1, 1 ), LDA,
     $                 T, LDT, C( 1, 1 ), LDC, WORK, INFO )
*
         DO I = NB+1, II-NB+K, ( NB-K )
*
*           Full block of NB-K columns of C starting at column I
*
            CALL STPMLQT( 'R', 'T', M, NB-K, K, 0, MB,
     $                    A( 1, I ), LDA, T( 1, CTR*K+1 ), LDT,
     $                    C( 1, 1 ), LDC, C( 1, I ), LDC,
     $                    WORK, INFO )
            CTR = CTR + 1
*
         END DO
         IF( II.LE.N ) THEN
*
*           Short trailing block of KK columns
*
            CALL STPMLQT( 'R', 'T', M, KK, K, 0, MB,
     $                    A( 1, II ), LDA, T( 1, CTR*K+1 ), LDT,
     $                    C( 1, 1 ), LDC, C( 1, II ), LDC,
     $                    WORK, INFO )
*
         END IF
*
      END IF
*
*     The kernels used WORK as scratch; restore the size report
*
      WORK( 1 ) = SROUNDUP_LWORK( LWMIN )
      RETURN
*
*     End of SLAMSWLQ
*
      END