You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

sgesdd.f 60 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548
  1. *> \brief \b SGESDD
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download SGESDD + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/sgesdd.f">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/sgesdd.f">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/sgesdd.f">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
  22. * WORK, LWORK, IWORK, INFO )
  23. *
  24. * .. Scalar Arguments ..
  25. * CHARACTER JOBZ
  26. * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
  27. * ..
  28. * .. Array Arguments ..
  29. * INTEGER IWORK( * )
  30. * REAL A( LDA, * ), S( * ), U( LDU, * ),
  31. * $ VT( LDVT, * ), WORK( * )
  32. * ..
  33. *
  34. *
  35. *> \par Purpose:
  36. * =============
  37. *>
  38. *> \verbatim
  39. *>
  40. *> SGESDD computes the singular value decomposition (SVD) of a real
  41. *> M-by-N matrix A, optionally computing the left and right singular
  42. *> vectors. If singular vectors are desired, it uses a
  43. *> divide-and-conquer algorithm.
  44. *>
  45. *> The SVD is written
  46. *>
  47. *> A = U * SIGMA * transpose(V)
  48. *>
  49. *> where SIGMA is an M-by-N matrix which is zero except for its
  50. *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
  51. *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
  52. *> are the singular values of A; they are real and non-negative, and
  53. *> are returned in descending order. The first min(m,n) columns of
  54. *> U and V are the left and right singular vectors of A.
  55. *>
  56. *> Note that the routine returns VT = V**T, not V.
  57. *>
  58. *> The divide and conquer algorithm makes very mild assumptions about
  59. *> floating point arithmetic. It will work on machines with a guard
  60. *> digit in add/subtract, or on those binary machines without guard
  61. *> digits which subtract like the Cray X-MP, Cray Y-MP, Cray C-90, or
  62. *> Cray-2. It could conceivably fail on hexadecimal or decimal machines
  63. *> without guard digits, but we know of none.
  64. *> \endverbatim
  65. *
  66. * Arguments:
  67. * ==========
  68. *
  69. *> \param[in] JOBZ
  70. *> \verbatim
  71. *> JOBZ is CHARACTER*1
  72. *> Specifies options for computing all or part of the matrix U:
  73. *> = 'A': all M columns of U and all N rows of V**T are
  74. *> returned in the arrays U and VT;
  75. *> = 'S': the first min(M,N) columns of U and the first
  76. *> min(M,N) rows of V**T are returned in the arrays U
  77. *> and VT;
  78. *> = 'O': If M >= N, the first N columns of U are overwritten
  79. *> on the array A and all rows of V**T are returned in
  80. *> the array VT;
  81. *> otherwise, all columns of U are returned in the
  82. *> array U and the first M rows of V**T are overwritten
  83. *> in the array A;
  84. *> = 'N': no columns of U or rows of V**T are computed.
  85. *> \endverbatim
  86. *>
  87. *> \param[in] M
  88. *> \verbatim
  89. *> M is INTEGER
  90. *> The number of rows of the input matrix A. M >= 0.
  91. *> \endverbatim
  92. *>
  93. *> \param[in] N
  94. *> \verbatim
  95. *> N is INTEGER
  96. *> The number of columns of the input matrix A. N >= 0.
  97. *> \endverbatim
  98. *>
  99. *> \param[in,out] A
  100. *> \verbatim
  101. *> A is REAL array, dimension (LDA,N)
  102. *> On entry, the M-by-N matrix A.
  103. *> On exit,
  104. *> if JOBZ = 'O', A is overwritten with the first N columns
  105. *> of U (the left singular vectors, stored
  106. *> columnwise) if M >= N;
  107. *> A is overwritten with the first M rows
  108. *> of V**T (the right singular vectors, stored
  109. *> rowwise) otherwise.
  110. *> if JOBZ .ne. 'O', the contents of A are destroyed.
  111. *> \endverbatim
  112. *>
  113. *> \param[in] LDA
  114. *> \verbatim
  115. *> LDA is INTEGER
  116. *> The leading dimension of the array A. LDA >= max(1,M).
  117. *> \endverbatim
  118. *>
  119. *> \param[out] S
  120. *> \verbatim
  121. *> S is REAL array, dimension (min(M,N))
  122. *> The singular values of A, sorted so that S(i) >= S(i+1).
  123. *> \endverbatim
  124. *>
  125. *> \param[out] U
  126. *> \verbatim
  127. *> U is REAL array, dimension (LDU,UCOL)
  128. *> UCOL = M if JOBZ = 'A' or JOBZ = 'O' and M < N;
  129. *> UCOL = min(M,N) if JOBZ = 'S'.
  130. *> If JOBZ = 'A' or JOBZ = 'O' and M < N, U contains the M-by-M
  131. *> orthogonal matrix U;
  132. *> if JOBZ = 'S', U contains the first min(M,N) columns of U
  133. *> (the left singular vectors, stored columnwise);
  134. *> if JOBZ = 'O' and M >= N, or JOBZ = 'N', U is not referenced.
  135. *> \endverbatim
  136. *>
  137. *> \param[in] LDU
  138. *> \verbatim
  139. *> LDU is INTEGER
  140. *> The leading dimension of the array U. LDU >= 1; if
  141. *> JOBZ = 'S' or 'A' or JOBZ = 'O' and M < N, LDU >= M.
  142. *> \endverbatim
  143. *>
  144. *> \param[out] VT
  145. *> \verbatim
  146. *> VT is REAL array, dimension (LDVT,N)
  147. *> If JOBZ = 'A' or JOBZ = 'O' and M >= N, VT contains the
  148. *> N-by-N orthogonal matrix V**T;
  149. *> if JOBZ = 'S', VT contains the first min(M,N) rows of
  150. *> V**T (the right singular vectors, stored rowwise);
  151. *> if JOBZ = 'O' and M < N, or JOBZ = 'N', VT is not referenced.
  152. *> \endverbatim
  153. *>
  154. *> \param[in] LDVT
  155. *> \verbatim
  156. *> LDVT is INTEGER
  157. *> The leading dimension of the array VT. LDVT >= 1;
  158. *> if JOBZ = 'A' or JOBZ = 'O' and M >= N, LDVT >= N;
  159. *> if JOBZ = 'S', LDVT >= min(M,N).
  160. *> \endverbatim
  161. *>
  162. *> \param[out] WORK
  163. *> \verbatim
  164. *> WORK is REAL array, dimension (MAX(1,LWORK))
  165. *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
  166. *> \endverbatim
  167. *>
  168. *> \param[in] LWORK
  169. *> \verbatim
  170. *> LWORK is INTEGER
  171. *> The dimension of the array WORK. LWORK >= 1.
  172. *> If LWORK = -1, a workspace query is assumed. The optimal
  173. *> size for the WORK array is calculated and stored in WORK(1),
  174. *> and no other work except argument checking is performed.
  175. *>
  176. *> Let mx = max(M,N) and mn = min(M,N).
  177. *> If JOBZ = 'N', LWORK >= 3*mn + max( mx, 7*mn ).
  178. *> If JOBZ = 'O', LWORK >= 3*mn + max( mx, 5*mn*mn + 4*mn ).
  179. *> If JOBZ = 'S', LWORK >= 4*mn*mn + 7*mn.
  180. *> If JOBZ = 'A', LWORK >= 4*mn*mn + 6*mn + mx.
  181. *> These are not tight minimums in all cases; see comments inside code.
  182. *> For good performance, LWORK should generally be larger;
  183. *> a query is recommended.
  184. *> \endverbatim
  185. *>
  186. *> \param[out] IWORK
  187. *> \verbatim
  188. *> IWORK is INTEGER array, dimension (8*min(M,N))
  189. *> \endverbatim
  190. *>
  191. *> \param[out] INFO
  192. *> \verbatim
  193. *> INFO is INTEGER
  194. *> = 0: successful exit.
  195. *> < 0: if INFO = -i, the i-th argument had an illegal value.
  196. *> > 0: SBDSDC did not converge, updating process failed.
  197. *> \endverbatim
  198. *
  199. * Authors:
  200. * ========
  201. *
  202. *> \author Univ. of Tennessee
  203. *> \author Univ. of California Berkeley
  204. *> \author Univ. of Colorado Denver
  205. *> \author NAG Ltd.
  206. *
  207. *> \date June 2016
  208. *
  209. *> \ingroup realGEsing
  210. *
  211. *> \par Contributors:
  212. * ==================
  213. *>
  214. *> Ming Gu and Huan Ren, Computer Science Division, University of
  215. *> California at Berkeley, USA
  216. *>
  217. * =====================================================================
  218. SUBROUTINE SGESDD( JOBZ, M, N, A, LDA, S, U, LDU, VT, LDVT,
  219. $ WORK, LWORK, IWORK, INFO )
  220. implicit none
  221. *
  222. * -- LAPACK driver routine (version 3.7.0) --
  223. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  224. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  225. * June 2016
  226. *
  227. * .. Scalar Arguments ..
  228. CHARACTER JOBZ
  229. INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
  230. * ..
  231. * .. Array Arguments ..
  232. INTEGER IWORK( * )
  233. REAL A( LDA, * ), S( * ), U( LDU, * ),
  234. $ VT( LDVT, * ), WORK( * )
  235. * ..
  236. *
  237. * =====================================================================
  238. *
  239. * .. Parameters ..
  240. REAL ZERO, ONE
  241. PARAMETER ( ZERO = 0.0E0, ONE = 1.0E0 )
  242. * ..
  243. * .. Local Scalars ..
  244. LOGICAL LQUERY, WNTQA, WNTQAS, WNTQN, WNTQO, WNTQS
  245. INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IL,
  246. $ IR, ISCL, ITAU, ITAUP, ITAUQ, IU, IVT, LDWKVT,
  247. $ LDWRKL, LDWRKR, LDWRKU, MAXWRK, MINMN, MINWRK,
  248. $ MNTHR, NWORK, WRKBL
  249. INTEGER LWORK_SGEBRD_MN, LWORK_SGEBRD_MM,
  250. $ LWORK_SGEBRD_NN, LWORK_SGELQF_MN,
  251. $ LWORK_SGEQRF_MN,
  252. $ LWORK_SORGBR_P_MM, LWORK_SORGBR_Q_NN,
  253. $ LWORK_SORGLQ_MN, LWORK_SORGLQ_NN,
  254. $ LWORK_SORGQR_MM, LWORK_SORGQR_MN,
  255. $ LWORK_SORMBR_PRT_MM, LWORK_SORMBR_QLN_MM,
  256. $ LWORK_SORMBR_PRT_MN, LWORK_SORMBR_QLN_MN,
  257. $ LWORK_SORMBR_PRT_NN, LWORK_SORMBR_QLN_NN
  258. REAL ANRM, BIGNUM, EPS, SMLNUM
  259. * ..
  260. * .. Local Arrays ..
  261. INTEGER IDUM( 1 )
  262. REAL DUM( 1 )
  263. * ..
  264. * .. External Subroutines ..
  265. EXTERNAL SBDSDC, SGEBRD, SGELQF, SGEMM, SGEQRF, SLACPY,
  266. $ SLASCL, SLASET, SORGBR, SORGLQ, SORGQR, SORMBR,
  267. $ XERBLA
  268. * ..
  269. * .. External Functions ..
  270. LOGICAL LSAME
  271. REAL SLAMCH, SLANGE
  272. EXTERNAL SLAMCH, SLANGE, LSAME
  273. * ..
  274. * .. Intrinsic Functions ..
  275. INTRINSIC INT, MAX, MIN, SQRT
  276. * ..
  277. * .. Executable Statements ..
  278. *
  279. * Test the input arguments
  280. *
  281. INFO = 0
  282. MINMN = MIN( M, N )
  283. WNTQA = LSAME( JOBZ, 'A' )
  284. WNTQS = LSAME( JOBZ, 'S' )
  285. WNTQAS = WNTQA .OR. WNTQS
  286. WNTQO = LSAME( JOBZ, 'O' )
  287. WNTQN = LSAME( JOBZ, 'N' )
  288. LQUERY = ( LWORK.EQ.-1 )
  289. *
  290. IF( .NOT.( WNTQA .OR. WNTQS .OR. WNTQO .OR. WNTQN ) ) THEN
  291. INFO = -1
  292. ELSE IF( M.LT.0 ) THEN
  293. INFO = -2
  294. ELSE IF( N.LT.0 ) THEN
  295. INFO = -3
  296. ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
  297. INFO = -5
  298. ELSE IF( LDU.LT.1 .OR. ( WNTQAS .AND. LDU.LT.M ) .OR.
  299. $ ( WNTQO .AND. M.LT.N .AND. LDU.LT.M ) ) THEN
  300. INFO = -8
  301. ELSE IF( LDVT.LT.1 .OR. ( WNTQA .AND. LDVT.LT.N ) .OR.
  302. $ ( WNTQS .AND. LDVT.LT.MINMN ) .OR.
  303. $ ( WNTQO .AND. M.GE.N .AND. LDVT.LT.N ) ) THEN
  304. INFO = -10
  305. END IF
  306. *
  307. * Compute workspace
  308. * Note: Comments in the code beginning "Workspace:" describe the
  309. * minimal amount of workspace allocated at that point in the code,
  310. * as well as the preferred amount for good performance.
  311. * NB refers to the optimal block size for the immediately
  312. * following subroutine, as returned by ILAENV.
  313. *
  314. IF( INFO.EQ.0 ) THEN
  315. MINWRK = 1
  316. MAXWRK = 1
  317. BDSPAC = 0
  318. MNTHR = INT( MINMN*11.0E0 / 6.0E0 )
  319. IF( M.GE.N .AND. MINMN.GT.0 ) THEN
  320. *
  321. * Compute space needed for SBDSDC
  322. *
  323. IF( WNTQN ) THEN
  324. * sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
  325. * keep 7*N for backwards compatability.
  326. BDSPAC = 7*N
  327. ELSE
  328. BDSPAC = 3*N*N + 4*N
  329. END IF
  330. *
  331. * Compute space preferred for each routine
  332. CALL SGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1),
  333. $ DUM(1), DUM(1), -1, IERR )
  334. LWORK_SGEBRD_MN = INT( DUM(1) )
  335. *
  336. CALL SGEBRD( N, N, DUM(1), N, DUM(1), DUM(1), DUM(1),
  337. $ DUM(1), DUM(1), -1, IERR )
  338. LWORK_SGEBRD_NN = INT( DUM(1) )
  339. *
  340. CALL SGEQRF( M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR )
  341. LWORK_SGEQRF_MN = INT( DUM(1) )
  342. *
  343. CALL SORGBR( 'Q', N, N, N, DUM(1), N, DUM(1), DUM(1), -1,
  344. $ IERR )
  345. LWORK_SORGBR_Q_NN = INT( DUM(1) )
  346. *
  347. CALL SORGQR( M, M, N, DUM(1), M, DUM(1), DUM(1), -1, IERR )
  348. LWORK_SORGQR_MM = INT( DUM(1) )
  349. *
  350. CALL SORGQR( M, N, N, DUM(1), M, DUM(1), DUM(1), -1, IERR )
  351. LWORK_SORGQR_MN = INT( DUM(1) )
  352. *
  353. CALL SORMBR( 'P', 'R', 'T', N, N, N, DUM(1), N,
  354. $ DUM(1), DUM(1), N, DUM(1), -1, IERR )
  355. LWORK_SORMBR_PRT_NN = INT( DUM(1) )
  356. *
  357. CALL SORMBR( 'Q', 'L', 'N', N, N, N, DUM(1), N,
  358. $ DUM(1), DUM(1), N, DUM(1), -1, IERR )
  359. LWORK_SORMBR_QLN_NN = INT( DUM(1) )
  360. *
  361. CALL SORMBR( 'Q', 'L', 'N', M, N, N, DUM(1), M,
  362. $ DUM(1), DUM(1), M, DUM(1), -1, IERR )
  363. LWORK_SORMBR_QLN_MN = INT( DUM(1) )
  364. *
  365. CALL SORMBR( 'Q', 'L', 'N', M, M, N, DUM(1), M,
  366. $ DUM(1), DUM(1), M, DUM(1), -1, IERR )
  367. LWORK_SORMBR_QLN_MM = INT( DUM(1) )
  368. *
  369. IF( M.GE.MNTHR ) THEN
  370. IF( WNTQN ) THEN
  371. *
  372. * Path 1 (M >> N, JOBZ='N')
  373. *
  374. WRKBL = N + LWORK_SGEQRF_MN
  375. WRKBL = MAX( WRKBL, 3*N + LWORK_SGEBRD_NN )
  376. MAXWRK = MAX( WRKBL, BDSPAC + N )
  377. MINWRK = BDSPAC + N
  378. ELSE IF( WNTQO ) THEN
  379. *
  380. * Path 2 (M >> N, JOBZ='O')
  381. *
  382. WRKBL = N + LWORK_SGEQRF_MN
  383. WRKBL = MAX( WRKBL, N + LWORK_SORGQR_MN )
  384. WRKBL = MAX( WRKBL, 3*N + LWORK_SGEBRD_NN )
  385. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_NN )
  386. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN )
  387. WRKBL = MAX( WRKBL, 3*N + BDSPAC )
  388. MAXWRK = WRKBL + 2*N*N
  389. MINWRK = BDSPAC + 2*N*N + 3*N
  390. ELSE IF( WNTQS ) THEN
  391. *
  392. * Path 3 (M >> N, JOBZ='S')
  393. *
  394. WRKBL = N + LWORK_SGEQRF_MN
  395. WRKBL = MAX( WRKBL, N + LWORK_SORGQR_MN )
  396. WRKBL = MAX( WRKBL, 3*N + LWORK_SGEBRD_NN )
  397. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_NN )
  398. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN )
  399. WRKBL = MAX( WRKBL, 3*N + BDSPAC )
  400. MAXWRK = WRKBL + N*N
  401. MINWRK = BDSPAC + N*N + 3*N
  402. ELSE IF( WNTQA ) THEN
  403. *
  404. * Path 4 (M >> N, JOBZ='A')
  405. *
  406. WRKBL = N + LWORK_SGEQRF_MN
  407. WRKBL = MAX( WRKBL, N + LWORK_SORGQR_MM )
  408. WRKBL = MAX( WRKBL, 3*N + LWORK_SGEBRD_NN )
  409. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_NN )
  410. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN )
  411. WRKBL = MAX( WRKBL, 3*N + BDSPAC )
  412. MAXWRK = WRKBL + N*N
  413. MINWRK = N*N + MAX( 3*N + BDSPAC, N + M )
  414. END IF
  415. ELSE
  416. *
  417. * Path 5 (M >= N, but not much larger)
  418. *
  419. WRKBL = 3*N + LWORK_SGEBRD_MN
  420. IF( WNTQN ) THEN
  421. * Path 5n (M >= N, jobz='N')
  422. MAXWRK = MAX( WRKBL, 3*N + BDSPAC )
  423. MINWRK = 3*N + MAX( M, BDSPAC )
  424. ELSE IF( WNTQO ) THEN
  425. * Path 5o (M >= N, jobz='O')
  426. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN )
  427. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_MN )
  428. WRKBL = MAX( WRKBL, 3*N + BDSPAC )
  429. MAXWRK = WRKBL + M*N
  430. MINWRK = 3*N + MAX( M, N*N + BDSPAC )
  431. ELSE IF( WNTQS ) THEN
  432. * Path 5s (M >= N, jobz='S')
  433. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_MN )
  434. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN )
  435. MAXWRK = MAX( WRKBL, 3*N + BDSPAC )
  436. MINWRK = 3*N + MAX( M, BDSPAC )
  437. ELSE IF( WNTQA ) THEN
  438. * Path 5a (M >= N, jobz='A')
  439. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_QLN_MM )
  440. WRKBL = MAX( WRKBL, 3*N + LWORK_SORMBR_PRT_NN )
  441. MAXWRK = MAX( WRKBL, 3*N + BDSPAC )
  442. MINWRK = 3*N + MAX( M, BDSPAC )
  443. END IF
  444. END IF
  445. ELSE IF( MINMN.GT.0 ) THEN
  446. *
  447. * Compute space needed for SBDSDC
  448. *
  449. IF( WNTQN ) THEN
  450. * sbdsdc needs only 4*N (or 6*N for uplo=L for LAPACK <= 3.6)
  451. * keep 7*N for backwards compatability.
  452. BDSPAC = 7*M
  453. ELSE
  454. BDSPAC = 3*M*M + 4*M
  455. END IF
  456. *
  457. * Compute space preferred for each routine
  458. CALL SGEBRD( M, N, DUM(1), M, DUM(1), DUM(1), DUM(1),
  459. $ DUM(1), DUM(1), -1, IERR )
  460. LWORK_SGEBRD_MN = INT( DUM(1) )
  461. *
  462. CALL SGEBRD( M, M, A, M, S, DUM(1), DUM(1),
  463. $ DUM(1), DUM(1), -1, IERR )
  464. LWORK_SGEBRD_MM = INT( DUM(1) )
  465. *
  466. CALL SGELQF( M, N, A, M, DUM(1), DUM(1), -1, IERR )
  467. LWORK_SGELQF_MN = INT( DUM(1) )
  468. *
  469. CALL SORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
  470. LWORK_SORGLQ_NN = INT( DUM(1) )
  471. *
  472. CALL SORGLQ( M, N, M, A, M, DUM(1), DUM(1), -1, IERR )
  473. LWORK_SORGLQ_MN = INT( DUM(1) )
  474. *
  475. CALL SORGBR( 'P', M, M, M, A, N, DUM(1), DUM(1), -1, IERR )
  476. LWORK_SORGBR_P_MM = INT( DUM(1) )
  477. *
  478. CALL SORMBR( 'P', 'R', 'T', M, M, M, DUM(1), M,
  479. $ DUM(1), DUM(1), M, DUM(1), -1, IERR )
  480. LWORK_SORMBR_PRT_MM = INT( DUM(1) )
  481. *
  482. CALL SORMBR( 'P', 'R', 'T', M, N, M, DUM(1), M,
  483. $ DUM(1), DUM(1), M, DUM(1), -1, IERR )
  484. LWORK_SORMBR_PRT_MN = INT( DUM(1) )
  485. *
  486. CALL SORMBR( 'P', 'R', 'T', N, N, M, DUM(1), N,
  487. $ DUM(1), DUM(1), N, DUM(1), -1, IERR )
  488. LWORK_SORMBR_PRT_NN = INT( DUM(1) )
  489. *
  490. CALL SORMBR( 'Q', 'L', 'N', M, M, M, DUM(1), M,
  491. $ DUM(1), DUM(1), M, DUM(1), -1, IERR )
  492. LWORK_SORMBR_QLN_MM = INT( DUM(1) )
  493. *
  494. IF( N.GE.MNTHR ) THEN
  495. IF( WNTQN ) THEN
  496. *
  497. * Path 1t (N >> M, JOBZ='N')
  498. *
  499. WRKBL = M + LWORK_SGELQF_MN
  500. WRKBL = MAX( WRKBL, 3*M + LWORK_SGEBRD_MM )
  501. MAXWRK = MAX( WRKBL, BDSPAC + M )
  502. MINWRK = BDSPAC + M
  503. ELSE IF( WNTQO ) THEN
  504. *
  505. * Path 2t (N >> M, JOBZ='O')
  506. *
  507. WRKBL = M + LWORK_SGELQF_MN
  508. WRKBL = MAX( WRKBL, M + LWORK_SORGLQ_MN )
  509. WRKBL = MAX( WRKBL, 3*M + LWORK_SGEBRD_MM )
  510. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM )
  511. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MM )
  512. WRKBL = MAX( WRKBL, 3*M + BDSPAC )
  513. MAXWRK = WRKBL + 2*M*M
  514. MINWRK = BDSPAC + 2*M*M + 3*M
  515. ELSE IF( WNTQS ) THEN
  516. *
  517. * Path 3t (N >> M, JOBZ='S')
  518. *
  519. WRKBL = M + LWORK_SGELQF_MN
  520. WRKBL = MAX( WRKBL, M + LWORK_SORGLQ_MN )
  521. WRKBL = MAX( WRKBL, 3*M + LWORK_SGEBRD_MM )
  522. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM )
  523. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MM )
  524. WRKBL = MAX( WRKBL, 3*M + BDSPAC )
  525. MAXWRK = WRKBL + M*M
  526. MINWRK = BDSPAC + M*M + 3*M
  527. ELSE IF( WNTQA ) THEN
  528. *
  529. * Path 4t (N >> M, JOBZ='A')
  530. *
  531. WRKBL = M + LWORK_SGELQF_MN
  532. WRKBL = MAX( WRKBL, M + LWORK_SORGLQ_NN )
  533. WRKBL = MAX( WRKBL, 3*M + LWORK_SGEBRD_MM )
  534. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM )
  535. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MM )
  536. WRKBL = MAX( WRKBL, 3*M + BDSPAC )
  537. MAXWRK = WRKBL + M*M
  538. MINWRK = M*M + MAX( 3*M + BDSPAC, M + N )
  539. END IF
  540. ELSE
  541. *
  542. * Path 5t (N > M, but not much larger)
  543. *
  544. WRKBL = 3*M + LWORK_SGEBRD_MN
  545. IF( WNTQN ) THEN
  546. * Path 5tn (N > M, jobz='N')
  547. MAXWRK = MAX( WRKBL, 3*M + BDSPAC )
  548. MINWRK = 3*M + MAX( N, BDSPAC )
  549. ELSE IF( WNTQO ) THEN
  550. * Path 5to (N > M, jobz='O')
  551. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM )
  552. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MN )
  553. WRKBL = MAX( WRKBL, 3*M + BDSPAC )
  554. MAXWRK = WRKBL + M*N
  555. MINWRK = 3*M + MAX( N, M*M + BDSPAC )
  556. ELSE IF( WNTQS ) THEN
  557. * Path 5ts (N > M, jobz='S')
  558. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM )
  559. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_MN )
  560. MAXWRK = MAX( WRKBL, 3*M + BDSPAC )
  561. MINWRK = 3*M + MAX( N, BDSPAC )
  562. ELSE IF( WNTQA ) THEN
  563. * Path 5ta (N > M, jobz='A')
  564. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_QLN_MM )
  565. WRKBL = MAX( WRKBL, 3*M + LWORK_SORMBR_PRT_NN )
  566. MAXWRK = MAX( WRKBL, 3*M + BDSPAC )
  567. MINWRK = 3*M + MAX( N, BDSPAC )
  568. END IF
  569. END IF
  570. END IF
  571. MAXWRK = MAX( MAXWRK, MINWRK )
  572. WORK( 1 ) = MAXWRK
  573. *
  574. IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
  575. INFO = -12
  576. END IF
  577. END IF
  578. *
  579. IF( INFO.NE.0 ) THEN
  580. CALL XERBLA( 'SGESDD', -INFO )
  581. RETURN
  582. ELSE IF( LQUERY ) THEN
  583. RETURN
  584. END IF
  585. *
  586. * Quick return if possible
  587. *
  588. IF( M.EQ.0 .OR. N.EQ.0 ) THEN
  589. RETURN
  590. END IF
  591. *
  592. * Get machine constants
  593. *
  594. EPS = SLAMCH( 'P' )
  595. SMLNUM = SQRT( SLAMCH( 'S' ) ) / EPS
  596. BIGNUM = ONE / SMLNUM
  597. *
  598. * Scale A if max element outside range [SMLNUM,BIGNUM]
  599. *
  600. ANRM = SLANGE( 'M', M, N, A, LDA, DUM )
  601. ISCL = 0
  602. IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
  603. ISCL = 1
  604. CALL SLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
  605. ELSE IF( ANRM.GT.BIGNUM ) THEN
  606. ISCL = 1
  607. CALL SLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
  608. END IF
  609. *
  610. IF( M.GE.N ) THEN
  611. *
  612. * A has at least as many rows as columns. If A has sufficiently
  613. * more rows than columns, first reduce using the QR
  614. * decomposition (if sufficient workspace available)
  615. *
  616. IF( M.GE.MNTHR ) THEN
  617. *
  618. IF( WNTQN ) THEN
  619. *
  620. * Path 1 (M >> N, JOBZ='N')
  621. * No singular vectors to be computed
  622. *
  623. ITAU = 1
  624. NWORK = ITAU + N
  625. *
  626. * Compute A=Q*R
  627. * Workspace: need N [tau] + N [work]
  628. * Workspace: prefer N [tau] + N*NB [work]
  629. *
  630. CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  631. $ LWORK - NWORK + 1, IERR )
  632. *
  633. * Zero out below R
  634. *
  635. CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
  636. IE = 1
  637. ITAUQ = IE + N
  638. ITAUP = ITAUQ + N
  639. NWORK = ITAUP + N
  640. *
  641. * Bidiagonalize R in A
  642. * Workspace: need 3*N [e, tauq, taup] + N [work]
  643. * Workspace: prefer 3*N [e, tauq, taup] + 2*N*NB [work]
  644. *
  645. CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  646. $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
  647. $ IERR )
  648. NWORK = IE + N
  649. *
  650. * Perform bidiagonal SVD, computing singular values only
  651. * Workspace: need N [e] + BDSPAC
  652. *
  653. CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
  654. $ DUM, IDUM, WORK( NWORK ), IWORK, INFO )
  655. *
  656. ELSE IF( WNTQO ) THEN
  657. *
  658. * Path 2 (M >> N, JOBZ = 'O')
  659. * N left singular vectors to be overwritten on A and
  660. * N right singular vectors to be computed in VT
  661. *
  662. IR = 1
  663. *
  664. * WORK(IR) is LDWRKR by N
  665. *
  666. IF( LWORK .GE. LDA*N + N*N + 3*N + BDSPAC ) THEN
  667. LDWRKR = LDA
  668. ELSE
  669. LDWRKR = ( LWORK - N*N - 3*N - BDSPAC ) / N
  670. END IF
  671. ITAU = IR + LDWRKR*N
  672. NWORK = ITAU + N
  673. *
  674. * Compute A=Q*R
  675. * Workspace: need N*N [R] + N [tau] + N [work]
  676. * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
  677. *
  678. CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  679. $ LWORK - NWORK + 1, IERR )
  680. *
  681. * Copy R to WORK(IR), zeroing out below it
  682. *
  683. CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
  684. CALL SLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1),
  685. $ LDWRKR )
  686. *
  687. * Generate Q in A
  688. * Workspace: need N*N [R] + N [tau] + N [work]
  689. * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
  690. *
  691. CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
  692. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  693. IE = ITAU
  694. ITAUQ = IE + N
  695. ITAUP = ITAUQ + N
  696. NWORK = ITAUP + N
  697. *
  698. * Bidiagonalize R in WORK(IR)
  699. * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
  700. * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
  701. *
  702. CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
  703. $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
  704. $ LWORK - NWORK + 1, IERR )
  705. *
  706. * WORK(IU) is N by N
  707. *
  708. IU = NWORK
  709. NWORK = IU + N*N
  710. *
  711. * Perform bidiagonal SVD, computing left singular vectors
  712. * of bidiagonal matrix in WORK(IU) and computing right
  713. * singular vectors of bidiagonal matrix in VT
  714. * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + BDSPAC
  715. *
  716. CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
  717. $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  718. $ INFO )
  719. *
  720. * Overwrite WORK(IU) by left singular vectors of R
  721. * and VT by right singular vectors of R
  722. * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N [work]
  723. * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
  724. *
  725. CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
  726. $ WORK( ITAUQ ), WORK( IU ), N, WORK( NWORK ),
  727. $ LWORK - NWORK + 1, IERR )
  728. CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
  729. $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  730. $ LWORK - NWORK + 1, IERR )
  731. *
  732. * Multiply Q in A by left singular vectors of R in
  733. * WORK(IU), storing result in WORK(IR) and copying to A
  734. * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N*N [U]
  735. * Workspace: prefer M*N [R] + 3*N [e, tauq, taup] + N*N [U]
  736. *
  737. DO 10 I = 1, M, LDWRKR
  738. CHUNK = MIN( M - I + 1, LDWRKR )
  739. CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
  740. $ LDA, WORK( IU ), N, ZERO, WORK( IR ),
  741. $ LDWRKR )
  742. CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
  743. $ A( I, 1 ), LDA )
  744. 10 CONTINUE
  745. *
  746. ELSE IF( WNTQS ) THEN
  747. *
  748. * Path 3 (M >> N, JOBZ='S')
  749. * N left singular vectors to be computed in U and
  750. * N right singular vectors to be computed in VT
  751. *
  752. IR = 1
  753. *
  754. * WORK(IR) is N by N
  755. *
  756. LDWRKR = N
  757. ITAU = IR + LDWRKR*N
  758. NWORK = ITAU + N
  759. *
  760. * Compute A=Q*R
  761. * Workspace: need N*N [R] + N [tau] + N [work]
  762. * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
  763. *
  764. CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  765. $ LWORK - NWORK + 1, IERR )
  766. *
  767. * Copy R to WORK(IR), zeroing out below it
  768. *
  769. CALL SLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
  770. CALL SLASET( 'L', N - 1, N - 1, ZERO, ZERO, WORK(IR+1),
  771. $ LDWRKR )
  772. *
  773. * Generate Q in A
  774. * Workspace: need N*N [R] + N [tau] + N [work]
  775. * Workspace: prefer N*N [R] + N [tau] + N*NB [work]
  776. *
  777. CALL SORGQR( M, N, N, A, LDA, WORK( ITAU ),
  778. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  779. IE = ITAU
  780. ITAUQ = IE + N
  781. ITAUP = ITAUQ + N
  782. NWORK = ITAUP + N
  783. *
  784. * Bidiagonalize R in WORK(IR)
  785. * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
  786. * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + 2*N*NB [work]
  787. *
  788. CALL SGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
  789. $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
  790. $ LWORK - NWORK + 1, IERR )
  791. *
  792. * Perform bidiagonal SVD, computing left singular vectors
  793. * of bidiagoal matrix in U and computing right singular
  794. * vectors of bidiagonal matrix in VT
  795. * Workspace: need N*N [R] + 3*N [e, tauq, taup] + BDSPAC
  796. *
  797. CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
  798. $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  799. $ INFO )
  800. *
  801. * Overwrite U by left singular vectors of R and VT
  802. * by right singular vectors of R
  803. * Workspace: need N*N [R] + 3*N [e, tauq, taup] + N [work]
  804. * Workspace: prefer N*N [R] + 3*N [e, tauq, taup] + N*NB [work]
  805. *
  806. CALL SORMBR( 'Q', 'L', 'N', N, N, N, WORK( IR ), LDWRKR,
  807. $ WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  808. $ LWORK - NWORK + 1, IERR )
  809. *
  810. CALL SORMBR( 'P', 'R', 'T', N, N, N, WORK( IR ), LDWRKR,
  811. $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  812. $ LWORK - NWORK + 1, IERR )
  813. *
  814. * Multiply Q in A by left singular vectors of R in
  815. * WORK(IR), storing result in U
  816. * Workspace: need N*N [R]
  817. *
  818. CALL SLACPY( 'F', N, N, U, LDU, WORK( IR ), LDWRKR )
  819. CALL SGEMM( 'N', 'N', M, N, N, ONE, A, LDA, WORK( IR ),
  820. $ LDWRKR, ZERO, U, LDU )
  821. *
  822. ELSE IF( WNTQA ) THEN
  823. *
  824. * Path 4 (M >> N, JOBZ='A')
  825. * M left singular vectors to be computed in U and
  826. * N right singular vectors to be computed in VT
  827. *
  828. IU = 1
  829. *
  830. * WORK(IU) is N by N
  831. *
  832. LDWRKU = N
  833. ITAU = IU + LDWRKU*N
  834. NWORK = ITAU + N
  835. *
  836. * Compute A=Q*R, copying result to U
  837. * Workspace: need N*N [U] + N [tau] + N [work]
  838. * Workspace: prefer N*N [U] + N [tau] + N*NB [work]
  839. *
  840. CALL SGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  841. $ LWORK - NWORK + 1, IERR )
  842. CALL SLACPY( 'L', M, N, A, LDA, U, LDU )
  843. *
  844. * Generate Q in U
  845. * Workspace: need N*N [U] + N [tau] + M [work]
  846. * Workspace: prefer N*N [U] + N [tau] + M*NB [work]
  847. CALL SORGQR( M, M, N, U, LDU, WORK( ITAU ),
  848. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  849. *
  850. * Produce R in A, zeroing out other entries
  851. *
  852. CALL SLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ), LDA )
  853. IE = ITAU
  854. ITAUQ = IE + N
  855. ITAUP = ITAUQ + N
  856. NWORK = ITAUP + N
  857. *
  858. * Bidiagonalize R in A
  859. * Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
  860. * Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + 2*N*NB [work]
  861. *
  862. CALL SGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  863. $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
  864. $ IERR )
  865. *
  866. * Perform bidiagonal SVD, computing left singular vectors
  867. * of bidiagonal matrix in WORK(IU) and computing right
  868. * singular vectors of bidiagonal matrix in VT
  869. * Workspace: need N*N [U] + 3*N [e, tauq, taup] + BDSPAC
  870. *
  871. CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ), N,
  872. $ VT, LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  873. $ INFO )
  874. *
  875. * Overwrite WORK(IU) by left singular vectors of R and VT
  876. * by right singular vectors of R
  877. * Workspace: need N*N [U] + 3*N [e, tauq, taup] + N [work]
  878. * Workspace: prefer N*N [U] + 3*N [e, tauq, taup] + N*NB [work]
  879. *
  880. CALL SORMBR( 'Q', 'L', 'N', N, N, N, A, LDA,
  881. $ WORK( ITAUQ ), WORK( IU ), LDWRKU,
  882. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  883. CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
  884. $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  885. $ LWORK - NWORK + 1, IERR )
  886. *
  887. * Multiply Q in U by left singular vectors of R in
  888. * WORK(IU), storing result in A
  889. * Workspace: need N*N [U]
  890. *
  891. CALL SGEMM( 'N', 'N', M, N, N, ONE, U, LDU, WORK( IU ),
  892. $ LDWRKU, ZERO, A, LDA )
  893. *
  894. * Copy left singular vectors of A from A to U
  895. *
  896. CALL SLACPY( 'F', M, N, A, LDA, U, LDU )
  897. *
  898. END IF
  899. *
  900. ELSE
  901. *
  902. * M .LT. MNTHR
  903. *
  904. * Path 5 (M >= N, but not much larger)
  905. * Reduce to bidiagonal form without QR decomposition
  906. *
  907. IE = 1
  908. ITAUQ = IE + N
  909. ITAUP = ITAUQ + N
  910. NWORK = ITAUP + N
  911. *
  912. * Bidiagonalize A
  913. * Workspace: need 3*N [e, tauq, taup] + M [work]
  914. * Workspace: prefer 3*N [e, tauq, taup] + (M+N)*NB [work]
  915. *
  916. CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  917. $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
  918. $ IERR )
  919. IF( WNTQN ) THEN
  920. *
  921. * Path 5n (M >= N, JOBZ='N')
  922. * Perform bidiagonal SVD, only computing singular values
  923. * Workspace: need 3*N [e, tauq, taup] + BDSPAC
  924. *
  925. CALL SBDSDC( 'U', 'N', N, S, WORK( IE ), DUM, 1, DUM, 1,
  926. $ DUM, IDUM, WORK( NWORK ), IWORK, INFO )
  927. ELSE IF( WNTQO ) THEN
  928. * Path 5o (M >= N, JOBZ='O')
  929. IU = NWORK
  930. IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN
  931. *
  932. * WORK( IU ) is M by N
  933. *
  934. LDWRKU = M
  935. NWORK = IU + LDWRKU*N
  936. CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IU ),
  937. $ LDWRKU )
  938. * IR is unused; silence compile warnings
  939. IR = -1
  940. ELSE
  941. *
  942. * WORK( IU ) is N by N
  943. *
  944. LDWRKU = N
  945. NWORK = IU + LDWRKU*N
  946. *
  947. * WORK(IR) is LDWRKR by N
  948. *
  949. IR = NWORK
  950. LDWRKR = ( LWORK - N*N - 3*N ) / N
  951. END IF
  952. NWORK = IU + LDWRKU*N
  953. *
  954. * Perform bidiagonal SVD, computing left singular vectors
  955. * of bidiagonal matrix in WORK(IU) and computing right
  956. * singular vectors of bidiagonal matrix in VT
  957. * Workspace: need 3*N [e, tauq, taup] + N*N [U] + BDSPAC
  958. *
  959. CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), WORK( IU ),
  960. $ LDWRKU, VT, LDVT, DUM, IDUM, WORK( NWORK ),
  961. $ IWORK, INFO )
  962. *
  963. * Overwrite VT by right singular vectors of A
  964. * Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
  965. * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
  966. *
  967. CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
  968. $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  969. $ LWORK - NWORK + 1, IERR )
  970. *
  971. IF( LWORK .GE. M*N + 3*N + BDSPAC ) THEN
  972. *
  973. * Path 5o-fast
  974. * Overwrite WORK(IU) by left singular vectors of A
  975. * Workspace: need 3*N [e, tauq, taup] + M*N [U] + N [work]
  976. * Workspace: prefer 3*N [e, tauq, taup] + M*N [U] + N*NB [work]
  977. *
  978. CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
  979. $ WORK( ITAUQ ), WORK( IU ), LDWRKU,
  980. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  981. *
  982. * Copy left singular vectors of A from WORK(IU) to A
  983. *
  984. CALL SLACPY( 'F', M, N, WORK( IU ), LDWRKU, A, LDA )
  985. ELSE
  986. *
  987. * Path 5o-slow
  988. * Generate Q in A
  989. * Workspace: need 3*N [e, tauq, taup] + N*N [U] + N [work]
  990. * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + N*NB [work]
  991. *
  992. CALL SORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
  993. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  994. *
  995. * Multiply Q in A by left singular vectors of
  996. * bidiagonal matrix in WORK(IU), storing result in
  997. * WORK(IR) and copying to A
  998. * Workspace: need 3*N [e, tauq, taup] + N*N [U] + NB*N [R]
  999. * Workspace: prefer 3*N [e, tauq, taup] + N*N [U] + M*N [R]
  1000. *
  1001. DO 20 I = 1, M, LDWRKR
  1002. CHUNK = MIN( M - I + 1, LDWRKR )
  1003. CALL SGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
  1004. $ LDA, WORK( IU ), LDWRKU, ZERO,
  1005. $ WORK( IR ), LDWRKR )
  1006. CALL SLACPY( 'F', CHUNK, N, WORK( IR ), LDWRKR,
  1007. $ A( I, 1 ), LDA )
  1008. 20 CONTINUE
  1009. END IF
  1010. *
  1011. ELSE IF( WNTQS ) THEN
  1012. *
  1013. * Path 5s (M >= N, JOBZ='S')
  1014. * Perform bidiagonal SVD, computing left singular vectors
  1015. * of bidiagonal matrix in U and computing right singular
  1016. * vectors of bidiagonal matrix in VT
  1017. * Workspace: need 3*N [e, tauq, taup] + BDSPAC
  1018. *
  1019. CALL SLASET( 'F', M, N, ZERO, ZERO, U, LDU )
  1020. CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
  1021. $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  1022. $ INFO )
  1023. *
  1024. * Overwrite U by left singular vectors of A and VT
  1025. * by right singular vectors of A
  1026. * Workspace: need 3*N [e, tauq, taup] + N [work]
  1027. * Workspace: prefer 3*N [e, tauq, taup] + N*NB [work]
  1028. *
  1029. CALL SORMBR( 'Q', 'L', 'N', M, N, N, A, LDA,
  1030. $ WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  1031. $ LWORK - NWORK + 1, IERR )
  1032. CALL SORMBR( 'P', 'R', 'T', N, N, N, A, LDA,
  1033. $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  1034. $ LWORK - NWORK + 1, IERR )
  1035. ELSE IF( WNTQA ) THEN
  1036. *
  1037. * Path 5a (M >= N, JOBZ='A')
  1038. * Perform bidiagonal SVD, computing left singular vectors
  1039. * of bidiagonal matrix in U and computing right singular
  1040. * vectors of bidiagonal matrix in VT
  1041. * Workspace: need 3*N [e, tauq, taup] + BDSPAC
  1042. *
  1043. CALL SLASET( 'F', M, M, ZERO, ZERO, U, LDU )
  1044. CALL SBDSDC( 'U', 'I', N, S, WORK( IE ), U, LDU, VT,
  1045. $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  1046. $ INFO )
  1047. *
  1048. * Set the right corner of U to identity matrix
  1049. *
  1050. IF( M.GT.N ) THEN
  1051. CALL SLASET( 'F', M - N, M - N, ZERO, ONE, U(N+1,N+1),
  1052. $ LDU )
  1053. END IF
  1054. *
  1055. * Overwrite U by left singular vectors of A and VT
  1056. * by right singular vectors of A
  1057. * Workspace: need 3*N [e, tauq, taup] + M [work]
  1058. * Workspace: prefer 3*N [e, tauq, taup] + M*NB [work]
  1059. *
  1060. CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
  1061. $ WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  1062. $ LWORK - NWORK + 1, IERR )
  1063. CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
  1064. $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  1065. $ LWORK - NWORK + 1, IERR )
  1066. END IF
  1067. *
  1068. END IF
  1069. *
  1070. ELSE
  1071. *
  1072. * A has more columns than rows. If A has sufficiently more
  1073. * columns than rows, first reduce using the LQ decomposition (if
  1074. * sufficient workspace available)
  1075. *
  1076. IF( N.GE.MNTHR ) THEN
  1077. *
  1078. IF( WNTQN ) THEN
  1079. *
  1080. * Path 1t (N >> M, JOBZ='N')
  1081. * No singular vectors to be computed
  1082. *
  1083. ITAU = 1
  1084. NWORK = ITAU + M
  1085. *
  1086. * Compute A=L*Q
  1087. * Workspace: need M [tau] + M [work]
  1088. * Workspace: prefer M [tau] + M*NB [work]
  1089. *
  1090. CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  1091. $ LWORK - NWORK + 1, IERR )
  1092. *
  1093. * Zero out above L
  1094. *
  1095. CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
  1096. IE = 1
  1097. ITAUQ = IE + M
  1098. ITAUP = ITAUQ + M
  1099. NWORK = ITAUP + M
  1100. *
  1101. * Bidiagonalize L in A
  1102. * Workspace: need 3*M [e, tauq, taup] + M [work]
  1103. * Workspace: prefer 3*M [e, tauq, taup] + 2*M*NB [work]
  1104. *
  1105. CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  1106. $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
  1107. $ IERR )
  1108. NWORK = IE + M
  1109. *
  1110. * Perform bidiagonal SVD, computing singular values only
  1111. * Workspace: need M [e] + BDSPAC
  1112. *
  1113. CALL SBDSDC( 'U', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
  1114. $ DUM, IDUM, WORK( NWORK ), IWORK, INFO )
  1115. *
  1116. ELSE IF( WNTQO ) THEN
  1117. *
  1118. * Path 2t (N >> M, JOBZ='O')
  1119. * M right singular vectors to be overwritten on A and
  1120. * M left singular vectors to be computed in U
  1121. *
  1122. IVT = 1
  1123. *
  1124. * WORK(IVT) is M by M
  1125. * WORK(IL) is M by M; it is later resized to M by chunk for gemm
  1126. *
  1127. IL = IVT + M*M
  1128. IF( LWORK .GE. M*N + M*M + 3*M + BDSPAC ) THEN
  1129. LDWRKL = M
  1130. CHUNK = N
  1131. ELSE
  1132. LDWRKL = M
  1133. CHUNK = ( LWORK - M*M ) / M
  1134. END IF
  1135. ITAU = IL + LDWRKL*M
  1136. NWORK = ITAU + M
  1137. *
  1138. * Compute A=L*Q
  1139. * Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
  1140. * Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
  1141. *
  1142. CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  1143. $ LWORK - NWORK + 1, IERR )
  1144. *
  1145. * Copy L to WORK(IL), zeroing about above it
  1146. *
  1147. CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
  1148. CALL SLASET( 'U', M - 1, M - 1, ZERO, ZERO,
  1149. $ WORK( IL + LDWRKL ), LDWRKL )
  1150. *
  1151. * Generate Q in A
  1152. * Workspace: need M*M [VT] + M*M [L] + M [tau] + M [work]
  1153. * Workspace: prefer M*M [VT] + M*M [L] + M [tau] + M*NB [work]
  1154. *
  1155. CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
  1156. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  1157. IE = ITAU
  1158. ITAUQ = IE + M
  1159. ITAUP = ITAUQ + M
  1160. NWORK = ITAUP + M
  1161. *
  1162. * Bidiagonalize L in WORK(IL)
  1163. * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
  1164. * Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
  1165. *
  1166. CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
  1167. $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
  1168. $ LWORK - NWORK + 1, IERR )
  1169. *
  1170. * Perform bidiagonal SVD, computing left singular vectors
  1171. * of bidiagonal matrix in U, and computing right singular
  1172. * vectors of bidiagonal matrix in WORK(IVT)
  1173. * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + BDSPAC
  1174. *
  1175. CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
  1176. $ WORK( IVT ), M, DUM, IDUM, WORK( NWORK ),
  1177. $ IWORK, INFO )
  1178. *
  1179. * Overwrite U by left singular vectors of L and WORK(IVT)
  1180. * by right singular vectors of L
  1181. * Workspace: need M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M [work]
  1182. * Workspace: prefer M*M [VT] + M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
  1183. *
  1184. CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
  1185. $ WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  1186. $ LWORK - NWORK + 1, IERR )
  1187. CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
  1188. $ WORK( ITAUP ), WORK( IVT ), M,
  1189. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  1190. *
  1191. * Multiply right singular vectors of L in WORK(IVT) by Q
  1192. * in A, storing result in WORK(IL) and copying to A
  1193. * Workspace: need M*M [VT] + M*M [L]
  1194. * Workspace: prefer M*M [VT] + M*N [L]
  1195. * At this point, L is resized as M by chunk.
  1196. *
  1197. DO 30 I = 1, N, CHUNK
  1198. BLK = MIN( N - I + 1, CHUNK )
  1199. CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ), M,
  1200. $ A( 1, I ), LDA, ZERO, WORK( IL ), LDWRKL )
  1201. CALL SLACPY( 'F', M, BLK, WORK( IL ), LDWRKL,
  1202. $ A( 1, I ), LDA )
  1203. 30 CONTINUE
  1204. *
  1205. ELSE IF( WNTQS ) THEN
  1206. *
  1207. * Path 3t (N >> M, JOBZ='S')
  1208. * M right singular vectors to be computed in VT and
  1209. * M left singular vectors to be computed in U
  1210. *
  1211. IL = 1
  1212. *
  1213. * WORK(IL) is M by M
  1214. *
  1215. LDWRKL = M
  1216. ITAU = IL + LDWRKL*M
  1217. NWORK = ITAU + M
  1218. *
  1219. * Compute A=L*Q
  1220. * Workspace: need M*M [L] + M [tau] + M [work]
  1221. * Workspace: prefer M*M [L] + M [tau] + M*NB [work]
  1222. *
  1223. CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  1224. $ LWORK - NWORK + 1, IERR )
  1225. *
  1226. * Copy L to WORK(IL), zeroing out above it
  1227. *
  1228. CALL SLACPY( 'L', M, M, A, LDA, WORK( IL ), LDWRKL )
  1229. CALL SLASET( 'U', M - 1, M - 1, ZERO, ZERO,
  1230. $ WORK( IL + LDWRKL ), LDWRKL )
  1231. *
  1232. * Generate Q in A
  1233. * Workspace: need M*M [L] + M [tau] + M [work]
  1234. * Workspace: prefer M*M [L] + M [tau] + M*NB [work]
  1235. *
  1236. CALL SORGLQ( M, N, M, A, LDA, WORK( ITAU ),
  1237. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  1238. IE = ITAU
  1239. ITAUQ = IE + M
  1240. ITAUP = ITAUQ + M
  1241. NWORK = ITAUP + M
  1242. *
  1243. * Bidiagonalize L in WORK(IU).
  1244. * Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
  1245. * Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + 2*M*NB [work]
  1246. *
  1247. CALL SGEBRD( M, M, WORK( IL ), LDWRKL, S, WORK( IE ),
  1248. $ WORK( ITAUQ ), WORK( ITAUP ), WORK( NWORK ),
  1249. $ LWORK - NWORK + 1, IERR )
  1250. *
  1251. * Perform bidiagonal SVD, computing left singular vectors
  1252. * of bidiagonal matrix in U and computing right singular
  1253. * vectors of bidiagonal matrix in VT
  1254. * Workspace: need M*M [L] + 3*M [e, tauq, taup] + BDSPAC
  1255. *
  1256. CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU, VT,
  1257. $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  1258. $ INFO )
  1259. *
  1260. * Overwrite U by left singular vectors of L and VT
  1261. * by right singular vectors of L
  1262. * Workspace: need M*M [L] + 3*M [e, tauq, taup] + M [work]
  1263. * Workspace: prefer M*M [L] + 3*M [e, tauq, taup] + M*NB [work]
  1264. *
  1265. CALL SORMBR( 'Q', 'L', 'N', M, M, M, WORK( IL ), LDWRKL,
  1266. $ WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  1267. $ LWORK - NWORK + 1, IERR )
  1268. CALL SORMBR( 'P', 'R', 'T', M, M, M, WORK( IL ), LDWRKL,
  1269. $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  1270. $ LWORK - NWORK + 1, IERR )
  1271. *
  1272. * Multiply right singular vectors of L in WORK(IL) by
  1273. * Q in A, storing result in VT
  1274. * Workspace: need M*M [L]
  1275. *
  1276. CALL SLACPY( 'F', M, M, VT, LDVT, WORK( IL ), LDWRKL )
  1277. CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IL ), LDWRKL,
  1278. $ A, LDA, ZERO, VT, LDVT )
  1279. *
  1280. ELSE IF( WNTQA ) THEN
  1281. *
  1282. * Path 4t (N >> M, JOBZ='A')
  1283. * N right singular vectors to be computed in VT and
  1284. * M left singular vectors to be computed in U
  1285. *
  1286. IVT = 1
  1287. *
  1288. * WORK(IVT) is M by M
  1289. *
  1290. LDWKVT = M
  1291. ITAU = IVT + LDWKVT*M
  1292. NWORK = ITAU + M
  1293. *
  1294. * Compute A=L*Q, copying result to VT
  1295. * Workspace: need M*M [VT] + M [tau] + M [work]
  1296. * Workspace: prefer M*M [VT] + M [tau] + M*NB [work]
  1297. *
  1298. CALL SGELQF( M, N, A, LDA, WORK( ITAU ), WORK( NWORK ),
  1299. $ LWORK - NWORK + 1, IERR )
  1300. CALL SLACPY( 'U', M, N, A, LDA, VT, LDVT )
  1301. *
  1302. * Generate Q in VT
  1303. * Workspace: need M*M [VT] + M [tau] + N [work]
  1304. * Workspace: prefer M*M [VT] + M [tau] + N*NB [work]
  1305. *
  1306. CALL SORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
  1307. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  1308. *
  1309. * Produce L in A, zeroing out other entries
  1310. *
  1311. CALL SLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
  1312. IE = ITAU
  1313. ITAUQ = IE + M
  1314. ITAUP = ITAUQ + M
  1315. NWORK = ITAUP + M
  1316. *
  1317. * Bidiagonalize L in A
  1318. * Workspace: need M*M [VT] + 3*M [e, tauq, taup] + M [work]
  1319. * Workspace: prefer M*M [VT] + 3*M [e, tauq, taup] + 2*M*NB [work]
  1320. *
  1321. CALL SGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  1322. $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
  1323. $ IERR )
  1324. *
  1325. * Perform bidiagonal SVD, computing left singular vectors
  1326. * of bidiagonal matrix in U and computing right singular
  1327. * vectors of bidiagonal matrix in WORK(IVT)
  1328. * Workspace: need M*M [VT] + 3*M [e, tauq, taup] + BDSPAC
  1329. *
  1330. CALL SBDSDC( 'U', 'I', M, S, WORK( IE ), U, LDU,
  1331. $ WORK( IVT ), LDWKVT, DUM, IDUM,
  1332. $ WORK( NWORK ), IWORK, INFO )
  1333. *
  1334. * Overwrite U by left singular vectors of L and WORK(IVT)
  1335. * by right singular vectors of L
  1336. * Workspace: need M*M [VT] + 3*M [e, tauq, taup]+ M [work]
  1337. * Workspace: prefer M*M [VT] + 3*M [e, tauq, taup]+ M*NB [work]
  1338. *
  1339. CALL SORMBR( 'Q', 'L', 'N', M, M, M, A, LDA,
  1340. $ WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  1341. $ LWORK - NWORK + 1, IERR )
  1342. CALL SORMBR( 'P', 'R', 'T', M, M, M, A, LDA,
  1343. $ WORK( ITAUP ), WORK( IVT ), LDWKVT,
  1344. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  1345. *
  1346. * Multiply right singular vectors of L in WORK(IVT) by
  1347. * Q in VT, storing result in A
  1348. * Workspace: need M*M [VT]
  1349. *
  1350. CALL SGEMM( 'N', 'N', M, N, M, ONE, WORK( IVT ), LDWKVT,
  1351. $ VT, LDVT, ZERO, A, LDA )
  1352. *
  1353. * Copy right singular vectors of A from A to VT
  1354. *
  1355. CALL SLACPY( 'F', M, N, A, LDA, VT, LDVT )
  1356. *
  1357. END IF
  1358. *
  1359. ELSE
  1360. *
  1361. * N .LT. MNTHR
  1362. *
  1363. * Path 5t (N > M, but not much larger)
  1364. * Reduce to bidiagonal form without LQ decomposition
  1365. *
  1366. IE = 1
  1367. ITAUQ = IE + M
  1368. ITAUP = ITAUQ + M
  1369. NWORK = ITAUP + M
  1370. *
  1371. * Bidiagonalize A
  1372. * Workspace: need 3*M [e, tauq, taup] + N [work]
  1373. * Workspace: prefer 3*M [e, tauq, taup] + (M+N)*NB [work]
  1374. *
  1375. CALL SGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  1376. $ WORK( ITAUP ), WORK( NWORK ), LWORK-NWORK+1,
  1377. $ IERR )
  1378. IF( WNTQN ) THEN
  1379. *
  1380. * Path 5tn (N > M, JOBZ='N')
  1381. * Perform bidiagonal SVD, only computing singular values
  1382. * Workspace: need 3*M [e, tauq, taup] + BDSPAC
  1383. *
  1384. CALL SBDSDC( 'L', 'N', M, S, WORK( IE ), DUM, 1, DUM, 1,
  1385. $ DUM, IDUM, WORK( NWORK ), IWORK, INFO )
  1386. ELSE IF( WNTQO ) THEN
  1387. * Path 5to (N > M, JOBZ='O')
  1388. LDWKVT = M
  1389. IVT = NWORK
  1390. IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN
  1391. *
  1392. * WORK( IVT ) is M by N
  1393. *
  1394. CALL SLASET( 'F', M, N, ZERO, ZERO, WORK( IVT ),
  1395. $ LDWKVT )
  1396. NWORK = IVT + LDWKVT*N
  1397. * IL is unused; silence compile warnings
  1398. IL = -1
  1399. ELSE
  1400. *
  1401. * WORK( IVT ) is M by M
  1402. *
  1403. NWORK = IVT + LDWKVT*M
  1404. IL = NWORK
  1405. *
  1406. * WORK(IL) is M by CHUNK
  1407. *
  1408. CHUNK = ( LWORK - M*M - 3*M ) / M
  1409. END IF
  1410. *
  1411. * Perform bidiagonal SVD, computing left singular vectors
  1412. * of bidiagonal matrix in U and computing right singular
  1413. * vectors of bidiagonal matrix in WORK(IVT)
  1414. * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + BDSPAC
  1415. *
  1416. CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU,
  1417. $ WORK( IVT ), LDWKVT, DUM, IDUM,
  1418. $ WORK( NWORK ), IWORK, INFO )
  1419. *
  1420. * Overwrite U by left singular vectors of A
  1421. * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
  1422. * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
  1423. *
  1424. CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
  1425. $ WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  1426. $ LWORK - NWORK + 1, IERR )
  1427. *
  1428. IF( LWORK .GE. M*N + 3*M + BDSPAC ) THEN
  1429. *
  1430. * Path 5to-fast
  1431. * Overwrite WORK(IVT) by left singular vectors of A
  1432. * Workspace: need 3*M [e, tauq, taup] + M*N [VT] + M [work]
  1433. * Workspace: prefer 3*M [e, tauq, taup] + M*N [VT] + M*NB [work]
  1434. *
  1435. CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
  1436. $ WORK( ITAUP ), WORK( IVT ), LDWKVT,
  1437. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  1438. *
  1439. * Copy right singular vectors of A from WORK(IVT) to A
  1440. *
  1441. CALL SLACPY( 'F', M, N, WORK( IVT ), LDWKVT, A, LDA )
  1442. ELSE
  1443. *
  1444. * Path 5to-slow
  1445. * Generate P**T in A
  1446. * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M [work]
  1447. * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*NB [work]
  1448. *
  1449. CALL SORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
  1450. $ WORK( NWORK ), LWORK - NWORK + 1, IERR )
  1451. *
  1452. * Multiply Q in A by right singular vectors of
  1453. * bidiagonal matrix in WORK(IVT), storing result in
  1454. * WORK(IL) and copying to A
  1455. * Workspace: need 3*M [e, tauq, taup] + M*M [VT] + M*NB [L]
  1456. * Workspace: prefer 3*M [e, tauq, taup] + M*M [VT] + M*N [L]
  1457. *
  1458. DO 40 I = 1, N, CHUNK
  1459. BLK = MIN( N - I + 1, CHUNK )
  1460. CALL SGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IVT ),
  1461. $ LDWKVT, A( 1, I ), LDA, ZERO,
  1462. $ WORK( IL ), M )
  1463. CALL SLACPY( 'F', M, BLK, WORK( IL ), M, A( 1, I ),
  1464. $ LDA )
  1465. 40 CONTINUE
  1466. END IF
  1467. ELSE IF( WNTQS ) THEN
  1468. *
  1469. * Path 5ts (N > M, JOBZ='S')
  1470. * Perform bidiagonal SVD, computing left singular vectors
  1471. * of bidiagonal matrix in U and computing right singular
  1472. * vectors of bidiagonal matrix in VT
  1473. * Workspace: need 3*M [e, tauq, taup] + BDSPAC
  1474. *
  1475. CALL SLASET( 'F', M, N, ZERO, ZERO, VT, LDVT )
  1476. CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
  1477. $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  1478. $ INFO )
  1479. *
  1480. * Overwrite U by left singular vectors of A and VT
  1481. * by right singular vectors of A
  1482. * Workspace: need 3*M [e, tauq, taup] + M [work]
  1483. * Workspace: prefer 3*M [e, tauq, taup] + M*NB [work]
  1484. *
  1485. CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
  1486. $ WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  1487. $ LWORK - NWORK + 1, IERR )
  1488. CALL SORMBR( 'P', 'R', 'T', M, N, M, A, LDA,
  1489. $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  1490. $ LWORK - NWORK + 1, IERR )
  1491. ELSE IF( WNTQA ) THEN
  1492. *
  1493. * Path 5ta (N > M, JOBZ='A')
  1494. * Perform bidiagonal SVD, computing left singular vectors
  1495. * of bidiagonal matrix in U and computing right singular
  1496. * vectors of bidiagonal matrix in VT
  1497. * Workspace: need 3*M [e, tauq, taup] + BDSPAC
  1498. *
  1499. CALL SLASET( 'F', N, N, ZERO, ZERO, VT, LDVT )
  1500. CALL SBDSDC( 'L', 'I', M, S, WORK( IE ), U, LDU, VT,
  1501. $ LDVT, DUM, IDUM, WORK( NWORK ), IWORK,
  1502. $ INFO )
  1503. *
  1504. * Set the right corner of VT to identity matrix
  1505. *
  1506. IF( N.GT.M ) THEN
  1507. CALL SLASET( 'F', N-M, N-M, ZERO, ONE, VT(M+1,M+1),
  1508. $ LDVT )
  1509. END IF
  1510. *
  1511. * Overwrite U by left singular vectors of A and VT
  1512. * by right singular vectors of A
  1513. * Workspace: need 3*M [e, tauq, taup] + N [work]
  1514. * Workspace: prefer 3*M [e, tauq, taup] + N*NB [work]
  1515. *
  1516. CALL SORMBR( 'Q', 'L', 'N', M, M, N, A, LDA,
  1517. $ WORK( ITAUQ ), U, LDU, WORK( NWORK ),
  1518. $ LWORK - NWORK + 1, IERR )
  1519. CALL SORMBR( 'P', 'R', 'T', N, N, M, A, LDA,
  1520. $ WORK( ITAUP ), VT, LDVT, WORK( NWORK ),
  1521. $ LWORK - NWORK + 1, IERR )
  1522. END IF
  1523. *
  1524. END IF
  1525. *
  1526. END IF
  1527. *
  1528. * Undo scaling if necessary
  1529. *
  1530. IF( ISCL.EQ.1 ) THEN
  1531. IF( ANRM.GT.BIGNUM )
  1532. $ CALL SLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
  1533. $ IERR )
  1534. IF( ANRM.LT.SMLNUM )
  1535. $ CALL SLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
  1536. $ IERR )
  1537. END IF
  1538. *
  1539. * Return optimal workspace in WORK(1)
  1540. *
  1541. WORK( 1 ) = MAXWRK
  1542. *
  1543. RETURN
  1544. *
  1545. * End of SGESDD
  1546. *
  1547. END