You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dgesvd.f 136 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762277227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550255125522553255425552556255725582559256025612562256325642565256625672568256925702571257225732574257525762577257825792580258125822583258425852586258725882589259025912592259325942595259625972598259926002601260226032604260526062607260826092610261126122613261426152616261726182619262026212622262326242625262626272628262926302631263226332634263526362637263826392640264126422643264426452646264726482649265026512652265326542655265626572658265926602661266226632664266526662667266826692670267126722673267426752676267726782679268026812682268326842685268626872688268926902691269226932694269526962697269826992700270127022703270427052706270727082709271027112712271327142715271627172718271927202721272227232724272527262727272827292730273127322733273427352736273727382739274027412742274327442745274627472748274927502751275227532754275527562757275827592760276127622763276427652766276727682769277027712772277327742775277627772778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501
  1. *> \brief <b> DGESVD computes the singular value decomposition (SVD) for GE matrices</b>
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download DGESVD + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dgesvd.f">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dgesvd.f">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dgesvd.f">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU, VT, LDVT,
  22. * WORK, LWORK, INFO )
  23. *
  24. * .. Scalar Arguments ..
  25. * CHARACTER JOBU, JOBVT
  26. * INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
  27. * ..
  28. * .. Array Arguments ..
  29. * DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
  30. * $ VT( LDVT, * ), WORK( * )
  31. * ..
  32. *
  33. *
  34. *> \par Purpose:
  35. * =============
  36. *>
  37. *> \verbatim
  38. *>
  39. *> DGESVD computes the singular value decomposition (SVD) of a real
  40. *> M-by-N matrix A, optionally computing the left and/or right singular
  41. *> vectors. The SVD is written
  42. *>
  43. *> A = U * SIGMA * transpose(V)
  44. *>
  45. *> where SIGMA is an M-by-N matrix which is zero except for its
  46. *> min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
  47. *> V is an N-by-N orthogonal matrix. The diagonal elements of SIGMA
  48. *> are the singular values of A; they are real and non-negative, and
  49. *> are returned in descending order. The first min(m,n) columns of
  50. *> U and V are the left and right singular vectors of A.
  51. *>
  52. *> Note that the routine returns V**T, not V.
  53. *> \endverbatim
  54. *
  55. * Arguments:
  56. * ==========
  57. *
  58. *> \param[in] JOBU
  59. *> \verbatim
  60. *> JOBU is CHARACTER*1
  61. *> Specifies options for computing all or part of the matrix U:
  62. *> = 'A': all M columns of U are returned in array U:
  63. *> = 'S': the first min(m,n) columns of U (the left singular
  64. *> vectors) are returned in the array U;
  65. *> = 'O': the first min(m,n) columns of U (the left singular
  66. *> vectors) are overwritten on the array A;
  67. *> = 'N': no columns of U (no left singular vectors) are
  68. *> computed.
  69. *> \endverbatim
  70. *>
  71. *> \param[in] JOBVT
  72. *> \verbatim
  73. *> JOBVT is CHARACTER*1
  74. *> Specifies options for computing all or part of the matrix
  75. *> V**T:
  76. *> = 'A': all N rows of V**T are returned in the array VT;
  77. *> = 'S': the first min(m,n) rows of V**T (the right singular
  78. *> vectors) are returned in the array VT;
  79. *> = 'O': the first min(m,n) rows of V**T (the right singular
  80. *> vectors) are overwritten on the array A;
  81. *> = 'N': no rows of V**T (no right singular vectors) are
  82. *> computed.
  83. *>
  84. *> JOBVT and JOBU cannot both be 'O'.
  85. *> \endverbatim
  86. *>
  87. *> \param[in] M
  88. *> \verbatim
  89. *> M is INTEGER
  90. *> The number of rows of the input matrix A. M >= 0.
  91. *> \endverbatim
  92. *>
  93. *> \param[in] N
  94. *> \verbatim
  95. *> N is INTEGER
  96. *> The number of columns of the input matrix A. N >= 0.
  97. *> \endverbatim
  98. *>
  99. *> \param[in,out] A
  100. *> \verbatim
  101. *> A is DOUBLE PRECISION array, dimension (LDA,N)
  102. *> On entry, the M-by-N matrix A.
  103. *> On exit,
  104. *> if JOBU = 'O', A is overwritten with the first min(m,n)
  105. *> columns of U (the left singular vectors,
  106. *> stored columnwise);
  107. *> if JOBVT = 'O', A is overwritten with the first min(m,n)
  108. *> rows of V**T (the right singular vectors,
  109. *> stored rowwise);
  110. *> if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
  111. *> are destroyed.
  112. *> \endverbatim
  113. *>
  114. *> \param[in] LDA
  115. *> \verbatim
  116. *> LDA is INTEGER
  117. *> The leading dimension of the array A. LDA >= max(1,M).
  118. *> \endverbatim
  119. *>
  120. *> \param[out] S
  121. *> \verbatim
  122. *> S is DOUBLE PRECISION array, dimension (min(M,N))
  123. *> The singular values of A, sorted so that S(i) >= S(i+1).
  124. *> \endverbatim
  125. *>
  126. *> \param[out] U
  127. *> \verbatim
  128. *> U is DOUBLE PRECISION array, dimension (LDU,UCOL)
  129. *> (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
  130. *> If JOBU = 'A', U contains the M-by-M orthogonal matrix U;
  131. *> if JOBU = 'S', U contains the first min(m,n) columns of U
  132. *> (the left singular vectors, stored columnwise);
  133. *> if JOBU = 'N' or 'O', U is not referenced.
  134. *> \endverbatim
  135. *>
  136. *> \param[in] LDU
  137. *> \verbatim
  138. *> LDU is INTEGER
  139. *> The leading dimension of the array U. LDU >= 1; if
  140. *> JOBU = 'S' or 'A', LDU >= M.
  141. *> \endverbatim
  142. *>
  143. *> \param[out] VT
  144. *> \verbatim
  145. *> VT is DOUBLE PRECISION array, dimension (LDVT,N)
  146. *> If JOBVT = 'A', VT contains the N-by-N orthogonal matrix
  147. *> V**T;
  148. *> if JOBVT = 'S', VT contains the first min(m,n) rows of
  149. *> V**T (the right singular vectors, stored rowwise);
  150. *> if JOBVT = 'N' or 'O', VT is not referenced.
  151. *> \endverbatim
  152. *>
  153. *> \param[in] LDVT
  154. *> \verbatim
  155. *> LDVT is INTEGER
  156. *> The leading dimension of the array VT. LDVT >= 1; if
  157. *> JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
  158. *> \endverbatim
  159. *>
  160. *> \param[out] WORK
  161. *> \verbatim
  162. *> WORK is DOUBLE PRECISION array, dimension (MAX(1,LWORK))
  163. *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK;
  164. *> if INFO > 0, WORK(2:MIN(M,N)) contains the unconverged
  165. *> superdiagonal elements of an upper bidiagonal matrix B
  166. *> whose diagonal is in S (not necessarily sorted). B
  167. *> satisfies A = U * B * VT, so it has the same singular values
  168. *> as A, and singular vectors related by U and VT.
  169. *> \endverbatim
  170. *>
  171. *> \param[in] LWORK
  172. *> \verbatim
  173. *> LWORK is INTEGER
  174. *> The dimension of the array WORK.
  175. *> LWORK >= MAX(1,5*MIN(M,N)) for the paths (see comments inside code):
  176. *> - PATH 1 (M much larger than N, JOBU='N')
  177. *> - PATH 1t (N much larger than M, JOBVT='N')
  178. *> LWORK >= MAX(1,3*MIN(M,N) + MAX(M,N),5*MIN(M,N)) for the other paths
  179. *> For good performance, LWORK should generally be larger.
  180. *>
  181. *> If LWORK = -1, then a workspace query is assumed; the routine
  182. *> only calculates the optimal size of the WORK array, returns
  183. *> this value as the first entry of the WORK array, and no error
  184. *> message related to LWORK is issued by XERBLA.
  185. *> \endverbatim
  186. *>
  187. *> \param[out] INFO
  188. *> \verbatim
  189. *> INFO is INTEGER
  190. *> = 0: successful exit.
  191. *> < 0: if INFO = -i, the i-th argument had an illegal value.
  192. *> > 0: if DBDSQR did not converge, INFO specifies how many
  193. *> superdiagonals of an intermediate bidiagonal form B
  194. *> did not converge to zero. See the description of WORK
  195. *> above for details.
  196. *> \endverbatim
  197. *
  198. * Authors:
  199. * ========
  200. *
  201. *> \author Univ. of Tennessee
  202. *> \author Univ. of California Berkeley
  203. *> \author Univ. of Colorado Denver
  204. *> \author NAG Ltd.
  205. *
  206. *> \ingroup doubleGEsing
  207. *
  208. * =====================================================================
  209. SUBROUTINE DGESVD( JOBU, JOBVT, M, N, A, LDA, S, U, LDU,
  210. $ VT, LDVT, WORK, LWORK, INFO )
  211. *
  212. * -- LAPACK driver routine --
  213. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  214. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  215. *
  216. * .. Scalar Arguments ..
  217. CHARACTER JOBU, JOBVT
  218. INTEGER INFO, LDA, LDU, LDVT, LWORK, M, N
  219. * ..
  220. * .. Array Arguments ..
  221. DOUBLE PRECISION A( LDA, * ), S( * ), U( LDU, * ),
  222. $ VT( LDVT, * ), WORK( * )
  223. * ..
  224. *
  225. * =====================================================================
  226. *
  227. * .. Parameters ..
  228. DOUBLE PRECISION ZERO, ONE
  229. PARAMETER ( ZERO = 0.0D0, ONE = 1.0D0 )
  230. * ..
  231. * .. Local Scalars ..
  232. LOGICAL LQUERY, WNTUA, WNTUAS, WNTUN, WNTUO, WNTUS,
  233. $ WNTVA, WNTVAS, WNTVN, WNTVO, WNTVS
  234. INTEGER BDSPAC, BLK, CHUNK, I, IE, IERR, IR, ISCL,
  235. $ ITAU, ITAUP, ITAUQ, IU, IWORK, LDWRKR, LDWRKU,
  236. $ MAXWRK, MINMN, MINWRK, MNTHR, NCU, NCVT, NRU,
  237. $ NRVT, WRKBL
  238. INTEGER LWORK_DGEQRF, LWORK_DORGQR_N, LWORK_DORGQR_M,
  239. $ LWORK_DGEBRD, LWORK_DORGBR_P, LWORK_DORGBR_Q,
  240. $ LWORK_DGELQF, LWORK_DORGLQ_N, LWORK_DORGLQ_M
  241. DOUBLE PRECISION ANRM, BIGNUM, EPS, SMLNUM
  242. * ..
  243. * .. Local Arrays ..
  244. DOUBLE PRECISION DUM( 1 )
  245. * ..
  246. * .. External Subroutines ..
  247. EXTERNAL DBDSQR, DGEBRD, DGELQF, DGEMM, DGEQRF, DLACPY,
  248. $ DLASCL, DLASET, DORGBR, DORGLQ, DORGQR, DORMBR,
  249. $ XERBLA
  250. * ..
  251. * .. External Functions ..
  252. LOGICAL LSAME
  253. INTEGER ILAENV
  254. DOUBLE PRECISION DLAMCH, DLANGE
  255. EXTERNAL LSAME, ILAENV, DLAMCH, DLANGE
  256. * ..
  257. * .. Intrinsic Functions ..
  258. INTRINSIC MAX, MIN, SQRT
  259. * ..
  260. * .. Executable Statements ..
  261. *
  262. * Test the input arguments
  263. *
  264. INFO = 0
  265. MINMN = MIN( M, N )
  266. WNTUA = LSAME( JOBU, 'A' )
  267. WNTUS = LSAME( JOBU, 'S' )
  268. WNTUAS = WNTUA .OR. WNTUS
  269. WNTUO = LSAME( JOBU, 'O' )
  270. WNTUN = LSAME( JOBU, 'N' )
  271. WNTVA = LSAME( JOBVT, 'A' )
  272. WNTVS = LSAME( JOBVT, 'S' )
  273. WNTVAS = WNTVA .OR. WNTVS
  274. WNTVO = LSAME( JOBVT, 'O' )
  275. WNTVN = LSAME( JOBVT, 'N' )
  276. LQUERY = ( LWORK.EQ.-1 )
  277. *
  278. IF( .NOT.( WNTUA .OR. WNTUS .OR. WNTUO .OR. WNTUN ) ) THEN
  279. INFO = -1
  280. ELSE IF( .NOT.( WNTVA .OR. WNTVS .OR. WNTVO .OR. WNTVN ) .OR.
  281. $ ( WNTVO .AND. WNTUO ) ) THEN
  282. INFO = -2
  283. ELSE IF( M.LT.0 ) THEN
  284. INFO = -3
  285. ELSE IF( N.LT.0 ) THEN
  286. INFO = -4
  287. ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
  288. INFO = -6
  289. ELSE IF( LDU.LT.1 .OR. ( WNTUAS .AND. LDU.LT.M ) ) THEN
  290. INFO = -9
  291. ELSE IF( LDVT.LT.1 .OR. ( WNTVA .AND. LDVT.LT.N ) .OR.
  292. $ ( WNTVS .AND. LDVT.LT.MINMN ) ) THEN
  293. INFO = -11
  294. END IF
  295. *
  296. * Compute workspace
  297. * (Note: Comments in the code beginning "Workspace:" describe the
  298. * minimal amount of workspace needed at that point in the code,
  299. * as well as the preferred amount for good performance.
  300. * NB refers to the optimal block size for the immediately
  301. * following subroutine, as returned by ILAENV.)
  302. *
  303. IF( INFO.EQ.0 ) THEN
  304. MINWRK = 1
  305. MAXWRK = 1
  306. IF( M.GE.N .AND. MINMN.GT.0 ) THEN
  307. *
  308. * Compute space needed for DBDSQR
  309. *
  310. MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
  311. BDSPAC = 5*N
  312. * Compute space needed for DGEQRF
  313. CALL DGEQRF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
  314. LWORK_DGEQRF = INT( DUM(1) )
  315. * Compute space needed for DORGQR
  316. CALL DORGQR( M, N, N, A, LDA, DUM(1), DUM(1), -1, IERR )
  317. LWORK_DORGQR_N = INT( DUM(1) )
  318. CALL DORGQR( M, M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
  319. LWORK_DORGQR_M = INT( DUM(1) )
  320. * Compute space needed for DGEBRD
  321. CALL DGEBRD( N, N, A, LDA, S, DUM(1), DUM(1),
  322. $ DUM(1), DUM(1), -1, IERR )
  323. LWORK_DGEBRD = INT( DUM(1) )
  324. * Compute space needed for DORGBR P
  325. CALL DORGBR( 'P', N, N, N, A, LDA, DUM(1),
  326. $ DUM(1), -1, IERR )
  327. LWORK_DORGBR_P = INT( DUM(1) )
  328. * Compute space needed for DORGBR Q
  329. CALL DORGBR( 'Q', N, N, N, A, LDA, DUM(1),
  330. $ DUM(1), -1, IERR )
  331. LWORK_DORGBR_Q = INT( DUM(1) )
  332. *
  333. IF( M.GE.MNTHR ) THEN
  334. IF( WNTUN ) THEN
  335. *
  336. * Path 1 (M much larger than N, JOBU='N')
  337. *
  338. MAXWRK = N + LWORK_DGEQRF
  339. MAXWRK = MAX( MAXWRK, 3*N + LWORK_DGEBRD )
  340. IF( WNTVO .OR. WNTVAS )
  341. $ MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P )
  342. MAXWRK = MAX( MAXWRK, BDSPAC )
  343. MINWRK = MAX( 4*N, BDSPAC )
  344. ELSE IF( WNTUO .AND. WNTVN ) THEN
  345. *
  346. * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
  347. *
  348. WRKBL = N + LWORK_DGEQRF
  349. WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
  350. WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
  351. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
  352. WRKBL = MAX( WRKBL, BDSPAC )
  353. MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N )
  354. MINWRK = MAX( 3*N + M, BDSPAC )
  355. ELSE IF( WNTUO .AND. WNTVAS ) THEN
  356. *
  357. * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or
  358. * 'A')
  359. *
  360. WRKBL = N + LWORK_DGEQRF
  361. WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
  362. WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
  363. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
  364. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
  365. WRKBL = MAX( WRKBL, BDSPAC )
  366. MAXWRK = MAX( N*N + WRKBL, N*N + M*N + N )
  367. MINWRK = MAX( 3*N + M, BDSPAC )
  368. ELSE IF( WNTUS .AND. WNTVN ) THEN
  369. *
  370. * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
  371. *
  372. WRKBL = N + LWORK_DGEQRF
  373. WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
  374. WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
  375. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
  376. WRKBL = MAX( WRKBL, BDSPAC )
  377. MAXWRK = N*N + WRKBL
  378. MINWRK = MAX( 3*N + M, BDSPAC )
  379. ELSE IF( WNTUS .AND. WNTVO ) THEN
  380. *
  381. * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
  382. *
  383. WRKBL = N + LWORK_DGEQRF
  384. WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
  385. WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
  386. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
  387. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
  388. WRKBL = MAX( WRKBL, BDSPAC )
  389. MAXWRK = 2*N*N + WRKBL
  390. MINWRK = MAX( 3*N + M, BDSPAC )
  391. ELSE IF( WNTUS .AND. WNTVAS ) THEN
  392. *
  393. * Path 6 (M much larger than N, JOBU='S', JOBVT='S' or
  394. * 'A')
  395. *
  396. WRKBL = N + LWORK_DGEQRF
  397. WRKBL = MAX( WRKBL, N + LWORK_DORGQR_N )
  398. WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
  399. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
  400. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
  401. WRKBL = MAX( WRKBL, BDSPAC )
  402. MAXWRK = N*N + WRKBL
  403. MINWRK = MAX( 3*N + M, BDSPAC )
  404. ELSE IF( WNTUA .AND. WNTVN ) THEN
  405. *
  406. * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
  407. *
  408. WRKBL = N + LWORK_DGEQRF
  409. WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
  410. WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
  411. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
  412. WRKBL = MAX( WRKBL, BDSPAC )
  413. MAXWRK = N*N + WRKBL
  414. MINWRK = MAX( 3*N + M, BDSPAC )
  415. ELSE IF( WNTUA .AND. WNTVO ) THEN
  416. *
  417. * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
  418. *
  419. WRKBL = N + LWORK_DGEQRF
  420. WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
  421. WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
  422. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
  423. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
  424. WRKBL = MAX( WRKBL, BDSPAC )
  425. MAXWRK = 2*N*N + WRKBL
  426. MINWRK = MAX( 3*N + M, BDSPAC )
  427. ELSE IF( WNTUA .AND. WNTVAS ) THEN
  428. *
  429. * Path 9 (M much larger than N, JOBU='A', JOBVT='S' or
  430. * 'A')
  431. *
  432. WRKBL = N + LWORK_DGEQRF
  433. WRKBL = MAX( WRKBL, N + LWORK_DORGQR_M )
  434. WRKBL = MAX( WRKBL, 3*N + LWORK_DGEBRD )
  435. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_Q )
  436. WRKBL = MAX( WRKBL, 3*N + LWORK_DORGBR_P )
  437. WRKBL = MAX( WRKBL, BDSPAC )
  438. MAXWRK = N*N + WRKBL
  439. MINWRK = MAX( 3*N + M, BDSPAC )
  440. END IF
  441. ELSE
  442. *
  443. * Path 10 (M at least N, but not much larger)
  444. *
  445. CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
  446. $ DUM(1), DUM(1), -1, IERR )
  447. LWORK_DGEBRD = INT( DUM(1) )
  448. MAXWRK = 3*N + LWORK_DGEBRD
  449. IF( WNTUS .OR. WNTUO ) THEN
  450. CALL DORGBR( 'Q', M, N, N, A, LDA, DUM(1),
  451. $ DUM(1), -1, IERR )
  452. LWORK_DORGBR_Q = INT( DUM(1) )
  453. MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q )
  454. END IF
  455. IF( WNTUA ) THEN
  456. CALL DORGBR( 'Q', M, M, N, A, LDA, DUM(1),
  457. $ DUM(1), -1, IERR )
  458. LWORK_DORGBR_Q = INT( DUM(1) )
  459. MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_Q )
  460. END IF
  461. IF( .NOT.WNTVN ) THEN
  462. MAXWRK = MAX( MAXWRK, 3*N + LWORK_DORGBR_P )
  463. END IF
  464. MAXWRK = MAX( MAXWRK, BDSPAC )
  465. MINWRK = MAX( 3*N + M, BDSPAC )
  466. END IF
  467. ELSE IF( MINMN.GT.0 ) THEN
  468. *
  469. * Compute space needed for DBDSQR
  470. *
  471. MNTHR = ILAENV( 6, 'DGESVD', JOBU // JOBVT, M, N, 0, 0 )
  472. BDSPAC = 5*M
  473. * Compute space needed for DGELQF
  474. CALL DGELQF( M, N, A, LDA, DUM(1), DUM(1), -1, IERR )
  475. LWORK_DGELQF = INT( DUM(1) )
  476. * Compute space needed for DORGLQ
  477. CALL DORGLQ( N, N, M, DUM(1), N, DUM(1), DUM(1), -1, IERR )
  478. LWORK_DORGLQ_N = INT( DUM(1) )
  479. CALL DORGLQ( M, N, M, A, LDA, DUM(1), DUM(1), -1, IERR )
  480. LWORK_DORGLQ_M = INT( DUM(1) )
  481. * Compute space needed for DGEBRD
  482. CALL DGEBRD( M, M, A, LDA, S, DUM(1), DUM(1),
  483. $ DUM(1), DUM(1), -1, IERR )
  484. LWORK_DGEBRD = INT( DUM(1) )
  485. * Compute space needed for DORGBR P
  486. CALL DORGBR( 'P', M, M, M, A, N, DUM(1),
  487. $ DUM(1), -1, IERR )
  488. LWORK_DORGBR_P = INT( DUM(1) )
  489. * Compute space needed for DORGBR Q
  490. CALL DORGBR( 'Q', M, M, M, A, N, DUM(1),
  491. $ DUM(1), -1, IERR )
  492. LWORK_DORGBR_Q = INT( DUM(1) )
  493. IF( N.GE.MNTHR ) THEN
  494. IF( WNTVN ) THEN
  495. *
  496. * Path 1t(N much larger than M, JOBVT='N')
  497. *
  498. MAXWRK = M + LWORK_DGELQF
  499. MAXWRK = MAX( MAXWRK, 3*M + LWORK_DGEBRD )
  500. IF( WNTUO .OR. WNTUAS )
  501. $ MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q )
  502. MAXWRK = MAX( MAXWRK, BDSPAC )
  503. MINWRK = MAX( 4*M, BDSPAC )
  504. ELSE IF( WNTVO .AND. WNTUN ) THEN
  505. *
  506. * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
  507. *
  508. WRKBL = M + LWORK_DGELQF
  509. WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
  510. WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
  511. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
  512. WRKBL = MAX( WRKBL, BDSPAC )
  513. MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M )
  514. MINWRK = MAX( 3*M + N, BDSPAC )
  515. ELSE IF( WNTVO .AND. WNTUAS ) THEN
  516. *
  517. * Path 3t(N much larger than M, JOBU='S' or 'A',
  518. * JOBVT='O')
  519. *
  520. WRKBL = M + LWORK_DGELQF
  521. WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
  522. WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
  523. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
  524. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
  525. WRKBL = MAX( WRKBL, BDSPAC )
  526. MAXWRK = MAX( M*M + WRKBL, M*M + M*N + M )
  527. MINWRK = MAX( 3*M + N, BDSPAC )
  528. ELSE IF( WNTVS .AND. WNTUN ) THEN
  529. *
  530. * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
  531. *
  532. WRKBL = M + LWORK_DGELQF
  533. WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
  534. WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
  535. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
  536. WRKBL = MAX( WRKBL, BDSPAC )
  537. MAXWRK = M*M + WRKBL
  538. MINWRK = MAX( 3*M + N, BDSPAC )
  539. ELSE IF( WNTVS .AND. WNTUO ) THEN
  540. *
  541. * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
  542. *
  543. WRKBL = M + LWORK_DGELQF
  544. WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
  545. WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
  546. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
  547. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
  548. WRKBL = MAX( WRKBL, BDSPAC )
  549. MAXWRK = 2*M*M + WRKBL
  550. MINWRK = MAX( 3*M + N, BDSPAC )
  551. ELSE IF( WNTVS .AND. WNTUAS ) THEN
  552. *
  553. * Path 6t(N much larger than M, JOBU='S' or 'A',
  554. * JOBVT='S')
  555. *
  556. WRKBL = M + LWORK_DGELQF
  557. WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_M )
  558. WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
  559. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
  560. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
  561. WRKBL = MAX( WRKBL, BDSPAC )
  562. MAXWRK = M*M + WRKBL
  563. MINWRK = MAX( 3*M + N, BDSPAC )
  564. ELSE IF( WNTVA .AND. WNTUN ) THEN
  565. *
  566. * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
  567. *
  568. WRKBL = M + LWORK_DGELQF
  569. WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
  570. WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
  571. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
  572. WRKBL = MAX( WRKBL, BDSPAC )
  573. MAXWRK = M*M + WRKBL
  574. MINWRK = MAX( 3*M + N, BDSPAC )
  575. ELSE IF( WNTVA .AND. WNTUO ) THEN
  576. *
  577. * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
  578. *
  579. WRKBL = M + LWORK_DGELQF
  580. WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
  581. WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
  582. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
  583. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
  584. WRKBL = MAX( WRKBL, BDSPAC )
  585. MAXWRK = 2*M*M + WRKBL
  586. MINWRK = MAX( 3*M + N, BDSPAC )
  587. ELSE IF( WNTVA .AND. WNTUAS ) THEN
  588. *
  589. * Path 9t(N much larger than M, JOBU='S' or 'A',
  590. * JOBVT='A')
  591. *
  592. WRKBL = M + LWORK_DGELQF
  593. WRKBL = MAX( WRKBL, M + LWORK_DORGLQ_N )
  594. WRKBL = MAX( WRKBL, 3*M + LWORK_DGEBRD )
  595. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_P )
  596. WRKBL = MAX( WRKBL, 3*M + LWORK_DORGBR_Q )
  597. WRKBL = MAX( WRKBL, BDSPAC )
  598. MAXWRK = M*M + WRKBL
  599. MINWRK = MAX( 3*M + N, BDSPAC )
  600. END IF
  601. ELSE
  602. *
  603. * Path 10t(N greater than M, but not much larger)
  604. *
  605. CALL DGEBRD( M, N, A, LDA, S, DUM(1), DUM(1),
  606. $ DUM(1), DUM(1), -1, IERR )
  607. LWORK_DGEBRD = INT( DUM(1) )
  608. MAXWRK = 3*M + LWORK_DGEBRD
  609. IF( WNTVS .OR. WNTVO ) THEN
  610. * Compute space needed for DORGBR P
  611. CALL DORGBR( 'P', M, N, M, A, N, DUM(1),
  612. $ DUM(1), -1, IERR )
  613. LWORK_DORGBR_P = INT( DUM(1) )
  614. MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P )
  615. END IF
  616. IF( WNTVA ) THEN
  617. CALL DORGBR( 'P', N, N, M, A, N, DUM(1),
  618. $ DUM(1), -1, IERR )
  619. LWORK_DORGBR_P = INT( DUM(1) )
  620. MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_P )
  621. END IF
  622. IF( .NOT.WNTUN ) THEN
  623. MAXWRK = MAX( MAXWRK, 3*M + LWORK_DORGBR_Q )
  624. END IF
  625. MAXWRK = MAX( MAXWRK, BDSPAC )
  626. MINWRK = MAX( 3*M + N, BDSPAC )
  627. END IF
  628. END IF
  629. MAXWRK = MAX( MAXWRK, MINWRK )
  630. WORK( 1 ) = MAXWRK
  631. *
  632. IF( LWORK.LT.MINWRK .AND. .NOT.LQUERY ) THEN
  633. INFO = -13
  634. END IF
  635. END IF
  636. *
  637. IF( INFO.NE.0 ) THEN
  638. CALL XERBLA( 'DGESVD', -INFO )
  639. RETURN
  640. ELSE IF( LQUERY ) THEN
  641. RETURN
  642. END IF
  643. *
  644. * Quick return if possible
  645. *
  646. IF( M.EQ.0 .OR. N.EQ.0 ) THEN
  647. RETURN
  648. END IF
  649. *
  650. * Get machine constants
  651. *
  652. EPS = DLAMCH( 'P' )
  653. SMLNUM = SQRT( DLAMCH( 'S' ) ) / EPS
  654. BIGNUM = ONE / SMLNUM
  655. *
  656. * Scale A if max element outside range [SMLNUM,BIGNUM]
  657. *
  658. ANRM = DLANGE( 'M', M, N, A, LDA, DUM )
  659. ISCL = 0
  660. IF( ANRM.GT.ZERO .AND. ANRM.LT.SMLNUM ) THEN
  661. ISCL = 1
  662. CALL DLASCL( 'G', 0, 0, ANRM, SMLNUM, M, N, A, LDA, IERR )
  663. ELSE IF( ANRM.GT.BIGNUM ) THEN
  664. ISCL = 1
  665. CALL DLASCL( 'G', 0, 0, ANRM, BIGNUM, M, N, A, LDA, IERR )
  666. END IF
  667. *
  668. IF( M.GE.N ) THEN
  669. *
  670. * A has at least as many rows as columns. If A has sufficiently
  671. * more rows than columns, first reduce using the QR
  672. * decomposition (if sufficient workspace available)
  673. *
  674. IF( M.GE.MNTHR ) THEN
  675. *
  676. IF( WNTUN ) THEN
  677. *
  678. * Path 1 (M much larger than N, JOBU='N')
  679. * No left singular vectors to be computed
  680. *
  681. ITAU = 1
  682. IWORK = ITAU + N
  683. *
  684. * Compute A=Q*R
  685. * (Workspace: need 2*N, prefer N + N*NB)
  686. *
  687. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
  688. $ LWORK-IWORK+1, IERR )
  689. *
  690. * Zero out below R
  691. *
  692. IF( N .GT. 1 ) THEN
  693. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, A( 2, 1 ),
  694. $ LDA )
  695. END IF
  696. IE = 1
  697. ITAUQ = IE + N
  698. ITAUP = ITAUQ + N
  699. IWORK = ITAUP + N
  700. *
  701. * Bidiagonalize R in A
  702. * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
  703. *
  704. CALL DGEBRD( N, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  705. $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
  706. $ IERR )
  707. NCVT = 0
  708. IF( WNTVO .OR. WNTVAS ) THEN
  709. *
  710. * If right singular vectors desired, generate P'.
  711. * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
  712. *
  713. CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
  714. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  715. NCVT = N
  716. END IF
  717. IWORK = IE + N
  718. *
  719. * Perform bidiagonal QR iteration, computing right
  720. * singular vectors of A in A if desired
  721. * (Workspace: need BDSPAC)
  722. *
  723. CALL DBDSQR( 'U', N, NCVT, 0, 0, S, WORK( IE ), A, LDA,
  724. $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
  725. *
  726. * If right singular vectors desired in VT, copy them there
  727. *
  728. IF( WNTVAS )
  729. $ CALL DLACPY( 'F', N, N, A, LDA, VT, LDVT )
  730. *
  731. ELSE IF( WNTUO .AND. WNTVN ) THEN
  732. *
  733. * Path 2 (M much larger than N, JOBU='O', JOBVT='N')
  734. * N left singular vectors to be overwritten on A and
  735. * no right singular vectors to be computed
  736. *
  737. IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
  738. *
  739. * Sufficient workspace for a fast algorithm
  740. *
  741. IR = 1
  742. IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN
  743. *
  744. * WORK(IU) is LDA by N, WORK(IR) is LDA by N
  745. *
  746. LDWRKU = LDA
  747. LDWRKR = LDA
  748. ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN
  749. *
  750. * WORK(IU) is LDA by N, WORK(IR) is N by N
  751. *
  752. LDWRKU = LDA
  753. LDWRKR = N
  754. ELSE
  755. *
  756. * WORK(IU) is LDWRKU by N, WORK(IR) is N by N
  757. *
  758. LDWRKU = ( LWORK-N*N-N ) / N
  759. LDWRKR = N
  760. END IF
  761. ITAU = IR + LDWRKR*N
  762. IWORK = ITAU + N
  763. *
  764. * Compute A=Q*R
  765. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  766. *
  767. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  768. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  769. *
  770. * Copy R to WORK(IR) and zero out below it
  771. *
  772. CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ), LDWRKR )
  773. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO, WORK( IR+1 ),
  774. $ LDWRKR )
  775. *
  776. * Generate Q in A
  777. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  778. *
  779. CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
  780. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  781. IE = ITAU
  782. ITAUQ = IE + N
  783. ITAUP = ITAUQ + N
  784. IWORK = ITAUP + N
  785. *
  786. * Bidiagonalize R in WORK(IR)
  787. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
  788. *
  789. CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S, WORK( IE ),
  790. $ WORK( ITAUQ ), WORK( ITAUP ),
  791. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  792. *
  793. * Generate left vectors bidiagonalizing R
  794. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
  795. *
  796. CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
  797. $ WORK( ITAUQ ), WORK( IWORK ),
  798. $ LWORK-IWORK+1, IERR )
  799. IWORK = IE + N
  800. *
  801. * Perform bidiagonal QR iteration, computing left
  802. * singular vectors of R in WORK(IR)
  803. * (Workspace: need N*N + BDSPAC)
  804. *
  805. CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM, 1,
  806. $ WORK( IR ), LDWRKR, DUM, 1,
  807. $ WORK( IWORK ), INFO )
  808. IU = IE + N
  809. *
  810. * Multiply Q in A by left singular vectors of R in
  811. * WORK(IR), storing result in WORK(IU) and copying to A
  812. * (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
  813. *
  814. DO 10 I = 1, M, LDWRKU
  815. CHUNK = MIN( M-I+1, LDWRKU )
  816. CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
  817. $ LDA, WORK( IR ), LDWRKR, ZERO,
  818. $ WORK( IU ), LDWRKU )
  819. CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
  820. $ A( I, 1 ), LDA )
  821. 10 CONTINUE
  822. *
  823. ELSE
  824. *
  825. * Insufficient workspace for a fast algorithm
  826. *
  827. IE = 1
  828. ITAUQ = IE + N
  829. ITAUP = ITAUQ + N
  830. IWORK = ITAUP + N
  831. *
  832. * Bidiagonalize A
  833. * (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
  834. *
  835. CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
  836. $ WORK( ITAUQ ), WORK( ITAUP ),
  837. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  838. *
  839. * Generate left vectors bidiagonalizing A
  840. * (Workspace: need 4*N, prefer 3*N + N*NB)
  841. *
  842. CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
  843. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  844. IWORK = IE + N
  845. *
  846. * Perform bidiagonal QR iteration, computing left
  847. * singular vectors of A in A
  848. * (Workspace: need BDSPAC)
  849. *
  850. CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM, 1,
  851. $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
  852. *
  853. END IF
  854. *
  855. ELSE IF( WNTUO .AND. WNTVAS ) THEN
  856. *
  857. * Path 3 (M much larger than N, JOBU='O', JOBVT='S' or 'A')
  858. * N left singular vectors to be overwritten on A and
  859. * N right singular vectors to be computed in VT
  860. *
  861. IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
  862. *
  863. * Sufficient workspace for a fast algorithm
  864. *
  865. IR = 1
  866. IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + LDA*N ) THEN
  867. *
  868. * WORK(IU) is LDA by N and WORK(IR) is LDA by N
  869. *
  870. LDWRKU = LDA
  871. LDWRKR = LDA
  872. ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + N ) + N*N ) THEN
  873. *
  874. * WORK(IU) is LDA by N and WORK(IR) is N by N
  875. *
  876. LDWRKU = LDA
  877. LDWRKR = N
  878. ELSE
  879. *
  880. * WORK(IU) is LDWRKU by N and WORK(IR) is N by N
  881. *
  882. LDWRKU = ( LWORK-N*N-N ) / N
  883. LDWRKR = N
  884. END IF
  885. ITAU = IR + LDWRKR*N
  886. IWORK = ITAU + N
  887. *
  888. * Compute A=Q*R
  889. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  890. *
  891. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  892. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  893. *
  894. * Copy R to VT, zeroing out below it
  895. *
  896. CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
  897. IF( N.GT.1 )
  898. $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  899. $ VT( 2, 1 ), LDVT )
  900. *
  901. * Generate Q in A
  902. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  903. *
  904. CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
  905. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  906. IE = ITAU
  907. ITAUQ = IE + N
  908. ITAUP = ITAUQ + N
  909. IWORK = ITAUP + N
  910. *
  911. * Bidiagonalize R in VT, copying result to WORK(IR)
  912. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
  913. *
  914. CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
  915. $ WORK( ITAUQ ), WORK( ITAUP ),
  916. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  917. CALL DLACPY( 'L', N, N, VT, LDVT, WORK( IR ), LDWRKR )
  918. *
  919. * Generate left vectors bidiagonalizing R in WORK(IR)
  920. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
  921. *
  922. CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
  923. $ WORK( ITAUQ ), WORK( IWORK ),
  924. $ LWORK-IWORK+1, IERR )
  925. *
  926. * Generate right vectors bidiagonalizing R in VT
  927. * (Workspace: need N*N + 4*N-1, prefer N*N + 3*N + (N-1)*NB)
  928. *
  929. CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
  930. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  931. IWORK = IE + N
  932. *
  933. * Perform bidiagonal QR iteration, computing left
  934. * singular vectors of R in WORK(IR) and computing right
  935. * singular vectors of R in VT
  936. * (Workspace: need N*N + BDSPAC)
  937. *
  938. CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT, LDVT,
  939. $ WORK( IR ), LDWRKR, DUM, 1,
  940. $ WORK( IWORK ), INFO )
  941. IU = IE + N
  942. *
  943. * Multiply Q in A by left singular vectors of R in
  944. * WORK(IR), storing result in WORK(IU) and copying to A
  945. * (Workspace: need N*N + 2*N, prefer N*N + M*N + N)
  946. *
  947. DO 20 I = 1, M, LDWRKU
  948. CHUNK = MIN( M-I+1, LDWRKU )
  949. CALL DGEMM( 'N', 'N', CHUNK, N, N, ONE, A( I, 1 ),
  950. $ LDA, WORK( IR ), LDWRKR, ZERO,
  951. $ WORK( IU ), LDWRKU )
  952. CALL DLACPY( 'F', CHUNK, N, WORK( IU ), LDWRKU,
  953. $ A( I, 1 ), LDA )
  954. 20 CONTINUE
  955. *
  956. ELSE
  957. *
  958. * Insufficient workspace for a fast algorithm
  959. *
  960. ITAU = 1
  961. IWORK = ITAU + N
  962. *
  963. * Compute A=Q*R
  964. * (Workspace: need 2*N, prefer N + N*NB)
  965. *
  966. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  967. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  968. *
  969. * Copy R to VT, zeroing out below it
  970. *
  971. CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
  972. IF( N.GT.1 )
  973. $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  974. $ VT( 2, 1 ), LDVT )
  975. *
  976. * Generate Q in A
  977. * (Workspace: need 2*N, prefer N + N*NB)
  978. *
  979. CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
  980. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  981. IE = ITAU
  982. ITAUQ = IE + N
  983. ITAUP = ITAUQ + N
  984. IWORK = ITAUP + N
  985. *
  986. * Bidiagonalize R in VT
  987. * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
  988. *
  989. CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
  990. $ WORK( ITAUQ ), WORK( ITAUP ),
  991. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  992. *
  993. * Multiply Q in A by left vectors bidiagonalizing R
  994. * (Workspace: need 3*N + M, prefer 3*N + M*NB)
  995. *
  996. CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
  997. $ WORK( ITAUQ ), A, LDA, WORK( IWORK ),
  998. $ LWORK-IWORK+1, IERR )
  999. *
  1000. * Generate right vectors bidiagonalizing R in VT
  1001. * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
  1002. *
  1003. CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
  1004. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1005. IWORK = IE + N
  1006. *
  1007. * Perform bidiagonal QR iteration, computing left
  1008. * singular vectors of A in A and computing right
  1009. * singular vectors of A in VT
  1010. * (Workspace: need BDSPAC)
  1011. *
  1012. CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT, LDVT,
  1013. $ A, LDA, DUM, 1, WORK( IWORK ), INFO )
  1014. *
  1015. END IF
  1016. *
  1017. ELSE IF( WNTUS ) THEN
  1018. *
  1019. IF( WNTVN ) THEN
  1020. *
  1021. * Path 4 (M much larger than N, JOBU='S', JOBVT='N')
  1022. * N left singular vectors to be computed in U and
  1023. * no right singular vectors to be computed
  1024. *
  1025. IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
  1026. *
  1027. * Sufficient workspace for a fast algorithm
  1028. *
  1029. IR = 1
  1030. IF( LWORK.GE.WRKBL+LDA*N ) THEN
  1031. *
  1032. * WORK(IR) is LDA by N
  1033. *
  1034. LDWRKR = LDA
  1035. ELSE
  1036. *
  1037. * WORK(IR) is N by N
  1038. *
  1039. LDWRKR = N
  1040. END IF
  1041. ITAU = IR + LDWRKR*N
  1042. IWORK = ITAU + N
  1043. *
  1044. * Compute A=Q*R
  1045. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  1046. *
  1047. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1048. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1049. *
  1050. * Copy R to WORK(IR), zeroing out below it
  1051. *
  1052. CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
  1053. $ LDWRKR )
  1054. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1055. $ WORK( IR+1 ), LDWRKR )
  1056. *
  1057. * Generate Q in A
  1058. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  1059. *
  1060. CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
  1061. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1062. IE = ITAU
  1063. ITAUQ = IE + N
  1064. ITAUP = ITAUQ + N
  1065. IWORK = ITAUP + N
  1066. *
  1067. * Bidiagonalize R in WORK(IR)
  1068. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
  1069. *
  1070. CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
  1071. $ WORK( IE ), WORK( ITAUQ ),
  1072. $ WORK( ITAUP ), WORK( IWORK ),
  1073. $ LWORK-IWORK+1, IERR )
  1074. *
  1075. * Generate left vectors bidiagonalizing R in WORK(IR)
  1076. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
  1077. *
  1078. CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
  1079. $ WORK( ITAUQ ), WORK( IWORK ),
  1080. $ LWORK-IWORK+1, IERR )
  1081. IWORK = IE + N
  1082. *
  1083. * Perform bidiagonal QR iteration, computing left
  1084. * singular vectors of R in WORK(IR)
  1085. * (Workspace: need N*N + BDSPAC)
  1086. *
  1087. CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
  1088. $ 1, WORK( IR ), LDWRKR, DUM, 1,
  1089. $ WORK( IWORK ), INFO )
  1090. *
  1091. * Multiply Q in A by left singular vectors of R in
  1092. * WORK(IR), storing result in U
  1093. * (Workspace: need N*N)
  1094. *
  1095. CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
  1096. $ WORK( IR ), LDWRKR, ZERO, U, LDU )
  1097. *
  1098. ELSE
  1099. *
  1100. * Insufficient workspace for a fast algorithm
  1101. *
  1102. ITAU = 1
  1103. IWORK = ITAU + N
  1104. *
  1105. * Compute A=Q*R, copying result to U
  1106. * (Workspace: need 2*N, prefer N + N*NB)
  1107. *
  1108. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1109. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1110. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1111. *
  1112. * Generate Q in U
  1113. * (Workspace: need 2*N, prefer N + N*NB)
  1114. *
  1115. CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
  1116. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1117. IE = ITAU
  1118. ITAUQ = IE + N
  1119. ITAUP = ITAUQ + N
  1120. IWORK = ITAUP + N
  1121. *
  1122. * Zero out below R in A
  1123. *
  1124. IF( N .GT. 1 ) THEN
  1125. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1126. $ A( 2, 1 ), LDA )
  1127. END IF
  1128. *
  1129. * Bidiagonalize R in A
  1130. * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
  1131. *
  1132. CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
  1133. $ WORK( ITAUQ ), WORK( ITAUP ),
  1134. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1135. *
  1136. * Multiply Q in U by left vectors bidiagonalizing R
  1137. * (Workspace: need 3*N + M, prefer 3*N + M*NB)
  1138. *
  1139. CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
  1140. $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
  1141. $ LWORK-IWORK+1, IERR )
  1142. IWORK = IE + N
  1143. *
  1144. * Perform bidiagonal QR iteration, computing left
  1145. * singular vectors of A in U
  1146. * (Workspace: need BDSPAC)
  1147. *
  1148. CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
  1149. $ 1, U, LDU, DUM, 1, WORK( IWORK ),
  1150. $ INFO )
  1151. *
  1152. END IF
  1153. *
  1154. ELSE IF( WNTVO ) THEN
  1155. *
  1156. * Path 5 (M much larger than N, JOBU='S', JOBVT='O')
  1157. * N left singular vectors to be computed in U and
  1158. * N right singular vectors to be overwritten on A
  1159. *
  1160. IF( LWORK.GE.2*N*N+MAX( 4*N, BDSPAC ) ) THEN
  1161. *
  1162. * Sufficient workspace for a fast algorithm
  1163. *
  1164. IU = 1
  1165. IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
  1166. *
  1167. * WORK(IU) is LDA by N and WORK(IR) is LDA by N
  1168. *
  1169. LDWRKU = LDA
  1170. IR = IU + LDWRKU*N
  1171. LDWRKR = LDA
  1172. ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN
  1173. *
  1174. * WORK(IU) is LDA by N and WORK(IR) is N by N
  1175. *
  1176. LDWRKU = LDA
  1177. IR = IU + LDWRKU*N
  1178. LDWRKR = N
  1179. ELSE
  1180. *
  1181. * WORK(IU) is N by N and WORK(IR) is N by N
  1182. *
  1183. LDWRKU = N
  1184. IR = IU + LDWRKU*N
  1185. LDWRKR = N
  1186. END IF
  1187. ITAU = IR + LDWRKR*N
  1188. IWORK = ITAU + N
  1189. *
  1190. * Compute A=Q*R
  1191. * (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
  1192. *
  1193. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1194. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1195. *
  1196. * Copy R to WORK(IU), zeroing out below it
  1197. *
  1198. CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
  1199. $ LDWRKU )
  1200. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1201. $ WORK( IU+1 ), LDWRKU )
  1202. *
  1203. * Generate Q in A
  1204. * (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
  1205. *
  1206. CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
  1207. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1208. IE = ITAU
  1209. ITAUQ = IE + N
  1210. ITAUP = ITAUQ + N
  1211. IWORK = ITAUP + N
  1212. *
  1213. * Bidiagonalize R in WORK(IU), copying result to
  1214. * WORK(IR)
  1215. * (Workspace: need 2*N*N + 4*N,
  1216. * prefer 2*N*N+3*N+2*N*NB)
  1217. *
  1218. CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
  1219. $ WORK( IE ), WORK( ITAUQ ),
  1220. $ WORK( ITAUP ), WORK( IWORK ),
  1221. $ LWORK-IWORK+1, IERR )
  1222. CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
  1223. $ WORK( IR ), LDWRKR )
  1224. *
  1225. * Generate left bidiagonalizing vectors in WORK(IU)
  1226. * (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
  1227. *
  1228. CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
  1229. $ WORK( ITAUQ ), WORK( IWORK ),
  1230. $ LWORK-IWORK+1, IERR )
  1231. *
  1232. * Generate right bidiagonalizing vectors in WORK(IR)
  1233. * (Workspace: need 2*N*N + 4*N-1,
  1234. * prefer 2*N*N+3*N+(N-1)*NB)
  1235. *
  1236. CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
  1237. $ WORK( ITAUP ), WORK( IWORK ),
  1238. $ LWORK-IWORK+1, IERR )
  1239. IWORK = IE + N
  1240. *
  1241. * Perform bidiagonal QR iteration, computing left
  1242. * singular vectors of R in WORK(IU) and computing
  1243. * right singular vectors of R in WORK(IR)
  1244. * (Workspace: need 2*N*N + BDSPAC)
  1245. *
  1246. CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
  1247. $ WORK( IR ), LDWRKR, WORK( IU ),
  1248. $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
  1249. *
  1250. * Multiply Q in A by left singular vectors of R in
  1251. * WORK(IU), storing result in U
  1252. * (Workspace: need N*N)
  1253. *
  1254. CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
  1255. $ WORK( IU ), LDWRKU, ZERO, U, LDU )
  1256. *
  1257. * Copy right singular vectors of R to A
  1258. * (Workspace: need N*N)
  1259. *
  1260. CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
  1261. $ LDA )
  1262. *
  1263. ELSE
  1264. *
  1265. * Insufficient workspace for a fast algorithm
  1266. *
  1267. ITAU = 1
  1268. IWORK = ITAU + N
  1269. *
  1270. * Compute A=Q*R, copying result to U
  1271. * (Workspace: need 2*N, prefer N + N*NB)
  1272. *
  1273. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1274. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1275. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1276. *
  1277. * Generate Q in U
  1278. * (Workspace: need 2*N, prefer N + N*NB)
  1279. *
  1280. CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
  1281. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1282. IE = ITAU
  1283. ITAUQ = IE + N
  1284. ITAUP = ITAUQ + N
  1285. IWORK = ITAUP + N
  1286. *
  1287. * Zero out below R in A
  1288. *
  1289. IF( N .GT. 1 ) THEN
  1290. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1291. $ A( 2, 1 ), LDA )
  1292. END IF
  1293. *
  1294. * Bidiagonalize R in A
  1295. * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
  1296. *
  1297. CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
  1298. $ WORK( ITAUQ ), WORK( ITAUP ),
  1299. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1300. *
  1301. * Multiply Q in U by left vectors bidiagonalizing R
  1302. * (Workspace: need 3*N + M, prefer 3*N + M*NB)
  1303. *
  1304. CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
  1305. $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
  1306. $ LWORK-IWORK+1, IERR )
  1307. *
  1308. * Generate right vectors bidiagonalizing R in A
  1309. * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
  1310. *
  1311. CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
  1312. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1313. IWORK = IE + N
  1314. *
  1315. * Perform bidiagonal QR iteration, computing left
  1316. * singular vectors of A in U and computing right
  1317. * singular vectors of A in A
  1318. * (Workspace: need BDSPAC)
  1319. *
  1320. CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
  1321. $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
  1322. $ INFO )
  1323. *
  1324. END IF
  1325. *
  1326. ELSE IF( WNTVAS ) THEN
  1327. *
  1328. * Path 6 (M much larger than N, JOBU='S', JOBVT='S'
  1329. * or 'A')
  1330. * N left singular vectors to be computed in U and
  1331. * N right singular vectors to be computed in VT
  1332. *
  1333. IF( LWORK.GE.N*N+MAX( 4*N, BDSPAC ) ) THEN
  1334. *
  1335. * Sufficient workspace for a fast algorithm
  1336. *
  1337. IU = 1
  1338. IF( LWORK.GE.WRKBL+LDA*N ) THEN
  1339. *
  1340. * WORK(IU) is LDA by N
  1341. *
  1342. LDWRKU = LDA
  1343. ELSE
  1344. *
  1345. * WORK(IU) is N by N
  1346. *
  1347. LDWRKU = N
  1348. END IF
  1349. ITAU = IU + LDWRKU*N
  1350. IWORK = ITAU + N
  1351. *
  1352. * Compute A=Q*R
  1353. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  1354. *
  1355. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1356. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1357. *
  1358. * Copy R to WORK(IU), zeroing out below it
  1359. *
  1360. CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
  1361. $ LDWRKU )
  1362. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1363. $ WORK( IU+1 ), LDWRKU )
  1364. *
  1365. * Generate Q in A
  1366. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  1367. *
  1368. CALL DORGQR( M, N, N, A, LDA, WORK( ITAU ),
  1369. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1370. IE = ITAU
  1371. ITAUQ = IE + N
  1372. ITAUP = ITAUQ + N
  1373. IWORK = ITAUP + N
  1374. *
  1375. * Bidiagonalize R in WORK(IU), copying result to VT
  1376. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
  1377. *
  1378. CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
  1379. $ WORK( IE ), WORK( ITAUQ ),
  1380. $ WORK( ITAUP ), WORK( IWORK ),
  1381. $ LWORK-IWORK+1, IERR )
  1382. CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
  1383. $ LDVT )
  1384. *
  1385. * Generate left bidiagonalizing vectors in WORK(IU)
  1386. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
  1387. *
  1388. CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
  1389. $ WORK( ITAUQ ), WORK( IWORK ),
  1390. $ LWORK-IWORK+1, IERR )
  1391. *
  1392. * Generate right bidiagonalizing vectors in VT
  1393. * (Workspace: need N*N + 4*N-1,
  1394. * prefer N*N+3*N+(N-1)*NB)
  1395. *
  1396. CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
  1397. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1398. IWORK = IE + N
  1399. *
  1400. * Perform bidiagonal QR iteration, computing left
  1401. * singular vectors of R in WORK(IU) and computing
  1402. * right singular vectors of R in VT
  1403. * (Workspace: need N*N + BDSPAC)
  1404. *
  1405. CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
  1406. $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
  1407. $ WORK( IWORK ), INFO )
  1408. *
  1409. * Multiply Q in A by left singular vectors of R in
  1410. * WORK(IU), storing result in U
  1411. * (Workspace: need N*N)
  1412. *
  1413. CALL DGEMM( 'N', 'N', M, N, N, ONE, A, LDA,
  1414. $ WORK( IU ), LDWRKU, ZERO, U, LDU )
  1415. *
  1416. ELSE
  1417. *
  1418. * Insufficient workspace for a fast algorithm
  1419. *
  1420. ITAU = 1
  1421. IWORK = ITAU + N
  1422. *
  1423. * Compute A=Q*R, copying result to U
  1424. * (Workspace: need 2*N, prefer N + N*NB)
  1425. *
  1426. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1427. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1428. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1429. *
  1430. * Generate Q in U
  1431. * (Workspace: need 2*N, prefer N + N*NB)
  1432. *
  1433. CALL DORGQR( M, N, N, U, LDU, WORK( ITAU ),
  1434. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1435. *
  1436. * Copy R to VT, zeroing out below it
  1437. *
  1438. CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
  1439. IF( N.GT.1 )
  1440. $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1441. $ VT( 2, 1 ), LDVT )
  1442. IE = ITAU
  1443. ITAUQ = IE + N
  1444. ITAUP = ITAUQ + N
  1445. IWORK = ITAUP + N
  1446. *
  1447. * Bidiagonalize R in VT
  1448. * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
  1449. *
  1450. CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
  1451. $ WORK( ITAUQ ), WORK( ITAUP ),
  1452. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1453. *
  1454. * Multiply Q in U by left bidiagonalizing vectors
  1455. * in VT
  1456. * (Workspace: need 3*N + M, prefer 3*N + M*NB)
  1457. *
  1458. CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
  1459. $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
  1460. $ LWORK-IWORK+1, IERR )
  1461. *
  1462. * Generate right bidiagonalizing vectors in VT
  1463. * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
  1464. *
  1465. CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
  1466. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1467. IWORK = IE + N
  1468. *
  1469. * Perform bidiagonal QR iteration, computing left
  1470. * singular vectors of A in U and computing right
  1471. * singular vectors of A in VT
  1472. * (Workspace: need BDSPAC)
  1473. *
  1474. CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
  1475. $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
  1476. $ INFO )
  1477. *
  1478. END IF
  1479. *
  1480. END IF
  1481. *
  1482. ELSE IF( WNTUA ) THEN
  1483. *
  1484. IF( WNTVN ) THEN
  1485. *
  1486. * Path 7 (M much larger than N, JOBU='A', JOBVT='N')
  1487. * M left singular vectors to be computed in U and
  1488. * no right singular vectors to be computed
  1489. *
  1490. IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
  1491. *
  1492. * Sufficient workspace for a fast algorithm
  1493. *
  1494. IR = 1
  1495. IF( LWORK.GE.WRKBL+LDA*N ) THEN
  1496. *
  1497. * WORK(IR) is LDA by N
  1498. *
  1499. LDWRKR = LDA
  1500. ELSE
  1501. *
  1502. * WORK(IR) is N by N
  1503. *
  1504. LDWRKR = N
  1505. END IF
  1506. ITAU = IR + LDWRKR*N
  1507. IWORK = ITAU + N
  1508. *
  1509. * Compute A=Q*R, copying result to U
  1510. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  1511. *
  1512. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1513. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1514. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1515. *
  1516. * Copy R to WORK(IR), zeroing out below it
  1517. *
  1518. CALL DLACPY( 'U', N, N, A, LDA, WORK( IR ),
  1519. $ LDWRKR )
  1520. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1521. $ WORK( IR+1 ), LDWRKR )
  1522. *
  1523. * Generate Q in U
  1524. * (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
  1525. *
  1526. CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
  1527. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1528. IE = ITAU
  1529. ITAUQ = IE + N
  1530. ITAUP = ITAUQ + N
  1531. IWORK = ITAUP + N
  1532. *
  1533. * Bidiagonalize R in WORK(IR)
  1534. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
  1535. *
  1536. CALL DGEBRD( N, N, WORK( IR ), LDWRKR, S,
  1537. $ WORK( IE ), WORK( ITAUQ ),
  1538. $ WORK( ITAUP ), WORK( IWORK ),
  1539. $ LWORK-IWORK+1, IERR )
  1540. *
  1541. * Generate left bidiagonalizing vectors in WORK(IR)
  1542. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
  1543. *
  1544. CALL DORGBR( 'Q', N, N, N, WORK( IR ), LDWRKR,
  1545. $ WORK( ITAUQ ), WORK( IWORK ),
  1546. $ LWORK-IWORK+1, IERR )
  1547. IWORK = IE + N
  1548. *
  1549. * Perform bidiagonal QR iteration, computing left
  1550. * singular vectors of R in WORK(IR)
  1551. * (Workspace: need N*N + BDSPAC)
  1552. *
  1553. CALL DBDSQR( 'U', N, 0, N, 0, S, WORK( IE ), DUM,
  1554. $ 1, WORK( IR ), LDWRKR, DUM, 1,
  1555. $ WORK( IWORK ), INFO )
  1556. *
  1557. * Multiply Q in U by left singular vectors of R in
  1558. * WORK(IR), storing result in A
  1559. * (Workspace: need N*N)
  1560. *
  1561. CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
  1562. $ WORK( IR ), LDWRKR, ZERO, A, LDA )
  1563. *
  1564. * Copy left singular vectors of A from A to U
  1565. *
  1566. CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
  1567. *
  1568. ELSE
  1569. *
  1570. * Insufficient workspace for a fast algorithm
  1571. *
  1572. ITAU = 1
  1573. IWORK = ITAU + N
  1574. *
  1575. * Compute A=Q*R, copying result to U
  1576. * (Workspace: need 2*N, prefer N + N*NB)
  1577. *
  1578. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1579. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1580. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1581. *
  1582. * Generate Q in U
  1583. * (Workspace: need N + M, prefer N + M*NB)
  1584. *
  1585. CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
  1586. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1587. IE = ITAU
  1588. ITAUQ = IE + N
  1589. ITAUP = ITAUQ + N
  1590. IWORK = ITAUP + N
  1591. *
  1592. * Zero out below R in A
  1593. *
  1594. IF( N .GT. 1 ) THEN
  1595. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1596. $ A( 2, 1 ), LDA )
  1597. END IF
  1598. *
  1599. * Bidiagonalize R in A
  1600. * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
  1601. *
  1602. CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
  1603. $ WORK( ITAUQ ), WORK( ITAUP ),
  1604. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1605. *
  1606. * Multiply Q in U by left bidiagonalizing vectors
  1607. * in A
  1608. * (Workspace: need 3*N + M, prefer 3*N + M*NB)
  1609. *
  1610. CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
  1611. $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
  1612. $ LWORK-IWORK+1, IERR )
  1613. IWORK = IE + N
  1614. *
  1615. * Perform bidiagonal QR iteration, computing left
  1616. * singular vectors of A in U
  1617. * (Workspace: need BDSPAC)
  1618. *
  1619. CALL DBDSQR( 'U', N, 0, M, 0, S, WORK( IE ), DUM,
  1620. $ 1, U, LDU, DUM, 1, WORK( IWORK ),
  1621. $ INFO )
  1622. *
  1623. END IF
  1624. *
  1625. ELSE IF( WNTVO ) THEN
  1626. *
  1627. * Path 8 (M much larger than N, JOBU='A', JOBVT='O')
  1628. * M left singular vectors to be computed in U and
  1629. * N right singular vectors to be overwritten on A
  1630. *
  1631. IF( LWORK.GE.2*N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
  1632. *
  1633. * Sufficient workspace for a fast algorithm
  1634. *
  1635. IU = 1
  1636. IF( LWORK.GE.WRKBL+2*LDA*N ) THEN
  1637. *
  1638. * WORK(IU) is LDA by N and WORK(IR) is LDA by N
  1639. *
  1640. LDWRKU = LDA
  1641. IR = IU + LDWRKU*N
  1642. LDWRKR = LDA
  1643. ELSE IF( LWORK.GE.WRKBL+( LDA + N )*N ) THEN
  1644. *
  1645. * WORK(IU) is LDA by N and WORK(IR) is N by N
  1646. *
  1647. LDWRKU = LDA
  1648. IR = IU + LDWRKU*N
  1649. LDWRKR = N
  1650. ELSE
  1651. *
  1652. * WORK(IU) is N by N and WORK(IR) is N by N
  1653. *
  1654. LDWRKU = N
  1655. IR = IU + LDWRKU*N
  1656. LDWRKR = N
  1657. END IF
  1658. ITAU = IR + LDWRKR*N
  1659. IWORK = ITAU + N
  1660. *
  1661. * Compute A=Q*R, copying result to U
  1662. * (Workspace: need 2*N*N + 2*N, prefer 2*N*N + N + N*NB)
  1663. *
  1664. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1665. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1666. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1667. *
  1668. * Generate Q in U
  1669. * (Workspace: need 2*N*N + N + M, prefer 2*N*N + N + M*NB)
  1670. *
  1671. CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
  1672. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1673. *
  1674. * Copy R to WORK(IU), zeroing out below it
  1675. *
  1676. CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
  1677. $ LDWRKU )
  1678. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1679. $ WORK( IU+1 ), LDWRKU )
  1680. IE = ITAU
  1681. ITAUQ = IE + N
  1682. ITAUP = ITAUQ + N
  1683. IWORK = ITAUP + N
  1684. *
  1685. * Bidiagonalize R in WORK(IU), copying result to
  1686. * WORK(IR)
  1687. * (Workspace: need 2*N*N + 4*N,
  1688. * prefer 2*N*N+3*N+2*N*NB)
  1689. *
  1690. CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
  1691. $ WORK( IE ), WORK( ITAUQ ),
  1692. $ WORK( ITAUP ), WORK( IWORK ),
  1693. $ LWORK-IWORK+1, IERR )
  1694. CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU,
  1695. $ WORK( IR ), LDWRKR )
  1696. *
  1697. * Generate left bidiagonalizing vectors in WORK(IU)
  1698. * (Workspace: need 2*N*N + 4*N, prefer 2*N*N + 3*N + N*NB)
  1699. *
  1700. CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
  1701. $ WORK( ITAUQ ), WORK( IWORK ),
  1702. $ LWORK-IWORK+1, IERR )
  1703. *
  1704. * Generate right bidiagonalizing vectors in WORK(IR)
  1705. * (Workspace: need 2*N*N + 4*N-1,
  1706. * prefer 2*N*N+3*N+(N-1)*NB)
  1707. *
  1708. CALL DORGBR( 'P', N, N, N, WORK( IR ), LDWRKR,
  1709. $ WORK( ITAUP ), WORK( IWORK ),
  1710. $ LWORK-IWORK+1, IERR )
  1711. IWORK = IE + N
  1712. *
  1713. * Perform bidiagonal QR iteration, computing left
  1714. * singular vectors of R in WORK(IU) and computing
  1715. * right singular vectors of R in WORK(IR)
  1716. * (Workspace: need 2*N*N + BDSPAC)
  1717. *
  1718. CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ),
  1719. $ WORK( IR ), LDWRKR, WORK( IU ),
  1720. $ LDWRKU, DUM, 1, WORK( IWORK ), INFO )
  1721. *
  1722. * Multiply Q in U by left singular vectors of R in
  1723. * WORK(IU), storing result in A
  1724. * (Workspace: need N*N)
  1725. *
  1726. CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
  1727. $ WORK( IU ), LDWRKU, ZERO, A, LDA )
  1728. *
  1729. * Copy left singular vectors of A from A to U
  1730. *
  1731. CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
  1732. *
  1733. * Copy right singular vectors of R from WORK(IR) to A
  1734. *
  1735. CALL DLACPY( 'F', N, N, WORK( IR ), LDWRKR, A,
  1736. $ LDA )
  1737. *
  1738. ELSE
  1739. *
  1740. * Insufficient workspace for a fast algorithm
  1741. *
  1742. ITAU = 1
  1743. IWORK = ITAU + N
  1744. *
  1745. * Compute A=Q*R, copying result to U
  1746. * (Workspace: need 2*N, prefer N + N*NB)
  1747. *
  1748. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1749. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1750. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1751. *
  1752. * Generate Q in U
  1753. * (Workspace: need N + M, prefer N + M*NB)
  1754. *
  1755. CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
  1756. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1757. IE = ITAU
  1758. ITAUQ = IE + N
  1759. ITAUP = ITAUQ + N
  1760. IWORK = ITAUP + N
  1761. *
  1762. * Zero out below R in A
  1763. *
  1764. IF( N .GT. 1 ) THEN
  1765. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1766. $ A( 2, 1 ), LDA )
  1767. END IF
  1768. *
  1769. * Bidiagonalize R in A
  1770. * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
  1771. *
  1772. CALL DGEBRD( N, N, A, LDA, S, WORK( IE ),
  1773. $ WORK( ITAUQ ), WORK( ITAUP ),
  1774. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1775. *
  1776. * Multiply Q in U by left bidiagonalizing vectors
  1777. * in A
  1778. * (Workspace: need 3*N + M, prefer 3*N + M*NB)
  1779. *
  1780. CALL DORMBR( 'Q', 'R', 'N', M, N, N, A, LDA,
  1781. $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
  1782. $ LWORK-IWORK+1, IERR )
  1783. *
  1784. * Generate right bidiagonalizing vectors in A
  1785. * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
  1786. *
  1787. CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
  1788. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1789. IWORK = IE + N
  1790. *
  1791. * Perform bidiagonal QR iteration, computing left
  1792. * singular vectors of A in U and computing right
  1793. * singular vectors of A in A
  1794. * (Workspace: need BDSPAC)
  1795. *
  1796. CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), A,
  1797. $ LDA, U, LDU, DUM, 1, WORK( IWORK ),
  1798. $ INFO )
  1799. *
  1800. END IF
  1801. *
  1802. ELSE IF( WNTVAS ) THEN
  1803. *
  1804. * Path 9 (M much larger than N, JOBU='A', JOBVT='S'
  1805. * or 'A')
  1806. * M left singular vectors to be computed in U and
  1807. * N right singular vectors to be computed in VT
  1808. *
  1809. IF( LWORK.GE.N*N+MAX( N+M, 4*N, BDSPAC ) ) THEN
  1810. *
  1811. * Sufficient workspace for a fast algorithm
  1812. *
  1813. IU = 1
  1814. IF( LWORK.GE.WRKBL+LDA*N ) THEN
  1815. *
  1816. * WORK(IU) is LDA by N
  1817. *
  1818. LDWRKU = LDA
  1819. ELSE
  1820. *
  1821. * WORK(IU) is N by N
  1822. *
  1823. LDWRKU = N
  1824. END IF
  1825. ITAU = IU + LDWRKU*N
  1826. IWORK = ITAU + N
  1827. *
  1828. * Compute A=Q*R, copying result to U
  1829. * (Workspace: need N*N + 2*N, prefer N*N + N + N*NB)
  1830. *
  1831. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1832. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1833. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1834. *
  1835. * Generate Q in U
  1836. * (Workspace: need N*N + N + M, prefer N*N + N + M*NB)
  1837. *
  1838. CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
  1839. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1840. *
  1841. * Copy R to WORK(IU), zeroing out below it
  1842. *
  1843. CALL DLACPY( 'U', N, N, A, LDA, WORK( IU ),
  1844. $ LDWRKU )
  1845. CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1846. $ WORK( IU+1 ), LDWRKU )
  1847. IE = ITAU
  1848. ITAUQ = IE + N
  1849. ITAUP = ITAUQ + N
  1850. IWORK = ITAUP + N
  1851. *
  1852. * Bidiagonalize R in WORK(IU), copying result to VT
  1853. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + 2*N*NB)
  1854. *
  1855. CALL DGEBRD( N, N, WORK( IU ), LDWRKU, S,
  1856. $ WORK( IE ), WORK( ITAUQ ),
  1857. $ WORK( ITAUP ), WORK( IWORK ),
  1858. $ LWORK-IWORK+1, IERR )
  1859. CALL DLACPY( 'U', N, N, WORK( IU ), LDWRKU, VT,
  1860. $ LDVT )
  1861. *
  1862. * Generate left bidiagonalizing vectors in WORK(IU)
  1863. * (Workspace: need N*N + 4*N, prefer N*N + 3*N + N*NB)
  1864. *
  1865. CALL DORGBR( 'Q', N, N, N, WORK( IU ), LDWRKU,
  1866. $ WORK( ITAUQ ), WORK( IWORK ),
  1867. $ LWORK-IWORK+1, IERR )
  1868. *
  1869. * Generate right bidiagonalizing vectors in VT
  1870. * (Workspace: need N*N + 4*N-1,
  1871. * prefer N*N+3*N+(N-1)*NB)
  1872. *
  1873. CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
  1874. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1875. IWORK = IE + N
  1876. *
  1877. * Perform bidiagonal QR iteration, computing left
  1878. * singular vectors of R in WORK(IU) and computing
  1879. * right singular vectors of R in VT
  1880. * (Workspace: need N*N + BDSPAC)
  1881. *
  1882. CALL DBDSQR( 'U', N, N, N, 0, S, WORK( IE ), VT,
  1883. $ LDVT, WORK( IU ), LDWRKU, DUM, 1,
  1884. $ WORK( IWORK ), INFO )
  1885. *
  1886. * Multiply Q in U by left singular vectors of R in
  1887. * WORK(IU), storing result in A
  1888. * (Workspace: need N*N)
  1889. *
  1890. CALL DGEMM( 'N', 'N', M, N, N, ONE, U, LDU,
  1891. $ WORK( IU ), LDWRKU, ZERO, A, LDA )
  1892. *
  1893. * Copy left singular vectors of A from A to U
  1894. *
  1895. CALL DLACPY( 'F', M, N, A, LDA, U, LDU )
  1896. *
  1897. ELSE
  1898. *
  1899. * Insufficient workspace for a fast algorithm
  1900. *
  1901. ITAU = 1
  1902. IWORK = ITAU + N
  1903. *
  1904. * Compute A=Q*R, copying result to U
  1905. * (Workspace: need 2*N, prefer N + N*NB)
  1906. *
  1907. CALL DGEQRF( M, N, A, LDA, WORK( ITAU ),
  1908. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1909. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1910. *
  1911. * Generate Q in U
  1912. * (Workspace: need N + M, prefer N + M*NB)
  1913. *
  1914. CALL DORGQR( M, M, N, U, LDU, WORK( ITAU ),
  1915. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1916. *
  1917. * Copy R from A to VT, zeroing out below it
  1918. *
  1919. CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
  1920. IF( N.GT.1 )
  1921. $ CALL DLASET( 'L', N-1, N-1, ZERO, ZERO,
  1922. $ VT( 2, 1 ), LDVT )
  1923. IE = ITAU
  1924. ITAUQ = IE + N
  1925. ITAUP = ITAUQ + N
  1926. IWORK = ITAUP + N
  1927. *
  1928. * Bidiagonalize R in VT
  1929. * (Workspace: need 4*N, prefer 3*N + 2*N*NB)
  1930. *
  1931. CALL DGEBRD( N, N, VT, LDVT, S, WORK( IE ),
  1932. $ WORK( ITAUQ ), WORK( ITAUP ),
  1933. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1934. *
  1935. * Multiply Q in U by left bidiagonalizing vectors
  1936. * in VT
  1937. * (Workspace: need 3*N + M, prefer 3*N + M*NB)
  1938. *
  1939. CALL DORMBR( 'Q', 'R', 'N', M, N, N, VT, LDVT,
  1940. $ WORK( ITAUQ ), U, LDU, WORK( IWORK ),
  1941. $ LWORK-IWORK+1, IERR )
  1942. *
  1943. * Generate right bidiagonalizing vectors in VT
  1944. * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
  1945. *
  1946. CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
  1947. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1948. IWORK = IE + N
  1949. *
  1950. * Perform bidiagonal QR iteration, computing left
  1951. * singular vectors of A in U and computing right
  1952. * singular vectors of A in VT
  1953. * (Workspace: need BDSPAC)
  1954. *
  1955. CALL DBDSQR( 'U', N, N, M, 0, S, WORK( IE ), VT,
  1956. $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
  1957. $ INFO )
  1958. *
  1959. END IF
  1960. *
  1961. END IF
  1962. *
  1963. END IF
  1964. *
  1965. ELSE
  1966. *
  1967. * M .LT. MNTHR
  1968. *
  1969. * Path 10 (M at least N, but not much larger)
  1970. * Reduce to bidiagonal form without QR decomposition
  1971. *
  1972. IE = 1
  1973. ITAUQ = IE + N
  1974. ITAUP = ITAUQ + N
  1975. IWORK = ITAUP + N
  1976. *
  1977. * Bidiagonalize A
  1978. * (Workspace: need 3*N + M, prefer 3*N + (M + N)*NB)
  1979. *
  1980. CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  1981. $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
  1982. $ IERR )
  1983. IF( WNTUAS ) THEN
  1984. *
  1985. * If left singular vectors desired in U, copy result to U
  1986. * and generate left bidiagonalizing vectors in U
  1987. * (Workspace: need 3*N + NCU, prefer 3*N + NCU*NB)
  1988. *
  1989. CALL DLACPY( 'L', M, N, A, LDA, U, LDU )
  1990. IF( WNTUS )
  1991. $ NCU = N
  1992. IF( WNTUA )
  1993. $ NCU = M
  1994. CALL DORGBR( 'Q', M, NCU, N, U, LDU, WORK( ITAUQ ),
  1995. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  1996. END IF
  1997. IF( WNTVAS ) THEN
  1998. *
  1999. * If right singular vectors desired in VT, copy result to
  2000. * VT and generate right bidiagonalizing vectors in VT
  2001. * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
  2002. *
  2003. CALL DLACPY( 'U', N, N, A, LDA, VT, LDVT )
  2004. CALL DORGBR( 'P', N, N, N, VT, LDVT, WORK( ITAUP ),
  2005. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2006. END IF
  2007. IF( WNTUO ) THEN
  2008. *
  2009. * If left singular vectors desired in A, generate left
  2010. * bidiagonalizing vectors in A
  2011. * (Workspace: need 4*N, prefer 3*N + N*NB)
  2012. *
  2013. CALL DORGBR( 'Q', M, N, N, A, LDA, WORK( ITAUQ ),
  2014. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2015. END IF
  2016. IF( WNTVO ) THEN
  2017. *
  2018. * If right singular vectors desired in A, generate right
  2019. * bidiagonalizing vectors in A
  2020. * (Workspace: need 4*N-1, prefer 3*N + (N-1)*NB)
  2021. *
  2022. CALL DORGBR( 'P', N, N, N, A, LDA, WORK( ITAUP ),
  2023. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2024. END IF
  2025. IWORK = IE + N
  2026. IF( WNTUAS .OR. WNTUO )
  2027. $ NRU = M
  2028. IF( WNTUN )
  2029. $ NRU = 0
  2030. IF( WNTVAS .OR. WNTVO )
  2031. $ NCVT = N
  2032. IF( WNTVN )
  2033. $ NCVT = 0
  2034. IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
  2035. *
  2036. * Perform bidiagonal QR iteration, if desired, computing
  2037. * left singular vectors in U and computing right singular
  2038. * vectors in VT
  2039. * (Workspace: need BDSPAC)
  2040. *
  2041. CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
  2042. $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
  2043. ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
  2044. *
  2045. * Perform bidiagonal QR iteration, if desired, computing
  2046. * left singular vectors in U and computing right singular
  2047. * vectors in A
  2048. * (Workspace: need BDSPAC)
  2049. *
  2050. CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
  2051. $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
  2052. ELSE
  2053. *
  2054. * Perform bidiagonal QR iteration, if desired, computing
  2055. * left singular vectors in A and computing right singular
  2056. * vectors in VT
  2057. * (Workspace: need BDSPAC)
  2058. *
  2059. CALL DBDSQR( 'U', N, NCVT, NRU, 0, S, WORK( IE ), VT,
  2060. $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
  2061. END IF
  2062. *
  2063. END IF
  2064. *
  2065. ELSE
  2066. *
  2067. * A has more columns than rows. If A has sufficiently more
  2068. * columns than rows, first reduce using the LQ decomposition (if
  2069. * sufficient workspace available)
  2070. *
  2071. IF( N.GE.MNTHR ) THEN
  2072. *
  2073. IF( WNTVN ) THEN
  2074. *
  2075. * Path 1t(N much larger than M, JOBVT='N')
  2076. * No right singular vectors to be computed
  2077. *
  2078. ITAU = 1
  2079. IWORK = ITAU + M
  2080. *
  2081. * Compute A=L*Q
  2082. * (Workspace: need 2*M, prefer M + M*NB)
  2083. *
  2084. CALL DGELQF( M, N, A, LDA, WORK( ITAU ), WORK( IWORK ),
  2085. $ LWORK-IWORK+1, IERR )
  2086. *
  2087. * Zero out above L
  2088. *
  2089. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ), LDA )
  2090. IE = 1
  2091. ITAUQ = IE + M
  2092. ITAUP = ITAUQ + M
  2093. IWORK = ITAUP + M
  2094. *
  2095. * Bidiagonalize L in A
  2096. * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
  2097. *
  2098. CALL DGEBRD( M, M, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  2099. $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
  2100. $ IERR )
  2101. IF( WNTUO .OR. WNTUAS ) THEN
  2102. *
  2103. * If left singular vectors desired, generate Q
  2104. * (Workspace: need 4*M, prefer 3*M + M*NB)
  2105. *
  2106. CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
  2107. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2108. END IF
  2109. IWORK = IE + M
  2110. NRU = 0
  2111. IF( WNTUO .OR. WNTUAS )
  2112. $ NRU = M
  2113. *
  2114. * Perform bidiagonal QR iteration, computing left singular
  2115. * vectors of A in A if desired
  2116. * (Workspace: need BDSPAC)
  2117. *
  2118. CALL DBDSQR( 'U', M, 0, NRU, 0, S, WORK( IE ), DUM, 1, A,
  2119. $ LDA, DUM, 1, WORK( IWORK ), INFO )
  2120. *
  2121. * If left singular vectors desired in U, copy them there
  2122. *
  2123. IF( WNTUAS )
  2124. $ CALL DLACPY( 'F', M, M, A, LDA, U, LDU )
  2125. *
  2126. ELSE IF( WNTVO .AND. WNTUN ) THEN
  2127. *
  2128. * Path 2t(N much larger than M, JOBU='N', JOBVT='O')
  2129. * M right singular vectors to be overwritten on A and
  2130. * no left singular vectors to be computed
  2131. *
  2132. IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
  2133. *
  2134. * Sufficient workspace for a fast algorithm
  2135. *
  2136. IR = 1
  2137. IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN
  2138. *
  2139. * WORK(IU) is LDA by N and WORK(IR) is LDA by M
  2140. *
  2141. LDWRKU = LDA
  2142. CHUNK = N
  2143. LDWRKR = LDA
  2144. ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN
  2145. *
  2146. * WORK(IU) is LDA by N and WORK(IR) is M by M
  2147. *
  2148. LDWRKU = LDA
  2149. CHUNK = N
  2150. LDWRKR = M
  2151. ELSE
  2152. *
  2153. * WORK(IU) is M by CHUNK and WORK(IR) is M by M
  2154. *
  2155. LDWRKU = M
  2156. CHUNK = ( LWORK-M*M-M ) / M
  2157. LDWRKR = M
  2158. END IF
  2159. ITAU = IR + LDWRKR*M
  2160. IWORK = ITAU + M
  2161. *
  2162. * Compute A=L*Q
  2163. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  2164. *
  2165. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2166. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2167. *
  2168. * Copy L to WORK(IR) and zero out above it
  2169. *
  2170. CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ), LDWRKR )
  2171. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
  2172. $ WORK( IR+LDWRKR ), LDWRKR )
  2173. *
  2174. * Generate Q in A
  2175. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  2176. *
  2177. CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
  2178. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2179. IE = ITAU
  2180. ITAUQ = IE + M
  2181. ITAUP = ITAUQ + M
  2182. IWORK = ITAUP + M
  2183. *
  2184. * Bidiagonalize L in WORK(IR)
  2185. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
  2186. *
  2187. CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S, WORK( IE ),
  2188. $ WORK( ITAUQ ), WORK( ITAUP ),
  2189. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2190. *
  2191. * Generate right vectors bidiagonalizing L
  2192. * (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
  2193. *
  2194. CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
  2195. $ WORK( ITAUP ), WORK( IWORK ),
  2196. $ LWORK-IWORK+1, IERR )
  2197. IWORK = IE + M
  2198. *
  2199. * Perform bidiagonal QR iteration, computing right
  2200. * singular vectors of L in WORK(IR)
  2201. * (Workspace: need M*M + BDSPAC)
  2202. *
  2203. CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
  2204. $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
  2205. $ WORK( IWORK ), INFO )
  2206. IU = IE + M
  2207. *
  2208. * Multiply right singular vectors of L in WORK(IR) by Q
  2209. * in A, storing result in WORK(IU) and copying to A
  2210. * (Workspace: need M*M + 2*M, prefer M*M + M*N + M)
  2211. *
  2212. DO 30 I = 1, N, CHUNK
  2213. BLK = MIN( N-I+1, CHUNK )
  2214. CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
  2215. $ LDWRKR, A( 1, I ), LDA, ZERO,
  2216. $ WORK( IU ), LDWRKU )
  2217. CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
  2218. $ A( 1, I ), LDA )
  2219. 30 CONTINUE
  2220. *
  2221. ELSE
  2222. *
  2223. * Insufficient workspace for a fast algorithm
  2224. *
  2225. IE = 1
  2226. ITAUQ = IE + M
  2227. ITAUP = ITAUQ + M
  2228. IWORK = ITAUP + M
  2229. *
  2230. * Bidiagonalize A
  2231. * (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
  2232. *
  2233. CALL DGEBRD( M, N, A, LDA, S, WORK( IE ),
  2234. $ WORK( ITAUQ ), WORK( ITAUP ),
  2235. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2236. *
  2237. * Generate right vectors bidiagonalizing A
  2238. * (Workspace: need 4*M, prefer 3*M + M*NB)
  2239. *
  2240. CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
  2241. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2242. IWORK = IE + M
  2243. *
  2244. * Perform bidiagonal QR iteration, computing right
  2245. * singular vectors of A in A
  2246. * (Workspace: need BDSPAC)
  2247. *
  2248. CALL DBDSQR( 'L', M, N, 0, 0, S, WORK( IE ), A, LDA,
  2249. $ DUM, 1, DUM, 1, WORK( IWORK ), INFO )
  2250. *
  2251. END IF
  2252. *
  2253. ELSE IF( WNTVO .AND. WNTUAS ) THEN
  2254. *
  2255. * Path 3t(N much larger than M, JOBU='S' or 'A', JOBVT='O')
  2256. * M right singular vectors to be overwritten on A and
  2257. * M left singular vectors to be computed in U
  2258. *
  2259. IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
  2260. *
  2261. * Sufficient workspace for a fast algorithm
  2262. *
  2263. IR = 1
  2264. IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + LDA*M ) THEN
  2265. *
  2266. * WORK(IU) is LDA by N and WORK(IR) is LDA by M
  2267. *
  2268. LDWRKU = LDA
  2269. CHUNK = N
  2270. LDWRKR = LDA
  2271. ELSE IF( LWORK.GE.MAX( WRKBL, LDA*N + M ) + M*M ) THEN
  2272. *
  2273. * WORK(IU) is LDA by N and WORK(IR) is M by M
  2274. *
  2275. LDWRKU = LDA
  2276. CHUNK = N
  2277. LDWRKR = M
  2278. ELSE
  2279. *
  2280. * WORK(IU) is M by CHUNK and WORK(IR) is M by M
  2281. *
  2282. LDWRKU = M
  2283. CHUNK = ( LWORK-M*M-M ) / M
  2284. LDWRKR = M
  2285. END IF
  2286. ITAU = IR + LDWRKR*M
  2287. IWORK = ITAU + M
  2288. *
  2289. * Compute A=L*Q
  2290. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  2291. *
  2292. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2293. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2294. *
  2295. * Copy L to U, zeroing about above it
  2296. *
  2297. CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
  2298. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
  2299. $ LDU )
  2300. *
  2301. * Generate Q in A
  2302. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  2303. *
  2304. CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
  2305. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2306. IE = ITAU
  2307. ITAUQ = IE + M
  2308. ITAUP = ITAUQ + M
  2309. IWORK = ITAUP + M
  2310. *
  2311. * Bidiagonalize L in U, copying result to WORK(IR)
  2312. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
  2313. *
  2314. CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
  2315. $ WORK( ITAUQ ), WORK( ITAUP ),
  2316. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2317. CALL DLACPY( 'U', M, M, U, LDU, WORK( IR ), LDWRKR )
  2318. *
  2319. * Generate right vectors bidiagonalizing L in WORK(IR)
  2320. * (Workspace: need M*M + 4*M-1, prefer M*M + 3*M + (M-1)*NB)
  2321. *
  2322. CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
  2323. $ WORK( ITAUP ), WORK( IWORK ),
  2324. $ LWORK-IWORK+1, IERR )
  2325. *
  2326. * Generate left vectors bidiagonalizing L in U
  2327. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
  2328. *
  2329. CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
  2330. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2331. IWORK = IE + M
  2332. *
  2333. * Perform bidiagonal QR iteration, computing left
  2334. * singular vectors of L in U, and computing right
  2335. * singular vectors of L in WORK(IR)
  2336. * (Workspace: need M*M + BDSPAC)
  2337. *
  2338. CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
  2339. $ WORK( IR ), LDWRKR, U, LDU, DUM, 1,
  2340. $ WORK( IWORK ), INFO )
  2341. IU = IE + M
  2342. *
  2343. * Multiply right singular vectors of L in WORK(IR) by Q
  2344. * in A, storing result in WORK(IU) and copying to A
  2345. * (Workspace: need M*M + 2*M, prefer M*M + M*N + M))
  2346. *
  2347. DO 40 I = 1, N, CHUNK
  2348. BLK = MIN( N-I+1, CHUNK )
  2349. CALL DGEMM( 'N', 'N', M, BLK, M, ONE, WORK( IR ),
  2350. $ LDWRKR, A( 1, I ), LDA, ZERO,
  2351. $ WORK( IU ), LDWRKU )
  2352. CALL DLACPY( 'F', M, BLK, WORK( IU ), LDWRKU,
  2353. $ A( 1, I ), LDA )
  2354. 40 CONTINUE
  2355. *
  2356. ELSE
  2357. *
  2358. * Insufficient workspace for a fast algorithm
  2359. *
  2360. ITAU = 1
  2361. IWORK = ITAU + M
  2362. *
  2363. * Compute A=L*Q
  2364. * (Workspace: need 2*M, prefer M + M*NB)
  2365. *
  2366. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2367. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2368. *
  2369. * Copy L to U, zeroing out above it
  2370. *
  2371. CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
  2372. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
  2373. $ LDU )
  2374. *
  2375. * Generate Q in A
  2376. * (Workspace: need 2*M, prefer M + M*NB)
  2377. *
  2378. CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
  2379. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2380. IE = ITAU
  2381. ITAUQ = IE + M
  2382. ITAUP = ITAUQ + M
  2383. IWORK = ITAUP + M
  2384. *
  2385. * Bidiagonalize L in U
  2386. * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
  2387. *
  2388. CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
  2389. $ WORK( ITAUQ ), WORK( ITAUP ),
  2390. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2391. *
  2392. * Multiply right vectors bidiagonalizing L by Q in A
  2393. * (Workspace: need 3*M + N, prefer 3*M + N*NB)
  2394. *
  2395. CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
  2396. $ WORK( ITAUP ), A, LDA, WORK( IWORK ),
  2397. $ LWORK-IWORK+1, IERR )
  2398. *
  2399. * Generate left vectors bidiagonalizing L in U
  2400. * (Workspace: need 4*M, prefer 3*M + M*NB)
  2401. *
  2402. CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
  2403. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2404. IWORK = IE + M
  2405. *
  2406. * Perform bidiagonal QR iteration, computing left
  2407. * singular vectors of A in U and computing right
  2408. * singular vectors of A in A
  2409. * (Workspace: need BDSPAC)
  2410. *
  2411. CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), A, LDA,
  2412. $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
  2413. *
  2414. END IF
  2415. *
  2416. ELSE IF( WNTVS ) THEN
  2417. *
  2418. IF( WNTUN ) THEN
  2419. *
  2420. * Path 4t(N much larger than M, JOBU='N', JOBVT='S')
  2421. * M right singular vectors to be computed in VT and
  2422. * no left singular vectors to be computed
  2423. *
  2424. IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
  2425. *
  2426. * Sufficient workspace for a fast algorithm
  2427. *
  2428. IR = 1
  2429. IF( LWORK.GE.WRKBL+LDA*M ) THEN
  2430. *
  2431. * WORK(IR) is LDA by M
  2432. *
  2433. LDWRKR = LDA
  2434. ELSE
  2435. *
  2436. * WORK(IR) is M by M
  2437. *
  2438. LDWRKR = M
  2439. END IF
  2440. ITAU = IR + LDWRKR*M
  2441. IWORK = ITAU + M
  2442. *
  2443. * Compute A=L*Q
  2444. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  2445. *
  2446. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2447. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2448. *
  2449. * Copy L to WORK(IR), zeroing out above it
  2450. *
  2451. CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
  2452. $ LDWRKR )
  2453. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
  2454. $ WORK( IR+LDWRKR ), LDWRKR )
  2455. *
  2456. * Generate Q in A
  2457. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  2458. *
  2459. CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
  2460. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2461. IE = ITAU
  2462. ITAUQ = IE + M
  2463. ITAUP = ITAUQ + M
  2464. IWORK = ITAUP + M
  2465. *
  2466. * Bidiagonalize L in WORK(IR)
  2467. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
  2468. *
  2469. CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
  2470. $ WORK( IE ), WORK( ITAUQ ),
  2471. $ WORK( ITAUP ), WORK( IWORK ),
  2472. $ LWORK-IWORK+1, IERR )
  2473. *
  2474. * Generate right vectors bidiagonalizing L in
  2475. * WORK(IR)
  2476. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
  2477. *
  2478. CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
  2479. $ WORK( ITAUP ), WORK( IWORK ),
  2480. $ LWORK-IWORK+1, IERR )
  2481. IWORK = IE + M
  2482. *
  2483. * Perform bidiagonal QR iteration, computing right
  2484. * singular vectors of L in WORK(IR)
  2485. * (Workspace: need M*M + BDSPAC)
  2486. *
  2487. CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
  2488. $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
  2489. $ WORK( IWORK ), INFO )
  2490. *
  2491. * Multiply right singular vectors of L in WORK(IR) by
  2492. * Q in A, storing result in VT
  2493. * (Workspace: need M*M)
  2494. *
  2495. CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
  2496. $ LDWRKR, A, LDA, ZERO, VT, LDVT )
  2497. *
  2498. ELSE
  2499. *
  2500. * Insufficient workspace for a fast algorithm
  2501. *
  2502. ITAU = 1
  2503. IWORK = ITAU + M
  2504. *
  2505. * Compute A=L*Q
  2506. * (Workspace: need 2*M, prefer M + M*NB)
  2507. *
  2508. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2509. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2510. *
  2511. * Copy result to VT
  2512. *
  2513. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  2514. *
  2515. * Generate Q in VT
  2516. * (Workspace: need 2*M, prefer M + M*NB)
  2517. *
  2518. CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
  2519. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2520. IE = ITAU
  2521. ITAUQ = IE + M
  2522. ITAUP = ITAUQ + M
  2523. IWORK = ITAUP + M
  2524. *
  2525. * Zero out above L in A
  2526. *
  2527. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
  2528. $ LDA )
  2529. *
  2530. * Bidiagonalize L in A
  2531. * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
  2532. *
  2533. CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
  2534. $ WORK( ITAUQ ), WORK( ITAUP ),
  2535. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2536. *
  2537. * Multiply right vectors bidiagonalizing L by Q in VT
  2538. * (Workspace: need 3*M + N, prefer 3*M + N*NB)
  2539. *
  2540. CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
  2541. $ WORK( ITAUP ), VT, LDVT,
  2542. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2543. IWORK = IE + M
  2544. *
  2545. * Perform bidiagonal QR iteration, computing right
  2546. * singular vectors of A in VT
  2547. * (Workspace: need BDSPAC)
  2548. *
  2549. CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
  2550. $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
  2551. $ INFO )
  2552. *
  2553. END IF
  2554. *
  2555. ELSE IF( WNTUO ) THEN
  2556. *
  2557. * Path 5t(N much larger than M, JOBU='O', JOBVT='S')
  2558. * M right singular vectors to be computed in VT and
  2559. * M left singular vectors to be overwritten on A
  2560. *
  2561. IF( LWORK.GE.2*M*M+MAX( 4*M, BDSPAC ) ) THEN
  2562. *
  2563. * Sufficient workspace for a fast algorithm
  2564. *
  2565. IU = 1
  2566. IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
  2567. *
  2568. * WORK(IU) is LDA by M and WORK(IR) is LDA by M
  2569. *
  2570. LDWRKU = LDA
  2571. IR = IU + LDWRKU*M
  2572. LDWRKR = LDA
  2573. ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN
  2574. *
  2575. * WORK(IU) is LDA by M and WORK(IR) is M by M
  2576. *
  2577. LDWRKU = LDA
  2578. IR = IU + LDWRKU*M
  2579. LDWRKR = M
  2580. ELSE
  2581. *
  2582. * WORK(IU) is M by M and WORK(IR) is M by M
  2583. *
  2584. LDWRKU = M
  2585. IR = IU + LDWRKU*M
  2586. LDWRKR = M
  2587. END IF
  2588. ITAU = IR + LDWRKR*M
  2589. IWORK = ITAU + M
  2590. *
  2591. * Compute A=L*Q
  2592. * (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
  2593. *
  2594. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2595. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2596. *
  2597. * Copy L to WORK(IU), zeroing out below it
  2598. *
  2599. CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
  2600. $ LDWRKU )
  2601. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
  2602. $ WORK( IU+LDWRKU ), LDWRKU )
  2603. *
  2604. * Generate Q in A
  2605. * (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
  2606. *
  2607. CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
  2608. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2609. IE = ITAU
  2610. ITAUQ = IE + M
  2611. ITAUP = ITAUQ + M
  2612. IWORK = ITAUP + M
  2613. *
  2614. * Bidiagonalize L in WORK(IU), copying result to
  2615. * WORK(IR)
  2616. * (Workspace: need 2*M*M + 4*M,
  2617. * prefer 2*M*M+3*M+2*M*NB)
  2618. *
  2619. CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
  2620. $ WORK( IE ), WORK( ITAUQ ),
  2621. $ WORK( ITAUP ), WORK( IWORK ),
  2622. $ LWORK-IWORK+1, IERR )
  2623. CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
  2624. $ WORK( IR ), LDWRKR )
  2625. *
  2626. * Generate right bidiagonalizing vectors in WORK(IU)
  2627. * (Workspace: need 2*M*M + 4*M-1,
  2628. * prefer 2*M*M+3*M+(M-1)*NB)
  2629. *
  2630. CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
  2631. $ WORK( ITAUP ), WORK( IWORK ),
  2632. $ LWORK-IWORK+1, IERR )
  2633. *
  2634. * Generate left bidiagonalizing vectors in WORK(IR)
  2635. * (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
  2636. *
  2637. CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
  2638. $ WORK( ITAUQ ), WORK( IWORK ),
  2639. $ LWORK-IWORK+1, IERR )
  2640. IWORK = IE + M
  2641. *
  2642. * Perform bidiagonal QR iteration, computing left
  2643. * singular vectors of L in WORK(IR) and computing
  2644. * right singular vectors of L in WORK(IU)
  2645. * (Workspace: need 2*M*M + BDSPAC)
  2646. *
  2647. CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
  2648. $ WORK( IU ), LDWRKU, WORK( IR ),
  2649. $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
  2650. *
  2651. * Multiply right singular vectors of L in WORK(IU) by
  2652. * Q in A, storing result in VT
  2653. * (Workspace: need M*M)
  2654. *
  2655. CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
  2656. $ LDWRKU, A, LDA, ZERO, VT, LDVT )
  2657. *
  2658. * Copy left singular vectors of L to A
  2659. * (Workspace: need M*M)
  2660. *
  2661. CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
  2662. $ LDA )
  2663. *
  2664. ELSE
  2665. *
  2666. * Insufficient workspace for a fast algorithm
  2667. *
  2668. ITAU = 1
  2669. IWORK = ITAU + M
  2670. *
  2671. * Compute A=L*Q, copying result to VT
  2672. * (Workspace: need 2*M, prefer M + M*NB)
  2673. *
  2674. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2675. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2676. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  2677. *
  2678. * Generate Q in VT
  2679. * (Workspace: need 2*M, prefer M + M*NB)
  2680. *
  2681. CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
  2682. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2683. IE = ITAU
  2684. ITAUQ = IE + M
  2685. ITAUP = ITAUQ + M
  2686. IWORK = ITAUP + M
  2687. *
  2688. * Zero out above L in A
  2689. *
  2690. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
  2691. $ LDA )
  2692. *
  2693. * Bidiagonalize L in A
  2694. * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
  2695. *
  2696. CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
  2697. $ WORK( ITAUQ ), WORK( ITAUP ),
  2698. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2699. *
  2700. * Multiply right vectors bidiagonalizing L by Q in VT
  2701. * (Workspace: need 3*M + N, prefer 3*M + N*NB)
  2702. *
  2703. CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
  2704. $ WORK( ITAUP ), VT, LDVT,
  2705. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2706. *
  2707. * Generate left bidiagonalizing vectors of L in A
  2708. * (Workspace: need 4*M, prefer 3*M + M*NB)
  2709. *
  2710. CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
  2711. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2712. IWORK = IE + M
  2713. *
  2714. * Perform bidiagonal QR iteration, compute left
  2715. * singular vectors of A in A and compute right
  2716. * singular vectors of A in VT
  2717. * (Workspace: need BDSPAC)
  2718. *
  2719. CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
  2720. $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
  2721. $ INFO )
  2722. *
  2723. END IF
  2724. *
  2725. ELSE IF( WNTUAS ) THEN
  2726. *
  2727. * Path 6t(N much larger than M, JOBU='S' or 'A',
  2728. * JOBVT='S')
  2729. * M right singular vectors to be computed in VT and
  2730. * M left singular vectors to be computed in U
  2731. *
  2732. IF( LWORK.GE.M*M+MAX( 4*M, BDSPAC ) ) THEN
  2733. *
  2734. * Sufficient workspace for a fast algorithm
  2735. *
  2736. IU = 1
  2737. IF( LWORK.GE.WRKBL+LDA*M ) THEN
  2738. *
  2739. * WORK(IU) is LDA by N
  2740. *
  2741. LDWRKU = LDA
  2742. ELSE
  2743. *
  2744. * WORK(IU) is LDA by M
  2745. *
  2746. LDWRKU = M
  2747. END IF
  2748. ITAU = IU + LDWRKU*M
  2749. IWORK = ITAU + M
  2750. *
  2751. * Compute A=L*Q
  2752. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  2753. *
  2754. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2755. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2756. *
  2757. * Copy L to WORK(IU), zeroing out above it
  2758. *
  2759. CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
  2760. $ LDWRKU )
  2761. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
  2762. $ WORK( IU+LDWRKU ), LDWRKU )
  2763. *
  2764. * Generate Q in A
  2765. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  2766. *
  2767. CALL DORGLQ( M, N, M, A, LDA, WORK( ITAU ),
  2768. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2769. IE = ITAU
  2770. ITAUQ = IE + M
  2771. ITAUP = ITAUQ + M
  2772. IWORK = ITAUP + M
  2773. *
  2774. * Bidiagonalize L in WORK(IU), copying result to U
  2775. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
  2776. *
  2777. CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
  2778. $ WORK( IE ), WORK( ITAUQ ),
  2779. $ WORK( ITAUP ), WORK( IWORK ),
  2780. $ LWORK-IWORK+1, IERR )
  2781. CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
  2782. $ LDU )
  2783. *
  2784. * Generate right bidiagonalizing vectors in WORK(IU)
  2785. * (Workspace: need M*M + 4*M-1,
  2786. * prefer M*M+3*M+(M-1)*NB)
  2787. *
  2788. CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
  2789. $ WORK( ITAUP ), WORK( IWORK ),
  2790. $ LWORK-IWORK+1, IERR )
  2791. *
  2792. * Generate left bidiagonalizing vectors in U
  2793. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
  2794. *
  2795. CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
  2796. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2797. IWORK = IE + M
  2798. *
  2799. * Perform bidiagonal QR iteration, computing left
  2800. * singular vectors of L in U and computing right
  2801. * singular vectors of L in WORK(IU)
  2802. * (Workspace: need M*M + BDSPAC)
  2803. *
  2804. CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
  2805. $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
  2806. $ WORK( IWORK ), INFO )
  2807. *
  2808. * Multiply right singular vectors of L in WORK(IU) by
  2809. * Q in A, storing result in VT
  2810. * (Workspace: need M*M)
  2811. *
  2812. CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
  2813. $ LDWRKU, A, LDA, ZERO, VT, LDVT )
  2814. *
  2815. ELSE
  2816. *
  2817. * Insufficient workspace for a fast algorithm
  2818. *
  2819. ITAU = 1
  2820. IWORK = ITAU + M
  2821. *
  2822. * Compute A=L*Q, copying result to VT
  2823. * (Workspace: need 2*M, prefer M + M*NB)
  2824. *
  2825. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2826. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2827. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  2828. *
  2829. * Generate Q in VT
  2830. * (Workspace: need 2*M, prefer M + M*NB)
  2831. *
  2832. CALL DORGLQ( M, N, M, VT, LDVT, WORK( ITAU ),
  2833. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2834. *
  2835. * Copy L to U, zeroing out above it
  2836. *
  2837. CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
  2838. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
  2839. $ LDU )
  2840. IE = ITAU
  2841. ITAUQ = IE + M
  2842. ITAUP = ITAUQ + M
  2843. IWORK = ITAUP + M
  2844. *
  2845. * Bidiagonalize L in U
  2846. * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
  2847. *
  2848. CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
  2849. $ WORK( ITAUQ ), WORK( ITAUP ),
  2850. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2851. *
  2852. * Multiply right bidiagonalizing vectors in U by Q
  2853. * in VT
  2854. * (Workspace: need 3*M + N, prefer 3*M + N*NB)
  2855. *
  2856. CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
  2857. $ WORK( ITAUP ), VT, LDVT,
  2858. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2859. *
  2860. * Generate left bidiagonalizing vectors in U
  2861. * (Workspace: need 4*M, prefer 3*M + M*NB)
  2862. *
  2863. CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
  2864. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2865. IWORK = IE + M
  2866. *
  2867. * Perform bidiagonal QR iteration, computing left
  2868. * singular vectors of A in U and computing right
  2869. * singular vectors of A in VT
  2870. * (Workspace: need BDSPAC)
  2871. *
  2872. CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
  2873. $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
  2874. $ INFO )
  2875. *
  2876. END IF
  2877. *
  2878. END IF
  2879. *
  2880. ELSE IF( WNTVA ) THEN
  2881. *
  2882. IF( WNTUN ) THEN
  2883. *
  2884. * Path 7t(N much larger than M, JOBU='N', JOBVT='A')
  2885. * N right singular vectors to be computed in VT and
  2886. * no left singular vectors to be computed
  2887. *
  2888. IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
  2889. *
  2890. * Sufficient workspace for a fast algorithm
  2891. *
  2892. IR = 1
  2893. IF( LWORK.GE.WRKBL+LDA*M ) THEN
  2894. *
  2895. * WORK(IR) is LDA by M
  2896. *
  2897. LDWRKR = LDA
  2898. ELSE
  2899. *
  2900. * WORK(IR) is M by M
  2901. *
  2902. LDWRKR = M
  2903. END IF
  2904. ITAU = IR + LDWRKR*M
  2905. IWORK = ITAU + M
  2906. *
  2907. * Compute A=L*Q, copying result to VT
  2908. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  2909. *
  2910. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2911. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2912. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  2913. *
  2914. * Copy L to WORK(IR), zeroing out above it
  2915. *
  2916. CALL DLACPY( 'L', M, M, A, LDA, WORK( IR ),
  2917. $ LDWRKR )
  2918. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
  2919. $ WORK( IR+LDWRKR ), LDWRKR )
  2920. *
  2921. * Generate Q in VT
  2922. * (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
  2923. *
  2924. CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
  2925. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2926. IE = ITAU
  2927. ITAUQ = IE + M
  2928. ITAUP = ITAUQ + M
  2929. IWORK = ITAUP + M
  2930. *
  2931. * Bidiagonalize L in WORK(IR)
  2932. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
  2933. *
  2934. CALL DGEBRD( M, M, WORK( IR ), LDWRKR, S,
  2935. $ WORK( IE ), WORK( ITAUQ ),
  2936. $ WORK( ITAUP ), WORK( IWORK ),
  2937. $ LWORK-IWORK+1, IERR )
  2938. *
  2939. * Generate right bidiagonalizing vectors in WORK(IR)
  2940. * (Workspace: need M*M + 4*M-1,
  2941. * prefer M*M+3*M+(M-1)*NB)
  2942. *
  2943. CALL DORGBR( 'P', M, M, M, WORK( IR ), LDWRKR,
  2944. $ WORK( ITAUP ), WORK( IWORK ),
  2945. $ LWORK-IWORK+1, IERR )
  2946. IWORK = IE + M
  2947. *
  2948. * Perform bidiagonal QR iteration, computing right
  2949. * singular vectors of L in WORK(IR)
  2950. * (Workspace: need M*M + BDSPAC)
  2951. *
  2952. CALL DBDSQR( 'U', M, M, 0, 0, S, WORK( IE ),
  2953. $ WORK( IR ), LDWRKR, DUM, 1, DUM, 1,
  2954. $ WORK( IWORK ), INFO )
  2955. *
  2956. * Multiply right singular vectors of L in WORK(IR) by
  2957. * Q in VT, storing result in A
  2958. * (Workspace: need M*M)
  2959. *
  2960. CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IR ),
  2961. $ LDWRKR, VT, LDVT, ZERO, A, LDA )
  2962. *
  2963. * Copy right singular vectors of A from A to VT
  2964. *
  2965. CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
  2966. *
  2967. ELSE
  2968. *
  2969. * Insufficient workspace for a fast algorithm
  2970. *
  2971. ITAU = 1
  2972. IWORK = ITAU + M
  2973. *
  2974. * Compute A=L*Q, copying result to VT
  2975. * (Workspace: need 2*M, prefer M + M*NB)
  2976. *
  2977. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  2978. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2979. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  2980. *
  2981. * Generate Q in VT
  2982. * (Workspace: need M + N, prefer M + N*NB)
  2983. *
  2984. CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
  2985. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  2986. IE = ITAU
  2987. ITAUQ = IE + M
  2988. ITAUP = ITAUQ + M
  2989. IWORK = ITAUP + M
  2990. *
  2991. * Zero out above L in A
  2992. *
  2993. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
  2994. $ LDA )
  2995. *
  2996. * Bidiagonalize L in A
  2997. * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
  2998. *
  2999. CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
  3000. $ WORK( ITAUQ ), WORK( ITAUP ),
  3001. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3002. *
  3003. * Multiply right bidiagonalizing vectors in A by Q
  3004. * in VT
  3005. * (Workspace: need 3*M + N, prefer 3*M + N*NB)
  3006. *
  3007. CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
  3008. $ WORK( ITAUP ), VT, LDVT,
  3009. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3010. IWORK = IE + M
  3011. *
  3012. * Perform bidiagonal QR iteration, computing right
  3013. * singular vectors of A in VT
  3014. * (Workspace: need BDSPAC)
  3015. *
  3016. CALL DBDSQR( 'U', M, N, 0, 0, S, WORK( IE ), VT,
  3017. $ LDVT, DUM, 1, DUM, 1, WORK( IWORK ),
  3018. $ INFO )
  3019. *
  3020. END IF
  3021. *
  3022. ELSE IF( WNTUO ) THEN
  3023. *
  3024. * Path 8t(N much larger than M, JOBU='O', JOBVT='A')
  3025. * N right singular vectors to be computed in VT and
  3026. * M left singular vectors to be overwritten on A
  3027. *
  3028. IF( LWORK.GE.2*M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
  3029. *
  3030. * Sufficient workspace for a fast algorithm
  3031. *
  3032. IU = 1
  3033. IF( LWORK.GE.WRKBL+2*LDA*M ) THEN
  3034. *
  3035. * WORK(IU) is LDA by M and WORK(IR) is LDA by M
  3036. *
  3037. LDWRKU = LDA
  3038. IR = IU + LDWRKU*M
  3039. LDWRKR = LDA
  3040. ELSE IF( LWORK.GE.WRKBL+( LDA + M )*M ) THEN
  3041. *
  3042. * WORK(IU) is LDA by M and WORK(IR) is M by M
  3043. *
  3044. LDWRKU = LDA
  3045. IR = IU + LDWRKU*M
  3046. LDWRKR = M
  3047. ELSE
  3048. *
  3049. * WORK(IU) is M by M and WORK(IR) is M by M
  3050. *
  3051. LDWRKU = M
  3052. IR = IU + LDWRKU*M
  3053. LDWRKR = M
  3054. END IF
  3055. ITAU = IR + LDWRKR*M
  3056. IWORK = ITAU + M
  3057. *
  3058. * Compute A=L*Q, copying result to VT
  3059. * (Workspace: need 2*M*M + 2*M, prefer 2*M*M + M + M*NB)
  3060. *
  3061. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  3062. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3063. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  3064. *
  3065. * Generate Q in VT
  3066. * (Workspace: need 2*M*M + M + N, prefer 2*M*M + M + N*NB)
  3067. *
  3068. CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
  3069. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3070. *
  3071. * Copy L to WORK(IU), zeroing out above it
  3072. *
  3073. CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
  3074. $ LDWRKU )
  3075. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
  3076. $ WORK( IU+LDWRKU ), LDWRKU )
  3077. IE = ITAU
  3078. ITAUQ = IE + M
  3079. ITAUP = ITAUQ + M
  3080. IWORK = ITAUP + M
  3081. *
  3082. * Bidiagonalize L in WORK(IU), copying result to
  3083. * WORK(IR)
  3084. * (Workspace: need 2*M*M + 4*M,
  3085. * prefer 2*M*M+3*M+2*M*NB)
  3086. *
  3087. CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
  3088. $ WORK( IE ), WORK( ITAUQ ),
  3089. $ WORK( ITAUP ), WORK( IWORK ),
  3090. $ LWORK-IWORK+1, IERR )
  3091. CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU,
  3092. $ WORK( IR ), LDWRKR )
  3093. *
  3094. * Generate right bidiagonalizing vectors in WORK(IU)
  3095. * (Workspace: need 2*M*M + 4*M-1,
  3096. * prefer 2*M*M+3*M+(M-1)*NB)
  3097. *
  3098. CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
  3099. $ WORK( ITAUP ), WORK( IWORK ),
  3100. $ LWORK-IWORK+1, IERR )
  3101. *
  3102. * Generate left bidiagonalizing vectors in WORK(IR)
  3103. * (Workspace: need 2*M*M + 4*M, prefer 2*M*M + 3*M + M*NB)
  3104. *
  3105. CALL DORGBR( 'Q', M, M, M, WORK( IR ), LDWRKR,
  3106. $ WORK( ITAUQ ), WORK( IWORK ),
  3107. $ LWORK-IWORK+1, IERR )
  3108. IWORK = IE + M
  3109. *
  3110. * Perform bidiagonal QR iteration, computing left
  3111. * singular vectors of L in WORK(IR) and computing
  3112. * right singular vectors of L in WORK(IU)
  3113. * (Workspace: need 2*M*M + BDSPAC)
  3114. *
  3115. CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
  3116. $ WORK( IU ), LDWRKU, WORK( IR ),
  3117. $ LDWRKR, DUM, 1, WORK( IWORK ), INFO )
  3118. *
  3119. * Multiply right singular vectors of L in WORK(IU) by
  3120. * Q in VT, storing result in A
  3121. * (Workspace: need M*M)
  3122. *
  3123. CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
  3124. $ LDWRKU, VT, LDVT, ZERO, A, LDA )
  3125. *
  3126. * Copy right singular vectors of A from A to VT
  3127. *
  3128. CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
  3129. *
  3130. * Copy left singular vectors of A from WORK(IR) to A
  3131. *
  3132. CALL DLACPY( 'F', M, M, WORK( IR ), LDWRKR, A,
  3133. $ LDA )
  3134. *
  3135. ELSE
  3136. *
  3137. * Insufficient workspace for a fast algorithm
  3138. *
  3139. ITAU = 1
  3140. IWORK = ITAU + M
  3141. *
  3142. * Compute A=L*Q, copying result to VT
  3143. * (Workspace: need 2*M, prefer M + M*NB)
  3144. *
  3145. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  3146. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3147. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  3148. *
  3149. * Generate Q in VT
  3150. * (Workspace: need M + N, prefer M + N*NB)
  3151. *
  3152. CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
  3153. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3154. IE = ITAU
  3155. ITAUQ = IE + M
  3156. ITAUP = ITAUQ + M
  3157. IWORK = ITAUP + M
  3158. *
  3159. * Zero out above L in A
  3160. *
  3161. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, A( 1, 2 ),
  3162. $ LDA )
  3163. *
  3164. * Bidiagonalize L in A
  3165. * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
  3166. *
  3167. CALL DGEBRD( M, M, A, LDA, S, WORK( IE ),
  3168. $ WORK( ITAUQ ), WORK( ITAUP ),
  3169. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3170. *
  3171. * Multiply right bidiagonalizing vectors in A by Q
  3172. * in VT
  3173. * (Workspace: need 3*M + N, prefer 3*M + N*NB)
  3174. *
  3175. CALL DORMBR( 'P', 'L', 'T', M, N, M, A, LDA,
  3176. $ WORK( ITAUP ), VT, LDVT,
  3177. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3178. *
  3179. * Generate left bidiagonalizing vectors in A
  3180. * (Workspace: need 4*M, prefer 3*M + M*NB)
  3181. *
  3182. CALL DORGBR( 'Q', M, M, M, A, LDA, WORK( ITAUQ ),
  3183. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3184. IWORK = IE + M
  3185. *
  3186. * Perform bidiagonal QR iteration, computing left
  3187. * singular vectors of A in A and computing right
  3188. * singular vectors of A in VT
  3189. * (Workspace: need BDSPAC)
  3190. *
  3191. CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
  3192. $ LDVT, A, LDA, DUM, 1, WORK( IWORK ),
  3193. $ INFO )
  3194. *
  3195. END IF
  3196. *
  3197. ELSE IF( WNTUAS ) THEN
  3198. *
  3199. * Path 9t(N much larger than M, JOBU='S' or 'A',
  3200. * JOBVT='A')
  3201. * N right singular vectors to be computed in VT and
  3202. * M left singular vectors to be computed in U
  3203. *
  3204. IF( LWORK.GE.M*M+MAX( N + M, 4*M, BDSPAC ) ) THEN
  3205. *
  3206. * Sufficient workspace for a fast algorithm
  3207. *
  3208. IU = 1
  3209. IF( LWORK.GE.WRKBL+LDA*M ) THEN
  3210. *
  3211. * WORK(IU) is LDA by M
  3212. *
  3213. LDWRKU = LDA
  3214. ELSE
  3215. *
  3216. * WORK(IU) is M by M
  3217. *
  3218. LDWRKU = M
  3219. END IF
  3220. ITAU = IU + LDWRKU*M
  3221. IWORK = ITAU + M
  3222. *
  3223. * Compute A=L*Q, copying result to VT
  3224. * (Workspace: need M*M + 2*M, prefer M*M + M + M*NB)
  3225. *
  3226. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  3227. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3228. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  3229. *
  3230. * Generate Q in VT
  3231. * (Workspace: need M*M + M + N, prefer M*M + M + N*NB)
  3232. *
  3233. CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
  3234. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3235. *
  3236. * Copy L to WORK(IU), zeroing out above it
  3237. *
  3238. CALL DLACPY( 'L', M, M, A, LDA, WORK( IU ),
  3239. $ LDWRKU )
  3240. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO,
  3241. $ WORK( IU+LDWRKU ), LDWRKU )
  3242. IE = ITAU
  3243. ITAUQ = IE + M
  3244. ITAUP = ITAUQ + M
  3245. IWORK = ITAUP + M
  3246. *
  3247. * Bidiagonalize L in WORK(IU), copying result to U
  3248. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + 2*M*NB)
  3249. *
  3250. CALL DGEBRD( M, M, WORK( IU ), LDWRKU, S,
  3251. $ WORK( IE ), WORK( ITAUQ ),
  3252. $ WORK( ITAUP ), WORK( IWORK ),
  3253. $ LWORK-IWORK+1, IERR )
  3254. CALL DLACPY( 'L', M, M, WORK( IU ), LDWRKU, U,
  3255. $ LDU )
  3256. *
  3257. * Generate right bidiagonalizing vectors in WORK(IU)
  3258. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + (M-1)*NB)
  3259. *
  3260. CALL DORGBR( 'P', M, M, M, WORK( IU ), LDWRKU,
  3261. $ WORK( ITAUP ), WORK( IWORK ),
  3262. $ LWORK-IWORK+1, IERR )
  3263. *
  3264. * Generate left bidiagonalizing vectors in U
  3265. * (Workspace: need M*M + 4*M, prefer M*M + 3*M + M*NB)
  3266. *
  3267. CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
  3268. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3269. IWORK = IE + M
  3270. *
  3271. * Perform bidiagonal QR iteration, computing left
  3272. * singular vectors of L in U and computing right
  3273. * singular vectors of L in WORK(IU)
  3274. * (Workspace: need M*M + BDSPAC)
  3275. *
  3276. CALL DBDSQR( 'U', M, M, M, 0, S, WORK( IE ),
  3277. $ WORK( IU ), LDWRKU, U, LDU, DUM, 1,
  3278. $ WORK( IWORK ), INFO )
  3279. *
  3280. * Multiply right singular vectors of L in WORK(IU) by
  3281. * Q in VT, storing result in A
  3282. * (Workspace: need M*M)
  3283. *
  3284. CALL DGEMM( 'N', 'N', M, N, M, ONE, WORK( IU ),
  3285. $ LDWRKU, VT, LDVT, ZERO, A, LDA )
  3286. *
  3287. * Copy right singular vectors of A from A to VT
  3288. *
  3289. CALL DLACPY( 'F', M, N, A, LDA, VT, LDVT )
  3290. *
  3291. ELSE
  3292. *
  3293. * Insufficient workspace for a fast algorithm
  3294. *
  3295. ITAU = 1
  3296. IWORK = ITAU + M
  3297. *
  3298. * Compute A=L*Q, copying result to VT
  3299. * (Workspace: need 2*M, prefer M + M*NB)
  3300. *
  3301. CALL DGELQF( M, N, A, LDA, WORK( ITAU ),
  3302. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3303. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  3304. *
  3305. * Generate Q in VT
  3306. * (Workspace: need M + N, prefer M + N*NB)
  3307. *
  3308. CALL DORGLQ( N, N, M, VT, LDVT, WORK( ITAU ),
  3309. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3310. *
  3311. * Copy L to U, zeroing out above it
  3312. *
  3313. CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
  3314. CALL DLASET( 'U', M-1, M-1, ZERO, ZERO, U( 1, 2 ),
  3315. $ LDU )
  3316. IE = ITAU
  3317. ITAUQ = IE + M
  3318. ITAUP = ITAUQ + M
  3319. IWORK = ITAUP + M
  3320. *
  3321. * Bidiagonalize L in U
  3322. * (Workspace: need 4*M, prefer 3*M + 2*M*NB)
  3323. *
  3324. CALL DGEBRD( M, M, U, LDU, S, WORK( IE ),
  3325. $ WORK( ITAUQ ), WORK( ITAUP ),
  3326. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3327. *
  3328. * Multiply right bidiagonalizing vectors in U by Q
  3329. * in VT
  3330. * (Workspace: need 3*M + N, prefer 3*M + N*NB)
  3331. *
  3332. CALL DORMBR( 'P', 'L', 'T', M, N, M, U, LDU,
  3333. $ WORK( ITAUP ), VT, LDVT,
  3334. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3335. *
  3336. * Generate left bidiagonalizing vectors in U
  3337. * (Workspace: need 4*M, prefer 3*M + M*NB)
  3338. *
  3339. CALL DORGBR( 'Q', M, M, M, U, LDU, WORK( ITAUQ ),
  3340. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3341. IWORK = IE + M
  3342. *
  3343. * Perform bidiagonal QR iteration, computing left
  3344. * singular vectors of A in U and computing right
  3345. * singular vectors of A in VT
  3346. * (Workspace: need BDSPAC)
  3347. *
  3348. CALL DBDSQR( 'U', M, N, M, 0, S, WORK( IE ), VT,
  3349. $ LDVT, U, LDU, DUM, 1, WORK( IWORK ),
  3350. $ INFO )
  3351. *
  3352. END IF
  3353. *
  3354. END IF
  3355. *
  3356. END IF
  3357. *
  3358. ELSE
  3359. *
  3360. * N .LT. MNTHR
  3361. *
  3362. * Path 10t(N greater than M, but not much larger)
  3363. * Reduce to bidiagonal form without LQ decomposition
  3364. *
  3365. IE = 1
  3366. ITAUQ = IE + M
  3367. ITAUP = ITAUQ + M
  3368. IWORK = ITAUP + M
  3369. *
  3370. * Bidiagonalize A
  3371. * (Workspace: need 3*M + N, prefer 3*M + (M + N)*NB)
  3372. *
  3373. CALL DGEBRD( M, N, A, LDA, S, WORK( IE ), WORK( ITAUQ ),
  3374. $ WORK( ITAUP ), WORK( IWORK ), LWORK-IWORK+1,
  3375. $ IERR )
  3376. IF( WNTUAS ) THEN
  3377. *
  3378. * If left singular vectors desired in U, copy result to U
  3379. * and generate left bidiagonalizing vectors in U
  3380. * (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
  3381. *
  3382. CALL DLACPY( 'L', M, M, A, LDA, U, LDU )
  3383. CALL DORGBR( 'Q', M, M, N, U, LDU, WORK( ITAUQ ),
  3384. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3385. END IF
  3386. IF( WNTVAS ) THEN
  3387. *
  3388. * If right singular vectors desired in VT, copy result to
  3389. * VT and generate right bidiagonalizing vectors in VT
  3390. * (Workspace: need 3*M + NRVT, prefer 3*M + NRVT*NB)
  3391. *
  3392. CALL DLACPY( 'U', M, N, A, LDA, VT, LDVT )
  3393. IF( WNTVA )
  3394. $ NRVT = N
  3395. IF( WNTVS )
  3396. $ NRVT = M
  3397. CALL DORGBR( 'P', NRVT, N, M, VT, LDVT, WORK( ITAUP ),
  3398. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3399. END IF
  3400. IF( WNTUO ) THEN
  3401. *
  3402. * If left singular vectors desired in A, generate left
  3403. * bidiagonalizing vectors in A
  3404. * (Workspace: need 4*M-1, prefer 3*M + (M-1)*NB)
  3405. *
  3406. CALL DORGBR( 'Q', M, M, N, A, LDA, WORK( ITAUQ ),
  3407. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3408. END IF
  3409. IF( WNTVO ) THEN
  3410. *
  3411. * If right singular vectors desired in A, generate right
  3412. * bidiagonalizing vectors in A
  3413. * (Workspace: need 4*M, prefer 3*M + M*NB)
  3414. *
  3415. CALL DORGBR( 'P', M, N, M, A, LDA, WORK( ITAUP ),
  3416. $ WORK( IWORK ), LWORK-IWORK+1, IERR )
  3417. END IF
  3418. IWORK = IE + M
  3419. IF( WNTUAS .OR. WNTUO )
  3420. $ NRU = M
  3421. IF( WNTUN )
  3422. $ NRU = 0
  3423. IF( WNTVAS .OR. WNTVO )
  3424. $ NCVT = N
  3425. IF( WNTVN )
  3426. $ NCVT = 0
  3427. IF( ( .NOT.WNTUO ) .AND. ( .NOT.WNTVO ) ) THEN
  3428. *
  3429. * Perform bidiagonal QR iteration, if desired, computing
  3430. * left singular vectors in U and computing right singular
  3431. * vectors in VT
  3432. * (Workspace: need BDSPAC)
  3433. *
  3434. CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
  3435. $ LDVT, U, LDU, DUM, 1, WORK( IWORK ), INFO )
  3436. ELSE IF( ( .NOT.WNTUO ) .AND. WNTVO ) THEN
  3437. *
  3438. * Perform bidiagonal QR iteration, if desired, computing
  3439. * left singular vectors in U and computing right singular
  3440. * vectors in A
  3441. * (Workspace: need BDSPAC)
  3442. *
  3443. CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), A, LDA,
  3444. $ U, LDU, DUM, 1, WORK( IWORK ), INFO )
  3445. ELSE
  3446. *
  3447. * Perform bidiagonal QR iteration, if desired, computing
  3448. * left singular vectors in A and computing right singular
  3449. * vectors in VT
  3450. * (Workspace: need BDSPAC)
  3451. *
  3452. CALL DBDSQR( 'L', M, NCVT, NRU, 0, S, WORK( IE ), VT,
  3453. $ LDVT, A, LDA, DUM, 1, WORK( IWORK ), INFO )
  3454. END IF
  3455. *
  3456. END IF
  3457. *
  3458. END IF
  3459. *
  3460. * If DBDSQR failed to converge, copy unconverged superdiagonals
  3461. * to WORK( 2:MINMN )
  3462. *
  3463. IF( INFO.NE.0 ) THEN
  3464. IF( IE.GT.2 ) THEN
  3465. DO 50 I = 1, MINMN - 1
  3466. WORK( I+1 ) = WORK( I+IE-1 )
  3467. 50 CONTINUE
  3468. END IF
  3469. IF( IE.LT.2 ) THEN
  3470. DO 60 I = MINMN - 1, 1, -1
  3471. WORK( I+1 ) = WORK( I+IE-1 )
  3472. 60 CONTINUE
  3473. END IF
  3474. END IF
  3475. *
  3476. * Undo scaling if necessary
  3477. *
  3478. IF( ISCL.EQ.1 ) THEN
  3479. IF( ANRM.GT.BIGNUM )
  3480. $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN, 1, S, MINMN,
  3481. $ IERR )
  3482. IF( INFO.NE.0 .AND. ANRM.GT.BIGNUM )
  3483. $ CALL DLASCL( 'G', 0, 0, BIGNUM, ANRM, MINMN-1, 1, WORK( 2 ),
  3484. $ MINMN, IERR )
  3485. IF( ANRM.LT.SMLNUM )
  3486. $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN, 1, S, MINMN,
  3487. $ IERR )
  3488. IF( INFO.NE.0 .AND. ANRM.LT.SMLNUM )
  3489. $ CALL DLASCL( 'G', 0, 0, SMLNUM, ANRM, MINMN-1, 1, WORK( 2 ),
  3490. $ MINMN, IERR )
  3491. END IF
  3492. *
  3493. * Return optimal workspace in WORK(1)
  3494. *
  3495. WORK( 1 ) = MAXWRK
  3496. *
  3497. RETURN
  3498. *
  3499. * End of DGESVD
  3500. *
  3501. END