You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

dsb2st_kernels.f 10 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335
  1. *> \brief \b DSB2ST_KERNELS
  2. *
  3. * @generated from zhb2st_kernels.f, fortran z -> d, Wed Dec 7 08:22:39 2016
  4. *
  5. * =========== DOCUMENTATION ===========
  6. *
  7. * Online html documentation available at
  8. * http://www.netlib.org/lapack/explore-html/
  9. *
  10. *> \htmlonly
  11. *> Download DSB2ST_KERNELS + dependencies
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsb2st_kernels.f">
  13. *> [TGZ]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsb2st_kernels.f">
  15. *> [ZIP]</a>
  16. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsb2st_kernels.f">
  17. *> [TXT]</a>
  18. *> \endhtmlonly
  19. *
  20. * Definition:
  21. * ===========
  22. *
  23. * SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
  24. * ST, ED, SWEEP, N, NB, IB,
  25. * A, LDA, V, TAU, LDVT, WORK)
  26. *
  27. * IMPLICIT NONE
  28. *
  29. * .. Scalar Arguments ..
  30. * CHARACTER UPLO
  31. * LOGICAL WANTZ
  32. * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
  33. * ..
  34. * .. Array Arguments ..
  35. * DOUBLE PRECISION A( LDA, * ), V( * ),
  36. * TAU( * ), WORK( * )
  37. *
  38. *> \par Purpose:
  39. * =============
  40. *>
  41. *> \verbatim
  42. *>
  43. *> DSB2ST_KERNELS is an internal routine used by the DSYTRD_SB2ST
  44. *> subroutine.
  45. *> \endverbatim
  46. *
  47. * Arguments:
  48. * ==========
  49. *
  50. *> @param[in] n
  51. *> The order of the matrix A.
  52. *>
  53. *> @param[in] nb
  54. *> The size of the band.
  55. *>
  56. *> @param[in, out] A
  57. *> A pointer to the matrix A.
  58. *>
  59. *> @param[in] lda
  60. *> The leading dimension of the matrix A.
  61. *>
  62. *> @param[out] V
  63. *> DOUBLE PRECISION array, dimension 2*n if eigenvalues only are
  64. *> requested or to be queried for vectors.
  65. *>
  66. *> @param[out] TAU
  67. *> DOUBLE PRECISION array, dimension (2*n).
  68. *> The scalar factors of the Householder reflectors are stored
  69. *> in this array.
  70. *>
  71. *> @param[in] st
  72. *> internal parameter for indices.
  73. *>
  74. *> @param[in] ed
  75. *> internal parameter for indices.
  76. *>
  77. *> @param[in] sweep
  78. *> internal parameter for indices.
  79. *>
  80. *> @param[in] Vblksiz
  81. *> internal parameter for indices.
  82. *>
  83. *> @param[in] wantz
  84. *> Logical flag indicating whether only eigenvalues are requested
  85. *> or both eigenvalues and eigenvectors.
  86. *>
  87. *> @param[in] work
  88. *> Workspace of size nb.
  89. *>
  90. *> \par Further Details:
  91. * =====================
  92. *>
  93. *> \verbatim
  94. *>
  95. *> Implemented by Azzam Haidar.
  96. *>
  97. *> All details are available in the technical report and in the SC11 and SC13 papers.
  98. *>
  99. *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  100. *> Parallel reduction to condensed forms for symmetric eigenvalue problems
  101. *> using aggregated fine-grained and memory-aware kernels. In Proceedings
  102. *> of 2011 International Conference for High Performance Computing,
  103. *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
  104. *> Article 8 , 11 pages.
  105. *> http://doi.acm.org/10.1145/2063384.2063394
  106. *>
  107. *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
  108. *> An improved parallel singular value algorithm and its implementation
  109. *> for multicore hardware, In Proceedings of 2013 International Conference
  110. *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
  111. *> Denver, Colorado, USA, 2013.
  112. *> Article 90, 12 pages.
  113. *> http://doi.acm.org/10.1145/2503210.2503292
  114. *>
  115. *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  116. *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  117. *> calculations based on fine-grained memory aware tasks.
  118. *> International Journal of High Performance Computing Applications.
  119. *> Volume 28 Issue 2, Pages 196-209, May 2014.
  120. *> http://hpc.sagepub.com/content/28/2/196
  121. *>
  122. *> \endverbatim
  123. *>
  124. * =====================================================================
  125. SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
  126. $ ST, ED, SWEEP, N, NB, IB,
  127. $ A, LDA, V, TAU, LDVT, WORK)
  128. *
  129. IMPLICIT NONE
  130. *
  131. * -- LAPACK computational routine (version 3.7.0) --
  132. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  133. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  134. * December 2016
  135. *
  136. * .. Scalar Arguments ..
  137. CHARACTER UPLO
  138. LOGICAL WANTZ
  139. INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
  140. * ..
  141. * .. Array Arguments ..
  142. DOUBLE PRECISION A( LDA, * ), V( * ),
  143. $ TAU( * ), WORK( * )
  144. * ..
  145. *
  146. * =====================================================================
  147. *
  148. * .. Parameters ..
  149. DOUBLE PRECISION ZERO, ONE
  150. PARAMETER ( ZERO = 0.0D+0,
  151. $ ONE = 1.0D+0 )
  152. * ..
  153. * .. Local Scalars ..
  154. LOGICAL UPPER
  155. INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
  156. $ DPOS, OFDPOS, AJETER
  157. DOUBLE PRECISION CTMP
  158. * ..
  159. * .. External Subroutines ..
  160. EXTERNAL DLARFG, DLARFX, DLARFY
  161. * ..
  162. * .. Intrinsic Functions ..
  163. INTRINSIC MOD
  164. * .. External Functions ..
  165. LOGICAL LSAME
  166. EXTERNAL LSAME
  167. * ..
  168. * ..
  169. * .. Executable Statements ..
  170. *
  171. AJETER = IB + LDVT
  172. UPPER = LSAME( UPLO, 'U' )
  173. IF( UPPER ) THEN
  174. DPOS = 2 * NB + 1
  175. OFDPOS = 2 * NB
  176. ELSE
  177. DPOS = 1
  178. OFDPOS = 2
  179. ENDIF
  180. *
  181. * Upper case
  182. *
  183. IF( UPPER ) THEN
  184. *
  185. IF( WANTZ ) THEN
  186. VPOS = MOD( SWEEP-1, 2 ) * N + ST
  187. TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  188. ELSE
  189. VPOS = MOD( SWEEP-1, 2 ) * N + ST
  190. TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  191. ENDIF
  192. *
  193. IF( TTYPE.EQ.1 ) THEN
  194. LM = ED - ST + 1
  195. *
  196. V( VPOS ) = ONE
  197. DO 10 I = 1, LM-1
  198. V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) )
  199. A( OFDPOS-I, ST+I ) = ZERO
  200. 10 CONTINUE
  201. CTMP = ( A( OFDPOS, ST ) )
  202. CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1,
  203. $ TAU( TAUPOS ) )
  204. A( OFDPOS, ST ) = CTMP
  205. *
  206. LM = ED - ST + 1
  207. CALL DLARFY( UPLO, LM, V( VPOS ), 1,
  208. $ ( TAU( TAUPOS ) ),
  209. $ A( DPOS, ST ), LDA-1, WORK)
  210. ENDIF
  211. *
  212. IF( TTYPE.EQ.3 ) THEN
  213. *
  214. LM = ED - ST + 1
  215. CALL DLARFY( UPLO, LM, V( VPOS ), 1,
  216. $ ( TAU( TAUPOS ) ),
  217. $ A( DPOS, ST ), LDA-1, WORK)
  218. ENDIF
  219. *
  220. IF( TTYPE.EQ.2 ) THEN
  221. J1 = ED+1
  222. J2 = MIN( ED+NB, N )
  223. LN = ED-ST+1
  224. LM = J2-J1+1
  225. IF( LM.GT.0) THEN
  226. CALL DLARFX( 'Left', LN, LM, V( VPOS ),
  227. $ ( TAU( TAUPOS ) ),
  228. $ A( DPOS-NB, J1 ), LDA-1, WORK)
  229. *
  230. IF( WANTZ ) THEN
  231. VPOS = MOD( SWEEP-1, 2 ) * N + J1
  232. TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  233. ELSE
  234. VPOS = MOD( SWEEP-1, 2 ) * N + J1
  235. TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  236. ENDIF
  237. *
  238. V( VPOS ) = ONE
  239. DO 30 I = 1, LM-1
  240. V( VPOS+I ) =
  241. $ ( A( DPOS-NB-I, J1+I ) )
  242. A( DPOS-NB-I, J1+I ) = ZERO
  243. 30 CONTINUE
  244. CTMP = ( A( DPOS-NB, J1 ) )
  245. CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) )
  246. A( DPOS-NB, J1 ) = CTMP
  247. *
  248. CALL DLARFX( 'Right', LN-1, LM, V( VPOS ),
  249. $ TAU( TAUPOS ),
  250. $ A( DPOS-NB+1, J1 ), LDA-1, WORK)
  251. ENDIF
  252. ENDIF
  253. *
  254. * Lower case
  255. *
  256. ELSE
  257. *
  258. IF( WANTZ ) THEN
  259. VPOS = MOD( SWEEP-1, 2 ) * N + ST
  260. TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  261. ELSE
  262. VPOS = MOD( SWEEP-1, 2 ) * N + ST
  263. TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  264. ENDIF
  265. *
  266. IF( TTYPE.EQ.1 ) THEN
  267. LM = ED - ST + 1
  268. *
  269. V( VPOS ) = ONE
  270. DO 20 I = 1, LM-1
  271. V( VPOS+I ) = A( OFDPOS+I, ST-1 )
  272. A( OFDPOS+I, ST-1 ) = ZERO
  273. 20 CONTINUE
  274. CALL DLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1,
  275. $ TAU( TAUPOS ) )
  276. *
  277. LM = ED - ST + 1
  278. *
  279. CALL DLARFY( UPLO, LM, V( VPOS ), 1,
  280. $ ( TAU( TAUPOS ) ),
  281. $ A( DPOS, ST ), LDA-1, WORK)
  282. ENDIF
  283. *
  284. IF( TTYPE.EQ.3 ) THEN
  285. LM = ED - ST + 1
  286. *
  287. CALL DLARFY( UPLO, LM, V( VPOS ), 1,
  288. $ ( TAU( TAUPOS ) ),
  289. $ A( DPOS, ST ), LDA-1, WORK)
  290. ENDIF
  291. *
  292. IF( TTYPE.EQ.2 ) THEN
  293. J1 = ED+1
  294. J2 = MIN( ED+NB, N )
  295. LN = ED-ST+1
  296. LM = J2-J1+1
  297. *
  298. IF( LM.GT.0) THEN
  299. CALL DLARFX( 'Right', LM, LN, V( VPOS ),
  300. $ TAU( TAUPOS ), A( DPOS+NB, ST ),
  301. $ LDA-1, WORK)
  302. *
  303. IF( WANTZ ) THEN
  304. VPOS = MOD( SWEEP-1, 2 ) * N + J1
  305. TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  306. ELSE
  307. VPOS = MOD( SWEEP-1, 2 ) * N + J1
  308. TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  309. ENDIF
  310. *
  311. V( VPOS ) = ONE
  312. DO 40 I = 1, LM-1
  313. V( VPOS+I ) = A( DPOS+NB+I, ST )
  314. A( DPOS+NB+I, ST ) = ZERO
  315. 40 CONTINUE
  316. CALL DLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1,
  317. $ TAU( TAUPOS ) )
  318. *
  319. CALL DLARFX( 'Left', LM, LN-1, V( VPOS ),
  320. $ ( TAU( TAUPOS ) ),
  321. $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK)
  322. ENDIF
  323. ENDIF
  324. ENDIF
  325. *
  326. RETURN
  327. *
  328. * END OF DSB2ST_KERNELS
  329. *
  330. END