You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

ssb2st_kernels.f 11 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. *> \brief \b SSB2ST_KERNELS
  2. *
  3. * @generated from zhb2st_kernels.f, fortran z -> s, Wed Dec 7 08:22:40 2016
  4. *
  5. * =========== DOCUMENTATION ===========
  6. *
  7. * Online html documentation available at
  8. * http://www.netlib.org/lapack/explore-html/
  9. *
  10. *> \htmlonly
  11. *> Download SSB2ST_KERNELS + dependencies
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/ssb2st_kernels.f">
  13. *> [TGZ]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/ssb2st_kernels.f">
  15. *> [ZIP]</a>
  16. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/ssb2st_kernels.f">
  17. *> [TXT]</a>
  18. *> \endhtmlonly
  19. *
  20. * Definition:
  21. * ===========
  22. *
  23. * SUBROUTINE SSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
  24. * ST, ED, SWEEP, N, NB, IB,
  25. * A, LDA, V, TAU, LDVT, WORK)
  26. *
  27. * IMPLICIT NONE
  28. *
  29. * .. Scalar Arguments ..
  30. * CHARACTER UPLO
  31. * LOGICAL WANTZ
  32. * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
  33. * ..
  34. * .. Array Arguments ..
  35. * REAL A( LDA, * ), V( * ),
  36. * TAU( * ), WORK( * )
  37. *
  38. *> \par Purpose:
  39. * =============
  40. *>
  41. *> \verbatim
  42. *>
  43. *> SSB2ST_KERNELS is an internal routine used by the SSYTRD_SB2ST
  44. *> subroutine.
  45. *> \endverbatim
  46. *
  47. * Arguments:
  48. * ==========
  49. *
  50. *> \param[in] UPLO
  51. *> \verbatim
  52. *> UPLO is CHARACTER*1
  53. *> \endverbatim
  54. *>
  55. *> \param[in] WANTZ
  56. *> \verbatim
  57. *> WANTZ is LOGICAL which indicates whether only the eigenvalues are
  58. *> requested or both the eigenvalues and the eigenvectors.
  59. *> \endverbatim
  60. *>
  61. *> \param[in] TTYPE
  62. *> \verbatim
  63. *> TTYPE is INTEGER
  64. *> \endverbatim
  65. *>
  66. *> \param[in] ST
  67. *> \verbatim
  68. *> ST is INTEGER
  69. *> internal parameter for indices.
  70. *> \endverbatim
  71. *>
  72. *> \param[in] ED
  73. *> \verbatim
  74. *> ED is INTEGER
  75. *> internal parameter for indices.
  76. *> \endverbatim
  77. *>
  78. *> \param[in] SWEEP
  79. *> \verbatim
  80. *> SWEEP is INTEGER
  81. *> internal parameter for indices.
  82. *> \endverbatim
  83. *>
  84. *> \param[in] N
  85. *> \verbatim
  86. *> N is INTEGER. The order of the matrix A.
  87. *> \endverbatim
  88. *>
  89. *> \param[in] NB
  90. *> \verbatim
  91. *> NB is INTEGER. The size of the band.
  92. *> \endverbatim
  93. *>
  94. *> \param[in] IB
  95. *> \verbatim
  96. *> IB is INTEGER.
  97. *> \endverbatim
  98. *>
  99. *> \param[in,out] A
  100. *> \verbatim
  101. *> A is REAL array. A pointer to the matrix A.
  102. *> \endverbatim
  103. *>
  104. *> \param[in] LDA
  105. *> \verbatim
  106. *> LDA is INTEGER. The leading dimension of the matrix A.
  107. *> \endverbatim
  108. *>
  109. *> \param[out] V
  110. *> \verbatim
  111. *> V is REAL array, dimension (2*n) if only eigenvalues are
  112. *> requested, or of a size to be queried if vectors are requested.
  113. *> \endverbatim
  114. *>
  115. *> \param[out] TAU
  116. *> \verbatim
  117. *> TAU is REAL array, dimension (2*n).
  118. *> The scalar factors of the Householder reflectors are stored
  119. *> in this array.
  120. *> \endverbatim
  121. *>
  122. *> \param[in] LDVT
  123. *> \verbatim
  124. *> LDVT is INTEGER.
  125. *> \endverbatim
  126. *>
  127. *> \param[out] WORK
  128. *> \verbatim
  129. *> WORK is REAL array. Workspace of size nb.
  130. *> \endverbatim
  131. *> @param[in] n
  132. *> The order of the matrix A.
  133. *>
  134. *>
  135. *> \par Further Details:
  136. * =====================
  137. *>
  138. *> \verbatim
  139. *>
  140. *> Implemented by Azzam Haidar.
  141. *>
  142. *> All details are available in the technical report and the SC11 and SC13 papers.
  143. *>
  144. *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  145. *> Parallel reduction to condensed forms for symmetric eigenvalue problems
  146. *> using aggregated fine-grained and memory-aware kernels. In Proceedings
  147. *> of 2011 International Conference for High Performance Computing,
  148. *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
  149. *> Article 8 , 11 pages.
  150. *> http://doi.acm.org/10.1145/2063384.2063394
  151. *>
  152. *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
  153. *> An improved parallel singular value algorithm and its implementation
  154. *> for multicore hardware, In Proceedings of 2013 International Conference
  155. *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
  156. *> Denver, Colorado, USA, 2013.
  157. *> Article 90, 12 pages.
  158. *> http://doi.acm.org/10.1145/2503210.2503292
  159. *>
  160. *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  161. *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  162. *> calculations based on fine-grained memory aware tasks.
  163. *> International Journal of High Performance Computing Applications.
  164. *> Volume 28 Issue 2, Pages 196-209, May 2014.
  165. *> http://hpc.sagepub.com/content/28/2/196
  166. *>
  167. *> \endverbatim
  168. *>
  169. * =====================================================================
  170. SUBROUTINE SSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
  171. $ ST, ED, SWEEP, N, NB, IB,
  172. $ A, LDA, V, TAU, LDVT, WORK)
  173. *
  174. IMPLICIT NONE
  175. *
  176. * -- LAPACK computational routine (version 3.7.1) --
  177. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  178. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  179. * June 2017
  180. *
  181. * .. Scalar Arguments ..
  182. CHARACTER UPLO
  183. LOGICAL WANTZ
  184. INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
  185. * ..
  186. * .. Array Arguments ..
  187. REAL A( LDA, * ), V( * ),
  188. $ TAU( * ), WORK( * )
  189. * ..
  190. *
  191. * =====================================================================
  192. *
  193. * .. Parameters ..
  194. REAL ZERO, ONE
  195. PARAMETER ( ZERO = 0.0E+0,
  196. $ ONE = 1.0E+0 )
  197. * ..
  198. * .. Local Scalars ..
  199. LOGICAL UPPER
  200. INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
  201. $ DPOS, OFDPOS, AJETER
  202. REAL CTMP
  203. * ..
  204. * .. External Subroutines ..
  205. EXTERNAL SLARFG, SLARFX, SLARFY
  206. * ..
  207. * .. Intrinsic Functions ..
  208. INTRINSIC MOD
  209. * .. External Functions ..
  210. LOGICAL LSAME
  211. EXTERNAL LSAME
  212. * ..
  213. * ..
  214. * .. Executable Statements ..
  215. *
  216. AJETER = IB + LDVT
  217. UPPER = LSAME( UPLO, 'U' )
  218. IF( UPPER ) THEN
  219. DPOS = 2 * NB + 1
  220. OFDPOS = 2 * NB
  221. ELSE
  222. DPOS = 1
  223. OFDPOS = 2
  224. ENDIF
  225. *
  226. * Upper case
  227. *
  228. IF( UPPER ) THEN
  229. *
  230. IF( WANTZ ) THEN
  231. VPOS = MOD( SWEEP-1, 2 ) * N + ST
  232. TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  233. ELSE
  234. VPOS = MOD( SWEEP-1, 2 ) * N + ST
  235. TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  236. ENDIF
  237. *
  238. IF( TTYPE.EQ.1 ) THEN
  239. LM = ED - ST + 1
  240. *
  241. V( VPOS ) = ONE
  242. DO 10 I = 1, LM-1
  243. V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) )
  244. A( OFDPOS-I, ST+I ) = ZERO
  245. 10 CONTINUE
  246. CTMP = ( A( OFDPOS, ST ) )
  247. CALL SLARFG( LM, CTMP, V( VPOS+1 ), 1,
  248. $ TAU( TAUPOS ) )
  249. A( OFDPOS, ST ) = CTMP
  250. *
  251. LM = ED - ST + 1
  252. CALL SLARFY( UPLO, LM, V( VPOS ), 1,
  253. $ ( TAU( TAUPOS ) ),
  254. $ A( DPOS, ST ), LDA-1, WORK)
  255. ENDIF
  256. *
  257. IF( TTYPE.EQ.3 ) THEN
  258. *
  259. LM = ED - ST + 1
  260. CALL SLARFY( UPLO, LM, V( VPOS ), 1,
  261. $ ( TAU( TAUPOS ) ),
  262. $ A( DPOS, ST ), LDA-1, WORK)
  263. ENDIF
  264. *
  265. IF( TTYPE.EQ.2 ) THEN
  266. J1 = ED+1
  267. J2 = MIN( ED+NB, N )
  268. LN = ED-ST+1
  269. LM = J2-J1+1
  270. IF( LM.GT.0) THEN
  271. CALL SLARFX( 'Left', LN, LM, V( VPOS ),
  272. $ ( TAU( TAUPOS ) ),
  273. $ A( DPOS-NB, J1 ), LDA-1, WORK)
  274. *
  275. IF( WANTZ ) THEN
  276. VPOS = MOD( SWEEP-1, 2 ) * N + J1
  277. TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  278. ELSE
  279. VPOS = MOD( SWEEP-1, 2 ) * N + J1
  280. TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  281. ENDIF
  282. *
  283. V( VPOS ) = ONE
  284. DO 30 I = 1, LM-1
  285. V( VPOS+I ) =
  286. $ ( A( DPOS-NB-I, J1+I ) )
  287. A( DPOS-NB-I, J1+I ) = ZERO
  288. 30 CONTINUE
  289. CTMP = ( A( DPOS-NB, J1 ) )
  290. CALL SLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) )
  291. A( DPOS-NB, J1 ) = CTMP
  292. *
  293. CALL SLARFX( 'Right', LN-1, LM, V( VPOS ),
  294. $ TAU( TAUPOS ),
  295. $ A( DPOS-NB+1, J1 ), LDA-1, WORK)
  296. ENDIF
  297. ENDIF
  298. *
  299. * Lower case
  300. *
  301. ELSE
  302. *
  303. IF( WANTZ ) THEN
  304. VPOS = MOD( SWEEP-1, 2 ) * N + ST
  305. TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  306. ELSE
  307. VPOS = MOD( SWEEP-1, 2 ) * N + ST
  308. TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  309. ENDIF
  310. *
  311. IF( TTYPE.EQ.1 ) THEN
  312. LM = ED - ST + 1
  313. *
  314. V( VPOS ) = ONE
  315. DO 20 I = 1, LM-1
  316. V( VPOS+I ) = A( OFDPOS+I, ST-1 )
  317. A( OFDPOS+I, ST-1 ) = ZERO
  318. 20 CONTINUE
  319. CALL SLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1,
  320. $ TAU( TAUPOS ) )
  321. *
  322. LM = ED - ST + 1
  323. *
  324. CALL SLARFY( UPLO, LM, V( VPOS ), 1,
  325. $ ( TAU( TAUPOS ) ),
  326. $ A( DPOS, ST ), LDA-1, WORK)
  327. ENDIF
  328. *
  329. IF( TTYPE.EQ.3 ) THEN
  330. LM = ED - ST + 1
  331. *
  332. CALL SLARFY( UPLO, LM, V( VPOS ), 1,
  333. $ ( TAU( TAUPOS ) ),
  334. $ A( DPOS, ST ), LDA-1, WORK)
  335. ENDIF
  336. *
  337. IF( TTYPE.EQ.2 ) THEN
  338. J1 = ED+1
  339. J2 = MIN( ED+NB, N )
  340. LN = ED-ST+1
  341. LM = J2-J1+1
  342. *
  343. IF( LM.GT.0) THEN
  344. CALL SLARFX( 'Right', LM, LN, V( VPOS ),
  345. $ TAU( TAUPOS ), A( DPOS+NB, ST ),
  346. $ LDA-1, WORK)
  347. *
  348. IF( WANTZ ) THEN
  349. VPOS = MOD( SWEEP-1, 2 ) * N + J1
  350. TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  351. ELSE
  352. VPOS = MOD( SWEEP-1, 2 ) * N + J1
  353. TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  354. ENDIF
  355. *
  356. V( VPOS ) = ONE
  357. DO 40 I = 1, LM-1
  358. V( VPOS+I ) = A( DPOS+NB+I, ST )
  359. A( DPOS+NB+I, ST ) = ZERO
  360. 40 CONTINUE
  361. CALL SLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1,
  362. $ TAU( TAUPOS ) )
  363. *
  364. CALL SLARFX( 'Left', LM, LN-1, V( VPOS ),
  365. $ ( TAU( TAUPOS ) ),
  366. $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK)
  367. ENDIF
  368. ENDIF
  369. ENDIF
  370. *
  371. RETURN
  372. *
  373. * END OF SSB2ST_KERNELS
  374. *
  375. END