You can not select more than 25 topics Topics must start with a chinese character,a letter or number, can include dashes ('-') and can be up to 35 characters long.

iparam2stage.F 12 kB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. *> \brief \b IPARAM2STAGE
  2. *
  3. * =========== DOCUMENTATION ===========
  4. *
  5. * Online html documentation available at
  6. * http://www.netlib.org/lapack/explore-html/
  7. *
  8. *> \htmlonly
  9. *> Download IPARAM2STAGE + dependencies
  10. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F">
  11. *> [TGZ]</a>
  12. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F">
  13. *> [ZIP]</a>
  14. *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F">
  15. *> [TXT]</a>
  16. *> \endhtmlonly
  17. *
  18. * Definition:
  19. * ===========
  20. *
  21. * INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
  22. * NI, NBI, IBI, NXI )
  23. * #if defined(_OPENMP)
  24. * use omp_lib
  25. * #endif
  26. * IMPLICIT NONE
  27. *
  28. * .. Scalar Arguments ..
  29. * CHARACTER*( * ) NAME, OPTS
  30. * INTEGER ISPEC, NI, NBI, IBI, NXI
  31. *
  32. *> \par Purpose:
  33. * =============
  34. *>
  35. *> \verbatim
  36. *>
  37. *> This program sets problem and machine dependent parameters
  38. *> useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
  39. *> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
  40. *> and related subroutines for eigenvalue problems.
  41. *> It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
  42. *> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
  43. *> with a direct conversion ISPEC + 16.
  44. *> \endverbatim
  45. *
  46. * Arguments:
  47. * ==========
  48. *
  49. *> \param[in] ISPEC
  50. *> \verbatim
  51. *> ISPEC is integer scalar
  52. *> ISPEC specifies which tunable parameter IPARAM2STAGE should
  53. *> return.
  54. *>
  55. *> ISPEC=17: the optimal blocksize nb for the reduction to
  56. *> BAND
  57. *>
  58. *> ISPEC=18: the optimal blocksize ib for the eigenvectors
  59. *> singular vectors update routine
  60. *>
  61. *> ISPEC=19: The length of the array that store the Housholder
  62. *> representation for the second stage
  63. *> Band to Tridiagonal or Bidiagonal
  64. *>
  65. *> ISPEC=20: The workspace needed for the routine in input.
  66. *>
  67. *> ISPEC=21: For future release.
  68. *> \endverbatim
  69. *>
  70. *> \param[in] NAME
  71. *> \verbatim
  72. *> NAME is character string
  73. *> Name of the calling subroutine
  74. *> \endverbatim
  75. *>
  76. *> \param[in] OPTS
  77. *> \verbatim
  78. *> OPTS is CHARACTER*(*)
  79. *> The character options to the subroutine NAME, concatenated
  80. *> into a single character string. For example, UPLO = 'U',
  81. *> TRANS = 'T', and DIAG = 'N' for a triangular routine would
  82. *> be specified as OPTS = 'UTN'.
  83. *> \endverbatim
  84. *>
  85. *> \param[in] NI
  86. *> \verbatim
  87. *> NI is INTEGER which is the size of the matrix
  88. *> \endverbatim
  89. *>
  90. *> \param[in] NBI
  91. *> \verbatim
  92. *> NBI is INTEGER which is the used in the reduction,
  93. *> (e.g., the size of the band), needed to compute workspace
  94. *> and LHOUS2.
  95. *> \endverbatim
  96. *>
  97. *> \param[in] IBI
  98. *> \verbatim
  99. *> IBI is INTEGER which represent the IB of the reduction,
  100. *> needed to compute workspace and LHOUS2.
  101. *> \endverbatim
  102. *>
  103. *> \param[in] NXI
  104. *> \verbatim
  105. *> NXI is INTEGER needed in the future release.
  106. *> \endverbatim
  107. *
  108. * Authors:
  109. * ========
  110. *
  111. *> \author Univ. of Tennessee
  112. *> \author Univ. of California Berkeley
  113. *> \author Univ. of Colorado Denver
  114. *> \author NAG Ltd.
  115. *
  116. *> \ingroup auxOTHERauxiliary
  117. *
  118. *> \par Further Details:
  119. * =====================
  120. *>
  121. *> \verbatim
  122. *>
  123. *> Implemented by Azzam Haidar.
  124. *>
  125. *> All detail are available on technical report, SC11, SC13 papers.
  126. *>
  127. *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  128. *> Parallel reduction to condensed forms for symmetric eigenvalue problems
  129. *> using aggregated fine-grained and memory-aware kernels. In Proceedings
  130. *> of 2011 International Conference for High Performance Computing,
  131. *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
  132. *> Article 8 , 11 pages.
  133. *> http://doi.acm.org/10.1145/2063384.2063394
  134. *>
  135. *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
  136. *> An improved parallel singular value algorithm and its implementation
  137. *> for multicore hardware, In Proceedings of 2013 International Conference
  138. *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
  139. *> Denver, Colorado, USA, 2013.
  140. *> Article 90, 12 pages.
  141. *> http://doi.acm.org/10.1145/2503210.2503292
  142. *>
  143. *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  144. *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  145. *> calculations based on fine-grained memory aware tasks.
  146. *> International Journal of High Performance Computing Applications.
  147. *> Volume 28 Issue 2, Pages 196-209, May 2014.
  148. *> http://hpc.sagepub.com/content/28/2/196
  149. *>
  150. *> \endverbatim
  151. *>
  152. * =====================================================================
  153. INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
  154. $ NI, NBI, IBI, NXI )
  155. #if defined(_OPENMP)
  156. use omp_lib
  157. #endif
  158. IMPLICIT NONE
  159. *
  160. * -- LAPACK auxiliary routine --
  161. * -- LAPACK is a software package provided by Univ. of Tennessee, --
  162. * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  163. *
  164. * .. Scalar Arguments ..
  165. CHARACTER*( * ) NAME, OPTS
  166. INTEGER ISPEC, NI, NBI, IBI, NXI
  167. *
  168. * ================================================================
  169. * ..
  170. * .. Local Scalars ..
  171. INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
  172. $ FACTOPTNB, QROPTNB, LQOPTNB
  173. LOGICAL RPREC, CPREC
  174. CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
  175. * ..
  176. * .. Intrinsic Functions ..
  177. INTRINSIC CHAR, ICHAR, MAX
  178. * ..
  179. * .. External Functions ..
  180. INTEGER ILAENV
  181. LOGICAL LSAME
  182. EXTERNAL ILAENV, LSAME
  183. * ..
  184. * .. Executable Statements ..
  185. *
  186. * Invalid value for ISPEC
  187. *
  188. IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN
  189. IPARAM2STAGE = -1
  190. RETURN
  191. ENDIF
  192. *
  193. * Get the number of threads
  194. *
  195. NTHREADS = 1
  196. #if defined(_OPENMP)
  197. !$OMP PARALLEL
  198. NTHREADS = OMP_GET_NUM_THREADS()
  199. !$OMP END PARALLEL
  200. #endif
  201. * WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
  202. *
  203. IF( ISPEC .NE. 19 ) THEN
  204. *
  205. * Convert NAME to upper case if the first character is lower case.
  206. *
  207. IPARAM2STAGE = -1
  208. SUBNAM = NAME
  209. IC = ICHAR( SUBNAM( 1: 1 ) )
  210. IZ = ICHAR( 'Z' )
  211. IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
  212. *
  213. * ASCII character set
  214. *
  215. IF( IC.GE.97 .AND. IC.LE.122 ) THEN
  216. SUBNAM( 1: 1 ) = CHAR( IC-32 )
  217. DO 100 I = 2, 12
  218. IC = ICHAR( SUBNAM( I: I ) )
  219. IF( IC.GE.97 .AND. IC.LE.122 )
  220. $ SUBNAM( I: I ) = CHAR( IC-32 )
  221. 100 CONTINUE
  222. END IF
  223. *
  224. ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
  225. *
  226. * EBCDIC character set
  227. *
  228. IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
  229. $ ( IC.GE.145 .AND. IC.LE.153 ) .OR.
  230. $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
  231. SUBNAM( 1: 1 ) = CHAR( IC+64 )
  232. DO 110 I = 2, 12
  233. IC = ICHAR( SUBNAM( I: I ) )
  234. IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
  235. $ ( IC.GE.145 .AND. IC.LE.153 ) .OR.
  236. $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
  237. $ I ) = CHAR( IC+64 )
  238. 110 CONTINUE
  239. END IF
  240. *
  241. ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
  242. *
  243. * Prime machines: ASCII+128
  244. *
  245. IF( IC.GE.225 .AND. IC.LE.250 ) THEN
  246. SUBNAM( 1: 1 ) = CHAR( IC-32 )
  247. DO 120 I = 2, 12
  248. IC = ICHAR( SUBNAM( I: I ) )
  249. IF( IC.GE.225 .AND. IC.LE.250 )
  250. $ SUBNAM( I: I ) = CHAR( IC-32 )
  251. 120 CONTINUE
  252. END IF
  253. END IF
  254. *
  255. PREC = SUBNAM( 1: 1 )
  256. ALGO = SUBNAM( 4: 6 )
  257. STAG = SUBNAM( 8:12 )
  258. RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D'
  259. CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z'
  260. *
  261. * Invalid value for PRECISION
  262. *
  263. IF( .NOT.( RPREC .OR. CPREC ) ) THEN
  264. IPARAM2STAGE = -1
  265. RETURN
  266. ENDIF
  267. ENDIF
  268. * WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
  269. * $ ' ALGO ',ALGO,' STAGE ',STAG
  270. *
  271. *
  272. IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN
  273. *
  274. * ISPEC = 17, 18: block size KD, IB
  275. * Could be also dependent from N but for now it
  276. * depend only on sequential or parallel
  277. *
  278. IF( NTHREADS.GT.4 ) THEN
  279. IF( CPREC ) THEN
  280. KD = 128
  281. IB = 32
  282. ELSE
  283. KD = 160
  284. IB = 40
  285. ENDIF
  286. ELSE IF( NTHREADS.GT.1 ) THEN
  287. IF( CPREC ) THEN
  288. KD = 64
  289. IB = 32
  290. ELSE
  291. KD = 64
  292. IB = 32
  293. ENDIF
  294. ELSE
  295. IF( CPREC ) THEN
  296. KD = 16
  297. IB = 16
  298. ELSE
  299. KD = 32
  300. IB = 16
  301. ENDIF
  302. ENDIF
  303. IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD
  304. IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB
  305. *
  306. ELSE IF ( ISPEC .EQ. 19 ) THEN
  307. *
  308. * ISPEC = 19:
  309. * LHOUS length of the Houselholder representation
  310. * matrix (V,T) of the second stage. should be >= 1.
  311. *
  312. * Will add the VECT OPTION HERE next release
  313. VECT = OPTS(1:1)
  314. IF( LSAME( VECT, 'N' ) ) THEN
  315. LHOUS = MAX( 1, 4*NI )
  316. ELSE
  317. * This is not correct, it need to call the ALGO and the stage2
  318. LHOUS = MAX( 1, 4*NI ) + IBI
  319. ENDIF
  320. IF( LHOUS.GE.0 ) THEN
  321. IPARAM2STAGE = LHOUS
  322. ELSE
  323. IPARAM2STAGE = -1
  324. ENDIF
  325. *
  326. ELSE IF ( ISPEC .EQ. 20 ) THEN
  327. *
  328. * ISPEC = 20: (21 for future use)
  329. * LWORK length of the workspace for
  330. * either or both stages for TRD and BRD. should be >= 1.
  331. * TRD:
  332. * TRD_stage 1: = LT + LW + LS1 + LS2
  333. * = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
  334. * where LDT=LDS2=KD
  335. * = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
  336. * TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
  337. * TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
  338. * = N*KD + N*max(KD+1,FACTOPTNB)
  339. * + max(2*KD*KD, KD*NTHREADS)
  340. * + (KD+1)*N
  341. LWORK = -1
  342. SUBNAM(1:1) = PREC
  343. SUBNAM(2:6) = 'GEQRF'
  344. QROPTNB = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 )
  345. SUBNAM(2:6) = 'GELQF'
  346. LQOPTNB = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 )
  347. * Could be QR or LQ for TRD and the max for BRD
  348. FACTOPTNB = MAX(QROPTNB, LQOPTNB)
  349. IF( ALGO.EQ.'TRD' ) THEN
  350. IF( STAG.EQ.'2STAG' ) THEN
  351. LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
  352. $ + MAX(2*NBI*NBI, NBI*NTHREADS)
  353. $ + (NBI+1)*NI
  354. ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN
  355. LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
  356. ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN
  357. LWORK = (2*NBI+1)*NI + NBI*NTHREADS
  358. ENDIF
  359. ELSE IF( ALGO.EQ.'BRD' ) THEN
  360. IF( STAG.EQ.'2STAG' ) THEN
  361. LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
  362. $ + MAX(2*NBI*NBI, NBI*NTHREADS)
  363. $ + (NBI+1)*NI
  364. ELSE IF( STAG.EQ.'GE2GB' ) THEN
  365. LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
  366. ELSE IF( STAG.EQ.'GB2BD' ) THEN
  367. LWORK = (3*NBI+1)*NI + NBI*NTHREADS
  368. ENDIF
  369. ENDIF
  370. LWORK = MAX ( 1, LWORK )
  371. IF( LWORK.GT.0 ) THEN
  372. IPARAM2STAGE = LWORK
  373. ELSE
  374. IPARAM2STAGE = -1
  375. ENDIF
  376. *
  377. ELSE IF ( ISPEC .EQ. 21 ) THEN
  378. *
  379. * ISPEC = 21 for future use
  380. IPARAM2STAGE = NXI
  381. ENDIF
  382. *
  383. * ==== End of IPARAM2STAGE ====
  384. *
  385. END