|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212 |
- #include "relapack.h"
- #if XSYGST_ALLOW_MALLOC
- #include "stdlib.h"
- #endif
-
- static void RELAPACK_dsygst_rec(const blasint *, const char *, const blasint *,
- double *, const blasint *, const double *, const blasint *,
- double *, const blasint *, blasint *);
-
-
- /** DSYGST reduces a real symmetric-definite generalized eigenproblem to standard form.
- *
- * This routine is functionally equivalent to LAPACK's dsygst.
- * For details on its interface, see
- * http://www.netlib.org/lapack/explore-html/dc/d04/dsygst_8f.html
- * */
- void RELAPACK_dsygst(
- const blasint *itype, const char *uplo, const blasint *n,
- double *A, const blasint *ldA, const double *B, const blasint *ldB,
- blasint *info
- ) {
-
- // Check arguments
- const blasint lower = LAPACK(lsame)(uplo, "L");
- const blasint upper = LAPACK(lsame)(uplo, "U");
- *info = 0;
- if (*itype < 1 || *itype > 3)
- *info = -1;
- else if (!lower && !upper)
- *info = -2;
- else if (*n < 0)
- *info = -3;
- else if (*ldA < MAX(1, *n))
- *info = -5;
- else if (*ldB < MAX(1, *n))
- *info = -7;
- if (*info) {
- const blasint minfo = -*info;
- LAPACK(xerbla)("DSYGST", &minfo, strlen("DSYGST"));
- return;
- }
-
- // Clean char * arguments
- const char cleanuplo = lower ? 'L' : 'U';
-
- // Allocate work space
- double *Work = NULL;
- blasint lWork = 0;
- #if XSYGST_ALLOW_MALLOC
- const blasint n1 = DREC_SPLIT(*n);
- lWork = abs( n1 * (*n - n1) );
- Work = malloc(lWork * sizeof(double));
- if (!Work)
- lWork = 0;
- #endif
-
- // recursive kernel
- RELAPACK_dsygst_rec(itype, &cleanuplo, n, A, ldA, B, ldB, Work, &lWork, info);
-
- // Free work space
- #if XSYGST_ALLOW_MALLOC
- if (Work)
- free(Work);
- #endif
- }
-
-
- /** dsygst's recursive compute kernel */
- static void RELAPACK_dsygst_rec(
- const blasint *itype, const char *uplo, const blasint *n,
- double *A, const blasint *ldA, const double *B, const blasint *ldB,
- double *Work, const blasint *lWork, blasint *info
- ) {
-
- if (*n <= MAX(CROSSOVER_SSYGST, 1)) {
- // Unblocked
- LAPACK(dsygs2)(itype, uplo, n, A, ldA, B, ldB, info);
- return;
- }
-
- // Constants
- const double ZERO[] = { 0. };
- const double ONE[] = { 1. };
- const double MONE[] = { -1. };
- const double HALF[] = { .5 };
- const double MHALF[] = { -.5 };
- const blasint iONE[] = { 1 };
-
- // Loop iterator
- blasint i;
-
- // Splitting
- const blasint n1 = DREC_SPLIT(*n);
- const blasint n2 = *n - n1;
-
- // A_TL A_TR
- // A_BL A_BR
- double *const A_TL = A;
- double *const A_TR = A + *ldA * n1;
- double *const A_BL = A + n1;
- double *const A_BR = A + *ldA * n1 + n1;
-
- // B_TL B_TR
- // B_BL B_BR
- const double *const B_TL = B;
- const double *const B_TR = B + *ldB * n1;
- const double *const B_BL = B + n1;
- const double *const B_BR = B + *ldB * n1 + n1;
-
- // recursion(A_TL, B_TL)
- RELAPACK_dsygst_rec(itype, uplo, &n1, A_TL, ldA, B_TL, ldB, Work, lWork, info);
-
- if (*itype == 1)
- if (*uplo == 'L') {
- // A_BL = A_BL / B_TL'
- BLAS(dtrsm)("R", "L", "T", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
- if (*lWork > n2 * n1) {
- // T = -1/2 * B_BL * A_TL
- BLAS(dsymm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ZERO, Work, &n2);
- // A_BL = A_BL + T
- for (i = 0; i < n1; i++)
- BLAS(daxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
- } else
- // A_BL = A_BL - 1/2 B_BL * A_TL
- BLAS(dsymm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
- // A_BR = A_BR - A_BL * B_BL' - B_BL * A_BL'
- BLAS(dsyr2k)("L", "N", &n2, &n1, MONE, A_BL, ldA, B_BL, ldB, ONE, A_BR, ldA);
- if (*lWork > n2 * n1)
- // A_BL = A_BL + T
- for (i = 0; i < n1; i++)
- BLAS(daxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
- else
- // A_BL = A_BL - 1/2 B_BL * A_TL
- BLAS(dsymm)("R", "L", &n2, &n1, MHALF, A_TL, ldA, B_BL, ldB, ONE, A_BL, ldA);
- // A_BL = B_BR \ A_BL
- BLAS(dtrsm)("L", "L", "N", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
- } else {
- // A_TR = B_TL' \ A_TR
- BLAS(dtrsm)("L", "U", "T", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
- if (*lWork > n2 * n1) {
- // T = -1/2 * A_TL * B_TR
- BLAS(dsymm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ZERO, Work, &n1);
- // A_TR = A_BL + T
- for (i = 0; i < n2; i++)
- BLAS(daxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
- } else
- // A_TR = A_TR - 1/2 A_TL * B_TR
- BLAS(dsymm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
- // A_BR = A_BR - A_TR' * B_TR - B_TR' * A_TR
- BLAS(dsyr2k)("U", "T", &n2, &n1, MONE, A_TR, ldA, B_TR, ldB, ONE, A_BR, ldA);
- if (*lWork > n2 * n1)
- // A_TR = A_BL + T
- for (i = 0; i < n2; i++)
- BLAS(daxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
- else
- // A_TR = A_TR - 1/2 A_TL * B_TR
- BLAS(dsymm)("L", "U", &n1, &n2, MHALF, A_TL, ldA, B_TR, ldB, ONE, A_TR, ldA);
- // A_TR = A_TR / B_BR
- BLAS(dtrsm)("R", "U", "N", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
- }
- else
- if (*uplo == 'L') {
- // A_BL = A_BL * B_TL
- BLAS(dtrmm)("R", "L", "N", "N", &n2, &n1, ONE, B_TL, ldB, A_BL, ldA);
- if (*lWork > n2 * n1) {
- // T = 1/2 * A_BR * B_BL
- BLAS(dsymm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ZERO, Work, &n2);
- // A_BL = A_BL + T
- for (i = 0; i < n1; i++)
- BLAS(daxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
- } else
- // A_BL = A_BL + 1/2 A_BR * B_BL
- BLAS(dsymm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
- // A_TL = A_TL + A_BL' * B_BL + B_BL' * A_BL
- BLAS(dsyr2k)("L", "T", &n1, &n2, ONE, A_BL, ldA, B_BL, ldB, ONE, A_TL, ldA);
- if (*lWork > n2 * n1)
- // A_BL = A_BL + T
- for (i = 0; i < n1; i++)
- BLAS(daxpy)(&n2, ONE, Work + n2 * i, iONE, A_BL + *ldA * i, iONE);
- else
- // A_BL = A_BL + 1/2 A_BR * B_BL
- BLAS(dsymm)("L", "L", &n2, &n1, HALF, A_BR, ldA, B_BL, ldB, ONE, A_BL, ldA);
- // A_BL = B_BR * A_BL
- BLAS(dtrmm)("L", "L", "T", "N", &n2, &n1, ONE, B_BR, ldB, A_BL, ldA);
- } else {
- // A_TR = B_TL * A_TR
- BLAS(dtrmm)("L", "U", "N", "N", &n1, &n2, ONE, B_TL, ldB, A_TR, ldA);
- if (*lWork > n2 * n1) {
- // T = 1/2 * B_TR * A_BR
- BLAS(dsymm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ZERO, Work, &n1);
- // A_TR = A_TR + T
- for (i = 0; i < n2; i++)
- BLAS(daxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
- } else
- // A_TR = A_TR + 1/2 B_TR A_BR
- BLAS(dsymm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
- // A_TL = A_TL + A_TR * B_TR' + B_TR * A_TR'
- BLAS(dsyr2k)("U", "N", &n1, &n2, ONE, A_TR, ldA, B_TR, ldB, ONE, A_TL, ldA);
- if (*lWork > n2 * n1)
- // A_TR = A_TR + T
- for (i = 0; i < n2; i++)
- BLAS(daxpy)(&n1, ONE, Work + n1 * i, iONE, A_TR + *ldA * i, iONE);
- else
- // A_TR = A_TR + 1/2 B_TR * A_BR
- BLAS(dsymm)("R", "U", &n1, &n2, HALF, A_BR, ldA, B_TR, ldB, ONE, A_TR, ldA);
- // A_TR = A_TR * B_BR
- BLAS(dtrmm)("R", "U", "T", "N", &n1, &n2, ONE, B_BR, ldB, A_TR, ldA);
- }
-
- // recursion(A_BR, B_BR)
- RELAPACK_dsygst_rec(itype, uplo, &n2, A_BR, ldA, B_BR, ldB, Work, lWork, info);
- }
|