@@ -36,6 +36,7 @@ void RELAPACK_cgbtrf( | |||||
return; | return; | ||||
} | } | ||||
if (*m == 0 || *n == 0) return; | |||||
// Constant | // Constant | ||||
const float ZERO[] = { 0., 0. }; | const float ZERO[] = { 0., 0. }; | ||||
@@ -56,10 +57,10 @@ void RELAPACK_cgbtrf( | |||||
// Allocate work space | // Allocate work space | ||||
const blasint n1 = CREC_SPLIT(*n); | const blasint n1 = CREC_SPLIT(*n); | ||||
const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv; | |||||
const blasint nWorkl = (kv > n1) ? n1 : kv; | |||||
const blasint mWorku = (*kl > n1) ? n1 : *kl; | |||||
const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl; | |||||
const blasint mWorkl = abs ( (kv > n1) ? MAX(1, *m - *kl) : kv); | |||||
const blasint nWorkl = abs ( (kv > n1) ? n1 : kv); | |||||
const blasint mWorku = abs ((*kl > n1) ? n1 : *kl); | |||||
const blasint nWorku = abs ((*kl > n1) ? MAX(0, *n - *kl) : *kl); | |||||
float *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(float)); | float *Workl = malloc(mWorkl * nWorkl * 2 * sizeof(float)); | ||||
float *Worku = malloc(mWorku * nWorku * 2 * sizeof(float)); | float *Worku = malloc(mWorku * nWorku * 2 * sizeof(float)); | ||||
LAPACK(claset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl); | LAPACK(claset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl); | ||||
@@ -82,7 +83,7 @@ static void RELAPACK_cgbtrf_rec( | |||||
blasint *info | blasint *info | ||||
) { | ) { | ||||
if (*n <= MAX(CROSSOVER_CGBTRF, 1)) { | |||||
if (*n <= MAX(CROSSOVER_CGBTRF, 1)|| *n > *kl || *ldAb == 1) { | |||||
// Unblocked | // Unblocked | ||||
LAPACK(cgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | LAPACK(cgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | ||||
return; | return; | ||||
@@ -35,6 +35,8 @@ void RELAPACK_cpbtrf( | |||||
return; | return; | ||||
} | } | ||||
if (*n == 0) return; | |||||
// Clean char * arguments | // Clean char * arguments | ||||
const char cleanuplo = lower ? 'L' : 'U'; | const char cleanuplo = lower ? 'L' : 'U'; | ||||
@@ -43,8 +45,8 @@ void RELAPACK_cpbtrf( | |||||
// Allocate work space | // Allocate work space | ||||
const blasint n1 = CREC_SPLIT(*n); | const blasint n1 = CREC_SPLIT(*n); | ||||
const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; | |||||
const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; | |||||
const blasint mWork = abs((*kd > n1) ? (lower ? *n - *kd : n1) : *kd); | |||||
const blasint nWork = abs((*kd > n1) ? (lower ? n1 : *n - *kd) : *kd); | |||||
float *Work = malloc(mWork * nWork * 2 * sizeof(float)); | float *Work = malloc(mWork * nWork * 2 * sizeof(float)); | ||||
LAPACK(claset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); | LAPACK(claset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); | ||||
@@ -64,7 +66,7 @@ static void RELAPACK_cpbtrf_rec( | |||||
blasint *info | blasint *info | ||||
){ | ){ | ||||
if (*n <= MAX(CROSSOVER_CPBTRF, 1)) { | |||||
if (*n <= MAX(CROSSOVER_CPBTRF, 1) || *ldAb==1) { | |||||
// Unblocked | // Unblocked | ||||
LAPACK(cpbtf2)(uplo, n, kd, Ab, ldAb, info); | LAPACK(cpbtf2)(uplo, n, kd, Ab, ldAb, info); | ||||
return; | return; | ||||
@@ -148,7 +150,7 @@ static void RELAPACK_cpbtrf_rec( | |||||
} | } | ||||
// recursion(A_BR) | // recursion(A_BR) | ||||
if (*kd > n1) | |||||
if (*kd > n1 && ldA != 0) | |||||
RELAPACK_cpotrf(uplo, &n2, A_BR, ldA, info); | RELAPACK_cpotrf(uplo, &n2, A_BR, ldA, info); | ||||
else | else | ||||
RELAPACK_cpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); | RELAPACK_cpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); | ||||
@@ -36,6 +36,8 @@ void RELAPACK_dgbtrf( | |||||
return; | return; | ||||
} | } | ||||
if (*m == 0 || *n == 0) return; | |||||
// Constant | // Constant | ||||
const double ZERO[] = { 0. }; | const double ZERO[] = { 0. }; | ||||
@@ -83,7 +85,7 @@ static void RELAPACK_dgbtrf_rec( | |||||
blasint *info | blasint *info | ||||
) { | ) { | ||||
if (*n <= MAX(CROSSOVER_DGBTRF, 1)) { | |||||
if (*n <= MAX(CROSSOVER_DGBTRF, 1) || *n > *kl || *ldAb == 1) { | |||||
// Unblocked | // Unblocked | ||||
LAPACK(dgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | LAPACK(dgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | ||||
return; | return; | ||||
@@ -195,6 +197,7 @@ static void RELAPACK_dgbtrf_rec( | |||||
// Worku = A_TRr | // Worku = A_TRr | ||||
LAPACK(dlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku); | LAPACK(dlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku); | ||||
// Worku = A_TL \ Worku | // Worku = A_TL \ Worku | ||||
if (ldWorku <= 0) return; | |||||
BLAS(dtrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku); | BLAS(dtrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku); | ||||
// A_TRr = Worku | // A_TRr = Worku | ||||
LAPACK(dlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA); | LAPACK(dlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA); | ||||
@@ -35,6 +35,8 @@ void RELAPACK_dpbtrf( | |||||
return; | return; | ||||
} | } | ||||
if (*n == 0) return; | |||||
// Clean char * arguments | // Clean char * arguments | ||||
const char cleanuplo = lower ? 'L' : 'U'; | const char cleanuplo = lower ? 'L' : 'U'; | ||||
@@ -43,8 +45,8 @@ void RELAPACK_dpbtrf( | |||||
// Allocate work space | // Allocate work space | ||||
const blasint n1 = DREC_SPLIT(*n); | const blasint n1 = DREC_SPLIT(*n); | ||||
const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; | |||||
const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; | |||||
const blasint mWork = abs((*kd > n1) ? (lower ? *n - *kd : n1) : *kd); | |||||
const blasint nWork = abs((*kd > n1) ? (lower ? n1 : *n - *kd) : *kd); | |||||
double *Work = malloc(mWork * nWork * sizeof(double)); | double *Work = malloc(mWork * nWork * sizeof(double)); | ||||
LAPACK(dlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); | LAPACK(dlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); | ||||
@@ -64,7 +66,7 @@ static void RELAPACK_dpbtrf_rec( | |||||
blasint *info | blasint *info | ||||
){ | ){ | ||||
if (*n <= MAX(CROSSOVER_DPBTRF, 1)) { | |||||
if (*n <= MAX(CROSSOVER_DPBTRF, 1) || *ldAb == 1) { | |||||
// Unblocked | // Unblocked | ||||
LAPACK(dpbtf2)(uplo, n, kd, Ab, ldAb, info); | LAPACK(dpbtf2)(uplo, n, kd, Ab, ldAb, info); | ||||
return; | return; | ||||
@@ -148,7 +150,7 @@ static void RELAPACK_dpbtrf_rec( | |||||
} | } | ||||
// recursion(A_BR) | // recursion(A_BR) | ||||
if (*kd > n1) | |||||
if (*kd > n1 && ldA != 0) | |||||
RELAPACK_dpotrf(uplo, &n2, A_BR, ldA, info); | RELAPACK_dpotrf(uplo, &n2, A_BR, ldA, info); | ||||
else | else | ||||
RELAPACK_dpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); | RELAPACK_dpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); | ||||
@@ -35,6 +35,13 @@ void RELAPACK_sgbtrf( | |||||
return; | return; | ||||
} | } | ||||
if (*m == 0 || *n == 0) return; | |||||
if (*ldAb == 1) { | |||||
LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | |||||
return; | |||||
} | |||||
// Constant | // Constant | ||||
const float ZERO[] = { 0. }; | const float ZERO[] = { 0. }; | ||||
@@ -82,8 +89,9 @@ static void RELAPACK_sgbtrf_rec( | |||||
blasint *info | blasint *info | ||||
) { | ) { | ||||
if (*m == 0 || *n == 0) return; | |||||
if (*n <= MAX(CROSSOVER_SGBTRF, 1)) { | |||||
if ( *n <= MAX(CROSSOVER_SGBTRF, 1) || *n > *kl || *ldAb == 1) { | |||||
// Unblocked | // Unblocked | ||||
LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | ||||
return; | return; | ||||
@@ -160,7 +168,7 @@ static void RELAPACK_sgbtrf_rec( | |||||
// recursion(Ab_L, ipiv_T) | // recursion(Ab_L, ipiv_T) | ||||
RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); | RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); | ||||
if (*info) return; | |||||
// Workl = A_BLb | // Workl = A_BLb | ||||
LAPACK(slacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl); | LAPACK(slacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl); | ||||
@@ -222,8 +230,8 @@ static void RELAPACK_sgbtrf_rec( | |||||
// recursion(Ab_BR, ipiv_B) | // recursion(Ab_BR, ipiv_B) | ||||
//cause of infinite recursion here ? | //cause of infinite recursion here ? | ||||
// RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||||
LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); | |||||
RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); | |||||
// LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); | |||||
if (*info) | if (*info) | ||||
*info += n1; | *info += n1; | ||||
// shift pivots | // shift pivots | ||||
@@ -35,6 +35,9 @@ void RELAPACK_spbtrf( | |||||
return; | return; | ||||
} | } | ||||
if (*n == 0) return; | |||||
// Clean char * arguments | // Clean char * arguments | ||||
const char cleanuplo = lower ? 'L' : 'U'; | const char cleanuplo = lower ? 'L' : 'U'; | ||||
@@ -43,8 +46,8 @@ void RELAPACK_spbtrf( | |||||
// Allocate work space | // Allocate work space | ||||
const blasint n1 = SREC_SPLIT(*n); | const blasint n1 = SREC_SPLIT(*n); | ||||
const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; | |||||
const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; | |||||
const blasint mWork = abs( (*kd > n1) ? (lower ? *n - *kd : n1) : *kd); | |||||
const blasint nWork = abs((*kd > n1) ? (lower ? n1 : *n - *kd) : *kd); | |||||
float *Work = malloc(mWork * nWork * sizeof(float)); | float *Work = malloc(mWork * nWork * sizeof(float)); | ||||
LAPACK(slaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); | LAPACK(slaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); | ||||
@@ -64,7 +67,9 @@ static void RELAPACK_spbtrf_rec( | |||||
blasint *info | blasint *info | ||||
){ | ){ | ||||
if (*n <= MAX(CROSSOVER_SPBTRF, 1)) { | |||||
if (*n == 0 ) return; | |||||
if ( *n <= MAX(CROSSOVER_SPBTRF, 1) || *ldAb == 1) { | |||||
// Unblocked | // Unblocked | ||||
LAPACK(spbtf2)(uplo, n, kd, Ab, ldAb, info); | LAPACK(spbtf2)(uplo, n, kd, Ab, ldAb, info); | ||||
return; | return; | ||||
@@ -148,7 +153,7 @@ static void RELAPACK_spbtrf_rec( | |||||
} | } | ||||
// recursion(A_BR) | // recursion(A_BR) | ||||
if (*kd > n1) | |||||
if (*kd > n1 && ldA != 0) | |||||
RELAPACK_spotrf(uplo, &n2, A_BR, ldA, info); | RELAPACK_spotrf(uplo, &n2, A_BR, ldA, info); | ||||
else | else | ||||
RELAPACK_spbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); | RELAPACK_spbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); | ||||
@@ -36,6 +36,8 @@ void RELAPACK_zgbtrf( | |||||
return; | return; | ||||
} | } | ||||
if (*m == 0 || *n == 0) return; | |||||
// Constant | // Constant | ||||
const double ZERO[] = { 0., 0. }; | const double ZERO[] = { 0., 0. }; | ||||
@@ -82,7 +84,7 @@ static void RELAPACK_zgbtrf_rec( | |||||
blasint *info | blasint *info | ||||
) { | ) { | ||||
if (*n <= MAX(CROSSOVER_ZGBTRF, 1)) { | |||||
if (*n <= MAX(CROSSOVER_ZGBTRF, 1) || *n > *kl || *ldAb == 1) { | |||||
// Unblocked | // Unblocked | ||||
LAPACK(zgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | LAPACK(zgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); | ||||
return; | return; | ||||
@@ -92,6 +94,7 @@ static void RELAPACK_zgbtrf_rec( | |||||
const double ONE[] = { 1., 0. }; | const double ONE[] = { 1., 0. }; | ||||
const double MONE[] = { -1., 0. }; | const double MONE[] = { -1., 0. }; | ||||
const blasint iONE[] = { 1 }; | const blasint iONE[] = { 1 }; | ||||
const blasint min11 = -11; | |||||
// Loop iterators | // Loop iterators | ||||
blasint i, j; | blasint i, j; | ||||
@@ -158,6 +161,7 @@ static void RELAPACK_zgbtrf_rec( | |||||
// recursion(Ab_L, ipiv_T) | // recursion(Ab_L, ipiv_T) | ||||
RELAPACK_zgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); | RELAPACK_zgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); | ||||
if (*info) return; | |||||
// Workl = A_BLb | // Workl = A_BLb | ||||
LAPACK(zlacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl); | LAPACK(zlacpy)("U", &m22, &n1, A_BLb, ldA, Workl, ldWorkl); | ||||
@@ -193,11 +197,21 @@ static void RELAPACK_zgbtrf_rec( | |||||
} | } | ||||
// A_TRl = A_TL \ A_TRl | // A_TRl = A_TL \ A_TRl | ||||
if (*ldA < MAX(1,m1)) { | |||||
LAPACK(xerbla)("ZGBTRF", &min11, strlen("ZGBTRF")); | |||||
return; | |||||
} else { | |||||
BLAS(ztrsm)("L", "L", "N", "U", &m1, &n21, ONE, A_TL, ldA, A_TRl, ldA); | BLAS(ztrsm)("L", "L", "N", "U", &m1, &n21, ONE, A_TL, ldA, A_TRl, ldA); | ||||
} | |||||
// Worku = A_TRr | // Worku = A_TRr | ||||
LAPACK(zlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku); | LAPACK(zlacpy)("L", &m1, &n22, A_TRr, ldA, Worku, ldWorku); | ||||
// Worku = A_TL \ Worku | // Worku = A_TL \ Worku | ||||
if (*ldWorku < MAX(1,m1)) { | |||||
LAPACK(xerbla)("ZGBTRF", &min11, strlen("ZGBTRF")); | |||||
return; | |||||
} else { | |||||
BLAS(ztrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku); | BLAS(ztrsm)("L", "L", "N", "U", &m1, &n22, ONE, A_TL, ldA, Worku, ldWorku); | ||||
} | |||||
// A_TRr = Worku | // A_TRr = Worku | ||||
LAPACK(zlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA); | LAPACK(zlacpy)("L", &m1, &n22, Worku, ldWorku, A_TRr, ldA); | ||||
// A_BRtl = A_BRtl - A_BLt * A_TRl | // A_BRtl = A_BRtl - A_BLt * A_TRl | ||||
@@ -35,6 +35,8 @@ void RELAPACK_zpbtrf( | |||||
return; | return; | ||||
} | } | ||||
if (*n == 0) return; | |||||
// Clean char * arguments | // Clean char * arguments | ||||
const char cleanuplo = lower ? 'L' : 'U'; | const char cleanuplo = lower ? 'L' : 'U'; | ||||
@@ -43,9 +45,10 @@ void RELAPACK_zpbtrf( | |||||
// Allocate work space | // Allocate work space | ||||
const blasint n1 = ZREC_SPLIT(*n); | const blasint n1 = ZREC_SPLIT(*n); | ||||
const blasint mWork = (*kd > n1) ? (lower ? *n - *kd : n1) : *kd; | |||||
const blasint nWork = (*kd > n1) ? (lower ? n1 : *n - *kd) : *kd; | |||||
const blasint mWork = abs((*kd > n1) ? (lower ? *n - *kd : n1) : *kd); | |||||
const blasint nWork = abs((*kd > n1) ? (lower ? n1 : *n - *kd) : *kd); | |||||
double *Work = malloc(mWork * nWork * 2 * sizeof(double)); | double *Work = malloc(mWork * nWork * 2 * sizeof(double)); | ||||
LAPACK(zlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); | LAPACK(zlaset)(uplo, &mWork, &nWork, ZERO, ZERO, Work, &mWork); | ||||
// Recursive kernel | // Recursive kernel | ||||
@@ -64,7 +67,7 @@ static void RELAPACK_zpbtrf_rec( | |||||
blasint *info | blasint *info | ||||
){ | ){ | ||||
if (*n <= MAX(CROSSOVER_ZPBTRF, 1)) { | |||||
if (*n <= MAX(CROSSOVER_ZPBTRF, 1) || *ldAb == 1) { | |||||
// Unblocked | // Unblocked | ||||
LAPACK(zpbtf2)(uplo, n, kd, Ab, ldAb, info); | LAPACK(zpbtf2)(uplo, n, kd, Ab, ldAb, info); | ||||
return; | return; | ||||
@@ -148,7 +151,7 @@ static void RELAPACK_zpbtrf_rec( | |||||
} | } | ||||
// recursion(A_BR) | // recursion(A_BR) | ||||
if (*kd > n1) | |||||
if (*kd > n1 && ldA != 0) | |||||
RELAPACK_zpotrf(uplo, &n2, A_BR, ldA, info); | RELAPACK_zpotrf(uplo, &n2, A_BR, ldA, info); | ||||
else | else | ||||
RELAPACK_zpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); | RELAPACK_zpbtrf_rec(uplo, &n2, kd, Ab_BR, ldAb, Work, ldWork, info); | ||||