|
@@ -27,7 +27,7 @@ void RELAPACK_sgbtrf( |
|
|
*info = -3; |
|
|
*info = -3; |
|
|
else if (*ku < 0) |
|
|
else if (*ku < 0) |
|
|
*info = -4; |
|
|
*info = -4; |
|
|
else if (*ldAb < 2 * *kl + *ku + 1) |
|
|
|
|
|
|
|
|
else if (*ldAb < 2 * *kl + *ku + 1) |
|
|
*info = -6; |
|
|
*info = -6; |
|
|
if (*info) { |
|
|
if (*info) { |
|
|
const blasint minfo = -*info; |
|
|
const blasint minfo = -*info; |
|
@@ -55,15 +55,16 @@ void RELAPACK_sgbtrf( |
|
|
|
|
|
|
|
|
// Allocate work space |
|
|
// Allocate work space |
|
|
const blasint n1 = SREC_SPLIT(*n); |
|
|
const blasint n1 = SREC_SPLIT(*n); |
|
|
const blasint mWorkl = (kv > n1) ? MAX(1, *m - *kl) : kv; |
|
|
|
|
|
const blasint nWorkl = (kv > n1) ? n1 : kv; |
|
|
|
|
|
const blasint mWorku = (*kl > n1) ? n1 : *kl; |
|
|
|
|
|
const blasint nWorku = (*kl > n1) ? MAX(0, *n - *kl) : *kl; |
|
|
|
|
|
|
|
|
const blasint mWorkl = abs( (kv > n1) ? MAX(1, *m - *kl) : kv ); |
|
|
|
|
|
const blasint nWorkl = abs( (kv > n1) ? n1 : kv ); |
|
|
|
|
|
const blasint mWorku = abs( (*kl > n1) ? n1 : *kl ); |
|
|
|
|
|
const blasint nWorku = abs( (*kl > n1) ? MAX(0, *n - *kl) : *kl ); |
|
|
float *Workl = malloc(mWorkl * nWorkl * sizeof(float)); |
|
|
float *Workl = malloc(mWorkl * nWorkl * sizeof(float)); |
|
|
float *Worku = malloc(mWorku * nWorku * sizeof(float)); |
|
|
float *Worku = malloc(mWorku * nWorku * sizeof(float)); |
|
|
LAPACK(slaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl); |
|
|
LAPACK(slaset)("L", &mWorkl, &nWorkl, ZERO, ZERO, Workl, &mWorkl); |
|
|
LAPACK(slaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku); |
|
|
LAPACK(slaset)("U", &mWorku, &nWorku, ZERO, ZERO, Worku, &mWorku); |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// Recursive kernel |
|
|
// Recursive kernel |
|
|
RELAPACK_sgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info); |
|
|
RELAPACK_sgbtrf_rec(m, n, kl, ku, Ab, ldAb, ipiv, Workl, &mWorkl, Worku, &mWorku, info); |
|
|
|
|
|
|
|
@@ -81,6 +82,7 @@ static void RELAPACK_sgbtrf_rec( |
|
|
blasint *info |
|
|
blasint *info |
|
|
) { |
|
|
) { |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if (*n <= MAX(CROSSOVER_SGBTRF, 1)) { |
|
|
if (*n <= MAX(CROSSOVER_SGBTRF, 1)) { |
|
|
// Unblocked |
|
|
// Unblocked |
|
|
LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); |
|
|
LAPACK(sgbtf2)(m, n, kl, ku, Ab, ldAb, ipiv, info); |
|
@@ -127,7 +129,7 @@ static void RELAPACK_sgbtrf_rec( |
|
|
float *const A_BR = A + *ldA * n1 + m1; |
|
|
float *const A_BR = A + *ldA * n1 + m1; |
|
|
|
|
|
|
|
|
// ipiv_T |
|
|
// ipiv_T |
|
|
// ipiv_B |
|
|
|
|
|
|
|
|
// ipiv_B |
|
|
blasint *const ipiv_T = ipiv; |
|
|
blasint *const ipiv_T = ipiv; |
|
|
blasint *const ipiv_B = ipiv + n1; |
|
|
blasint *const ipiv_B = ipiv + n1; |
|
|
|
|
|
|
|
@@ -155,6 +157,7 @@ static void RELAPACK_sgbtrf_rec( |
|
|
float *const A_BRbl = A_BR + m21; |
|
|
float *const A_BRbl = A_BR + m21; |
|
|
float *const A_BRbr = A_BR + *ldA * n21 + m21; |
|
|
float *const A_BRbr = A_BR + *ldA * n21 + m21; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// recursion(Ab_L, ipiv_T) |
|
|
// recursion(Ab_L, ipiv_T) |
|
|
RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); |
|
|
RELAPACK_sgbtrf_rec(m, &n1, kl, ku, Ab_L, ldAb, ipiv_T, Workl, ldWorkl, Worku, ldWorku, info); |
|
|
|
|
|
|
|
@@ -216,8 +219,11 @@ static void RELAPACK_sgbtrf_rec( |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
// recursion(Ab_BR, ipiv_B) |
|
|
// recursion(Ab_BR, ipiv_B) |
|
|
RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); |
|
|
|
|
|
|
|
|
//cause of infinite recursion here ? |
|
|
|
|
|
// RELAPACK_sgbtrf_rec(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, Workl, ldWorkl, Worku, ldWorku, info); |
|
|
|
|
|
LAPACK(sgbtf2)(&m2, &n2, kl, ku, Ab_BR, ldAb, ipiv_B, info); |
|
|
if (*info) |
|
|
if (*info) |
|
|
*info += n1; |
|
|
*info += n1; |
|
|
// shift pivots |
|
|
// shift pivots |
|
|