Browse Source

Add shortcuts for (small) cases that do not need expensive buffer allocation

tags/v0.3.16^2
Martin Kroeker GitHub 4 years ago
parent
commit
d6d7a6685d
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 92 additions and 0 deletions
  1. interface/ger.c (+5, -0)
  2. interface/spr.c (+20, -0)
  3. interface/spr2.c (+18, -0)
  4. interface/symv.c (+4, -0)
  5. interface/syr2.c (+19, -0)
  6. interface/zsyr.c (+26, -0)

+ 5
- 0
interface/ger.c View File

@@ -164,6 +164,11 @@ void CNAME(enum CBLAS_ORDER order,
if (m == 0 || n == 0) return; if (m == 0 || n == 0) return;
if (alpha == 0.) return; if (alpha == 0.) return;


/*
 * Small-case shortcut: with unit-stride x and y and a small m*n, call the GER
 * kernel directly and return, skipping the rest of the function (per the
 * commit message, the expensive buffer allocation).
 *
 * The size product is formed in long long so that it cannot overflow when
 * blasint is 32 bits wide; the previous `1L*m*n` only widened the product on
 * platforms where long is 64-bit, which is not the case on LLP64 targets.
 *
 * NOTE(review): `buffer` is passed to GER here although no allocation is
 * visible before this point -- confirm the kernel never dereferences it when
 * incx == incy == 1.
 */
if (incx == 1 && incy == 1 &&
    (long long)m * n <= 2048 * GEMM_MULTITHREAD_THRESHOLD) {
  GER(m, n, 0, alpha, x, incx, y, incy, a, lda, buffer);
  return;
}

IDEBUG_START; IDEBUG_START;


FUNCTION_PROFILE_START(); FUNCTION_PROFILE_START();


+ 20
- 0
interface/spr.c View File

@@ -167,6 +167,26 @@ void CNAME(enum CBLAS_ORDER order,


FUNCTION_PROFILE_START(); FUNCTION_PROFILE_START();


/*
 * Small-case shortcut (unit-stride x, n < 100): perform the packed update
 * directly with one AXPYU_K call per segment of `a` and return, so the
 * blas_memory_alloc() path below is never reached.
 *
 * Segment `col` of the packed array has col + 1 entries when uplo == 0 and
 * n - col entries otherwise; in the latter case the source vector starts at
 * x[col]. Segments whose scaling element x[col] is zero are skipped, but the
 * output pointer still advances past them.
 *
 * NOTE(review): this returns after FUNCTION_PROFILE_START() without a visible
 * matching end macro -- confirm that is harmless when profiling is enabled.
 */
if (incx == 1 && n < 100) {
  blasint col;

  for (col = 0; col < n; col++) {
    blasint len   = (uplo == 0) ? col + 1 : n - col;
    blasint first = (uplo == 0) ? 0 : col;

    if (x[col] != ZERO) {
      AXPYU_K(len, 0, 0, alpha * x[col], x + first, 1, a, 1, NULL, 0);
    }
    a += len;
  }
  return;
}

if (incx < 0 ) x -= (n - 1) * incx; if (incx < 0 ) x -= (n - 1) * incx;


buffer = (FLOAT *)blas_memory_alloc(1); buffer = (FLOAT *)blas_memory_alloc(1);


+ 18
- 0
interface/spr2.c View File

@@ -168,6 +168,24 @@ void CNAME(enum CBLAS_ORDER order,


if (alpha == ZERO) return; if (alpha == ZERO) return;


/*
 * Small-case shortcut (unit-stride x and y, n < 50): perform the packed
 * rank-2 style update directly with two AXPYU_K calls per segment of `a`
 * and return before the regular path below.
 *
 * For segment k the length is k + 1 when uplo == 0 and n - k otherwise; in
 * the latter case both source vectors start at element k. Each segment
 * receives alpha * x[k] times y plus alpha * y[k] times x.
 */
if (incx == 1 && incy == 1 && n < 50) {
  blasint k;

  for (k = 0; k < n; k++) {
    blasint len = uplo ? n - k : k + 1;
    blasint off = uplo ? k : 0;

    AXPYU_K(len, 0, 0, alpha * x[k], y + off, 1, a, 1, NULL, 0);
    AXPYU_K(len, 0, 0, alpha * y[k], x + off, 1, a, 1, NULL, 0);
    a += len;
  }
  return;
}

IDEBUG_START; IDEBUG_START;


FUNCTION_PROFILE_START(); FUNCTION_PROFILE_START();


+ 4
- 0
interface/symv.c View File

@@ -170,6 +170,10 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,


if (alpha == ZERO) return; if (alpha == ZERO) return;


/*
 * Small-case shortcut: with unit-stride x and y and a small n*n, dispatch
 * straight to the symv kernel selected by `uplo` and return.
 *
 * The size product is computed in long long: evaluated in a 32-bit blasint,
 * n*n overflows for n > 46340 (undefined behaviour), which could make the
 * comparison succeed for a very large matrix.
 *
 * NOTE(review): `buffer` is passed to the kernel although no allocation is
 * visible before this point -- confirm the kernel does not need scratch
 * space on this path.
 */
if (incx == 1 && incy == 1 &&
    (long long)n * n < 2304 * GEMM_MULTITHREAD_THRESHOLD) {
  (symv[uplo])(n, n, alpha, a, lda, x, incx, y, incy, buffer);
  return;
}
IDEBUG_START; IDEBUG_START;


FUNCTION_PROFILE_START(); FUNCTION_PROFILE_START();


+ 19
- 0
interface/syr2.c View File

@@ -170,6 +170,25 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, blasint n, FLOAT alpha,


IDEBUG_START; IDEBUG_START;


/*
 * Small-case shortcut (unit-stride x and y, n < 100): perform the update
 * directly with two AXPYU_K calls per step and return before the regular
 * path below.
 *
 * At step k the updated run has k + 1 entries when uplo == 0 and n - k
 * entries otherwise (then both source vectors start at element k). The
 * output pointer moves by lda per step, plus one extra element when
 * uplo != 0 so that it follows the diagonal.
 *
 * NOTE(review): this returns after IDEBUG_START without a visible matching
 * end macro -- confirm that is harmless in debug builds.
 */
if (incx == 1 && incy == 1 && n < 100) {
  blasint k;

  for (k = 0; k < n; k++) {
    blasint len = uplo ? n - k : k + 1;
    blasint off = uplo ? k : 0;

    AXPYU_K(len, 0, 0, alpha * x[k], y + off, 1, a, 1, NULL, 0);
    AXPYU_K(len, 0, 0, alpha * y[k], x + off, 1, a, 1, NULL, 0);
    a += uplo ? 1 + lda : lda;
  }
  return;
}

FUNCTION_PROFILE_START(); FUNCTION_PROFILE_START();


if (incx < 0 ) x -= (n - 1) * incx; if (incx < 0 ) x -= (n - 1) * incx;


+ 26
- 0
interface/zsyr.c View File

@@ -172,6 +172,32 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, int n, FLOAT alpha, FLO


if ((alpha_r == ZERO) && (alpha_i == ZERO)) return; if ((alpha_r == ZERO) && (alpha_i == ZERO)) return;


/*
 * Small-case shortcut (unit-stride x, n < 50): perform the complex symmetric
 * rank-1 style update directly with one AXPYU_K call per step and return
 * before the regular path below.
 *
 * x is stored as interleaved (real, imaginary) pairs. The scalar handed to
 * AXPYU_K is the complex product alpha * x[i]:
 *   real part      = alpha_r * Re(x[i]) - alpha_i * Im(x[i])
 *   imaginary part = alpha_i * Re(x[i]) + alpha_r * Im(x[i])
 * Steps whose x[i] is exactly zero are skipped; the output pointer still
 * advances.
 *
 * Two fixes relative to the previous version:
 *  - the guard no longer tests `incy`: this update involves only x and the
 *    block never reads a y vector;
 *  - the run length in the second branch is n - i (it was `m - i`; the
 *    routine has a single dimension n, and the sibling shortcuts use n - i).
 *
 * NOTE(review): `a` advances by `lda` (plus 2 in the second branch, i.e. one
 * complex element) per step -- confirm lda is already expressed in FLOAT
 * units here, otherwise the stride for complex data should be 2 * lda.
 */
if (incx == 1 && n < 50) {
  blasint i;
  if (!uplo) {
    for (i = 0; i < n; i++) {
      if ((x[i * 2 + 0] != ZERO) || (x[i * 2 + 1] != ZERO)) {
        AXPYU_K(i + 1, 0, 0,
                alpha_r * x[i * 2 + 0] - alpha_i * x[i * 2 + 1],
                alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1],
                x, 1, a, 1, NULL, 0);
      }
      a += lda;
    }
  } else {
    for (i = 0; i < n; i++) {
      if ((x[i * 2 + 0] != ZERO) || (x[i * 2 + 1] != ZERO)) {
        AXPYU_K(n - i, 0, 0,
                alpha_r * x[i * 2 + 0] - alpha_i * x[i * 2 + 1],
                alpha_i * x[i * 2 + 0] + alpha_r * x[i * 2 + 1],
                x + i * 2, 1, a, 1, NULL, 0);
      }
      a += 2 + lda;
    }
  }
  return;
}

IDEBUG_START; IDEBUG_START;


FUNCTION_PROFILE_START(); FUNCTION_PROFILE_START();


Loading…
Cancel
Save