Browse Source

Fix BLAS, BLAS-like functions and Generic RISC-V kernels

* Fixed gemmt, imatcopy, zimatcopy_cnc functions
* Fixed cblas_cscal testing in ctest
* Removed rotmg unreachable code
* Added zero size checks
tags/v0.3.27
kseniyazaytseva Andrey Sokolov 2 years ago
parent
commit
ff41cf5c49
17 changed files with 201 additions and 124 deletions
  1. +8
    -0
      cblas.h
  2. +9
    -0
      common_interface.h
  3. +5
    -5
      ctest/c_cblat1.f
  4. +3
    -3
      ctest/c_cblat1c.c
  5. +157
    -76
      interface/gemmt.c
  6. +4
    -4
      interface/imatcopy.c
  7. +4
    -24
      interface/rotmg.c
  8. +3
    -3
      interface/zimatcopy.c
  9. +0
    -1
      kernel/generic/zimatcopy_cnc.c
  10. +1
    -1
      kernel/riscv64/axpby.c
  11. +1
    -1
      kernel/riscv64/axpy.c
  12. +1
    -1
      kernel/riscv64/copy.c
  13. +1
    -1
      kernel/riscv64/dot.c
  14. +1
    -1
      kernel/riscv64/swap.c
  15. +1
    -1
      kernel/riscv64/zaxpy.c
  16. +1
    -1
      kernel/riscv64/zcopy.c
  17. +1
    -1
      kernel/riscv64/zswap.c

+ 8
- 0
cblas.h View File

@@ -289,6 +289,14 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);


void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);
void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc);
void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);
void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K,
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc);


void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N,
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc);


+ 9
- 0
common_interface.h View File

@@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double *
void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *, void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *,
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *);


void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *,
float *, blasint *, float *, blasint *, float *, float *, blasint *);
void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *,
double *, blasint *, double *, blasint *, double *, double *, blasint *);
void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *,
float *, blasint *, float *, blasint *, float *, float *, blasint *);
void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *,
double *, blasint *, double *, blasint *, double *, double *, blasint *);

int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *,
float *, float *, blasint *, float *, blasint *, float *, float *, blasint *, float *, blasint *,
float *, float *, blasint *); float *, float *, blasint *);


+ 5
- 5
ctest/c_cblat1.f View File

@@ -96,7 +96,7 @@
INTEGER ICAMAXTEST INTEGER ICAMAXTEST
EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST
* .. External Subroutines .. * .. External Subroutines ..
EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1
EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1
* .. Intrinsic Functions .. * .. Intrinsic Functions ..
INTRINSIC MAX INTRINSIC MAX
* .. Common blocks .. * .. Common blocks ..
@@ -214,8 +214,8 @@
CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1), CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1),
+ STRUE4(NP1),SFAC) + STRUE4(NP1),SFAC)
ELSE IF (ICASE.EQ.8) THEN ELSE IF (ICASE.EQ.8) THEN
* .. CSCAL ..
CALL CSCAL(N,CA,CX,INCX)
* .. CSCALTEST ..
CALL CSCALTEST(N,CA,CX,INCX)
CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX),
+ SFAC) + SFAC)
ELSE IF (ICASE.EQ.9) THEN ELSE IF (ICASE.EQ.9) THEN
@@ -236,14 +236,14 @@
* *
INCX = 1 INCX = 1
IF (ICASE.EQ.8) THEN IF (ICASE.EQ.8) THEN
* CSCAL
* CSCALTEST
* Add a test for alpha equal to zero. * Add a test for alpha equal to zero.
CA = (0.0E0,0.0E0) CA = (0.0E0,0.0E0)
DO 80 I = 1, 5 DO 80 I = 1, 5
MWPCT(I) = (0.0E0,0.0E0) MWPCT(I) = (0.0E0,0.0E0)
MWPCS(I) = (1.0E0,1.0E0) MWPCS(I) = (1.0E0,1.0E0)
80 CONTINUE 80 CONTINUE
CALL CSCAL(5,CA,CX,INCX)
CALL CSCALTEST(5,CA,CX,INCX)
CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) CALL CTEST(5,CX,MWPCT,MWPCS,SFAC)
ELSE IF (ICASE.EQ.9) THEN ELSE IF (ICASE.EQ.9) THEN
* CSSCALTEST * CSSCALTEST


+ 3
- 3
ctest/c_cblat1c.c View File

@@ -685,7 +685,7 @@ real *sfac;
static integer i__; static integer i__;
extern /* Subroutine */ int ctest_(); extern /* Subroutine */ int ctest_();
static complex mwpcs[5], mwpct[5]; static complex mwpcs[5], mwpct[5];
extern /* Subroutine */ int itest1_(), stest1_();
extern /* Subroutine */ int cscaltest_(), itest1_(), stest1_();
static complex cx[8]; static complex cx[8];
extern real scnrm2test_(); extern real scnrm2test_();
static integer np1; static integer np1;
@@ -727,7 +727,7 @@ real *sfac;
stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac);
} else if (combla_1.icase == 8) { } else if (combla_1.icase == 8) {
/* .. CSCAL .. */ /* .. CSCAL .. */
cscal_(&combla_1.n, &ca, cx, &combla_1.incx);
cscaltest_(&combla_1.n, &ca, cx, &combla_1.incx);
ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48],
&ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac);
} else if (combla_1.icase == 9) { } else if (combla_1.icase == 9) {
@@ -761,7 +761,7 @@ real *sfac;
mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.; mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.;
/* L80: */ /* L80: */
} }
cscal_(&c__5, &ca, cx, &combla_1.incx);
cscaltest_(&c__5, &ca, cx, &combla_1.incx);
ctest_(&c__5, cx, mwpct, mwpcs, sfac); ctest_(&c__5, cx, mwpct, mwpcs, sfac);
} else if (combla_1.icase == 9) { } else if (combla_1.icase == 9) {
/* CSSCALTEST */ /* CSSCALTEST */


+ 157
- 76
interface/gemmt.c View File

@@ -35,29 +35,26 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include "common.h" #include "common.h"
#ifdef FUNCTION_PROFILE
#include "functable.h"
#endif


#ifndef COMPLEX #ifndef COMPLEX
#define SMP_THRESHOLD_MIN 65536.0 #define SMP_THRESHOLD_MIN 65536.0
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "QGEMT "
#define ERROR_NAME "QGEMMT "
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define ERROR_NAME "DGEMT "
#define ERROR_NAME "DGEMMT "
#elif defined(BFLOAT16) #elif defined(BFLOAT16)
#define ERROR_NAME "SBGEMT "
#define ERROR_NAME "SBGEMMT "
#else #else
#define ERROR_NAME "SGEMT "
#define ERROR_NAME "SGEMMT "
#endif #endif
#else #else
#define SMP_THRESHOLD_MIN 8192.0 #define SMP_THRESHOLD_MIN 8192.0
#ifdef XDOUBLE #ifdef XDOUBLE
#define ERROR_NAME "XGEMT "
#define ERROR_NAME "XGEMMT "
#elif defined(DOUBLE) #elif defined(DOUBLE)
#define ERROR_NAME "ZGEMT "
#define ERROR_NAME "ZGEMMT "
#else #else
#define ERROR_NAME "CGEMT "
#define ERROR_NAME "CGEMMT "
#endif #endif
#endif #endif


@@ -68,18 +65,22 @@
#ifndef CBLAS #ifndef CBLAS


void NAME(char *UPLO, char *TRANSA, char *TRANSB, void NAME(char *UPLO, char *TRANSA, char *TRANSB,
blasint * M, blasint * N, blasint * K,
blasint * M, blasint * K,
FLOAT * Alpha, FLOAT * Alpha,
IFLOAT * a, blasint * ldA, IFLOAT * a, blasint * ldA,
IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC)
{ {


blasint m, n, k;
blasint m, k;
blasint lda, ldb, ldc; blasint lda, ldb, ldc;
int transa, transb, uplo; int transa, transb, uplo;
blasint info; blasint info;


char transA, transB, Uplo; char transA, transB, Uplo;
blasint nrowa, nrowb;
#if defined(COMPLEX)
blasint ncolb;
#endif
IFLOAT *buffer; IFLOAT *buffer;
IFLOAT *aa, *bb; IFLOAT *aa, *bb;
FLOAT *cc; FLOAT *cc;
@@ -92,7 +93,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
PRINT_DEBUG_NAME; PRINT_DEBUG_NAME;


m = *M; m = *M;
n = *N;
k = *K; k = *K;


#if defined(COMPLEX) #if defined(COMPLEX)
@@ -159,32 +159,47 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB,
if (Uplo == 'L') if (Uplo == 'L')
uplo = 1; uplo = 1;


nrowa = m;
if (transa & 1) nrowa = k;
nrowb = k;
#if defined(COMPLEX)
ncolb = m;
#endif
if (transb & 1) {
nrowb = m;
#if defined(COMPLEX)
ncolb = k;
#endif
}

info = 0; info = 0;


if (uplo < 0)
info = 14;
if (ldc < m)
if (ldc < MAX(1, m))
info = 13; info = 13;
if (ldb < MAX(1, nrowb))
info = 10;
if (lda < MAX(1, nrowa))
info = 8;
if (k < 0) if (k < 0)
info = 5; info = 5;
if (n < 0)
info = 4;
if (m < 0) if (m < 0)
info = 3;
info = 4;
if (transb < 0) if (transb < 0)
info = 2;
info = 3;
if (transa < 0) if (transa < 0)
info = 2;
if (uplo < 0)
info = 1; info = 1;


if (info) {
if (info != 0) {
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
return; return;
} }
#else #else


void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M,
blasint N, blasint k,
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint m,
blasint k,
#ifndef COMPLEX #ifndef COMPLEX
FLOAT alpha, FLOAT alpha,
IFLOAT * A, blasint LDA, IFLOAT * A, blasint LDA,
@@ -205,17 +220,23 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,


int transa, transb, uplo; int transa, transb, uplo;
blasint info; blasint info;
blasint m, n, lda, ldb;
blasint lda, ldb;
FLOAT *a, *b; FLOAT *a, *b;
#if defined(COMPLEX)
blasint nrowb, ncolb;
#endif
XFLOAT *buffer; XFLOAT *buffer;


PRINT_DEBUG_CNAME; PRINT_DEBUG_CNAME;


uplo = -1;
transa = -1; transa = -1;
transb = -1; transb = -1;
info = 0; info = 0;


if (order == CblasColMajor) { if (order == CblasColMajor) {
if (Uplo == CblasUpper) uplo = 0;
if (Uplo == CblasLower) uplo = 1;


if (TransA == CblasNoTrans) if (TransA == CblasNoTrans)
transa = 0; transa = 0;
@@ -248,9 +269,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
transb = 3; transb = 3;
#endif #endif


m = M;
n = N;

a = (void *)A; a = (void *)A;
b = (void *)B; b = (void *)B;
lda = LDA; lda = LDA;
@@ -258,23 +276,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,


info = -1; info = -1;


if (ldc < m)
blasint nrowa;
#if !defined(COMPLEX)
blasint nrowb;
#endif
nrowa = m;
if (transa & 1) nrowa = k;
nrowb = k;
#if defined(COMPLEX)
ncolb = m;
#endif
if (transb & 1) {
nrowb = m;
#if defined(COMPLEX)
ncolb = k;
#endif
}

if (ldc < MAX(1, m))
info = 13; info = 13;
if (ldb < MAX(1, nrowb))
info = 10;
if (lda < MAX(1, nrowa))
info = 8;
if (k < 0) if (k < 0)
info = 5; info = 5;
if (n < 0)
info = 4;
if (m < 0) if (m < 0)
info = 3;
info = 4;
if (transb < 0) if (transb < 0)
info = 2;
info = 3;
if (transa < 0) if (transa < 0)
info = 2;
if (uplo < 0)
info = 1; info = 1;
} }


if (order == CblasRowMajor) { if (order == CblasRowMajor) {
m = N;
n = M;


a = (void *)B; a = (void *)B;
b = (void *)A; b = (void *)A;
@@ -282,6 +319,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
lda = LDB; lda = LDB;
ldb = LDA; ldb = LDA;


if (Uplo == CblasUpper) uplo = 0;
if (Uplo == CblasLower) uplo = 1;

if (TransB == CblasNoTrans) if (TransB == CblasNoTrans)
transa = 0; transa = 0;
if (TransB == CblasTrans) if (TransB == CblasTrans)
@@ -315,29 +355,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,


info = -1; info = -1;


if (ldc < m)
blasint ncola;
#if !defined(COMPLEX)
blasint ncolb;
#endif
ncola = m;
if (transa & 1) ncola = k;
ncolb = k;
#if defined(COMPLEX)
nrowb = m;
#endif

if (transb & 1) {
#if defined(COMPLEX)
nrowb = k;
#endif
ncolb = m;
}

if (ldc < MAX(1,m))
info = 13; info = 13;
if (ldb < MAX(1, ncolb))
info = 8;
if (lda < MAX(1, ncola))
info = 10;
if (k < 0) if (k < 0)
info = 5; info = 5;
if (n < 0)
info = 4;
if (m < 0) if (m < 0)
info = 3;
info = 4;
if (transb < 0) if (transb < 0)
info = 2; info = 2;
if (transa < 0) if (transa < 0)
info = 3;
if (uplo < 0)
info = 1; info = 1;

} }


uplo = -1;
if (Uplo == CblasUpper)
uplo = 0;
if (Uplo == CblasLower)
uplo = 1;
if (uplo < 0)
info = 14;

if (info >= 0) { if (info >= 0) {
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME));
return; return;
@@ -407,37 +460,48 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,


#endif #endif


if ((m == 0) || (n == 0))
if (m == 0)
return; return;


IDEBUG_START; IDEBUG_START;


FUNCTION_PROFILE_START();
#if defined(COMPLEX)
if (transb > 1){
#ifndef CBLAS
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb);
#else
if (order == CblasColMajor)
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb);
if (order == CblasRowMajor)
IMATCOPY_K_RNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb);
#endif
}
#endif


const blasint incb = (transb == 0) ? 1 : ldb;
const blasint incb = ((transb & 1) == 0) ? 1 : ldb;


if (uplo == 1) { if (uplo == 1) {
for (i = 0; i < n; i++) {
j = n - i;
for (i = 0; i < m; i++) {
j = m - i;


l = j; l = j;
#if defined(COMPLEX) #if defined(COMPLEX)
aa = a + i * 2; aa = a + i * 2;
bb = b + i * ldb * 2; bb = b + i * ldb * 2;
if (transa) {
l = k;
if (transa & 1) {
aa = a + lda * i * 2; aa = a + lda * i * 2;
bb = b + i * 2;
} }
if (transb & 1)
bb = b + i * 2;
cc = c + i * 2 * ldc + i * 2; cc = c + i * 2 * ldc + i * 2;
#else #else
aa = a + i; aa = a + i;
bb = b + i * ldb; bb = b + i * ldb;
if (transa) {
l = k;
if (transa & 1) {
aa = a + lda * i; aa = a + lda * i;
bb = b + i;
} }
if (transb & 1)
bb = b + i;
cc = c + i * ldc + i; cc = c + i * ldc + i;
#endif #endif


@@ -447,7 +511,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
NULL, 0); NULL, 0);


if (alpha_r == ZERO && alpha_i == ZERO) if (alpha_r == ZERO && alpha_i == ZERO)
return;
continue;
#else #else
if (beta != ONE) if (beta != ONE)
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
@@ -458,8 +522,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,


IDEBUG_START; IDEBUG_START;


FUNCTION_PROFILE_START();

buffer_size = j + k + 128 / sizeof(FLOAT); buffer_size = j + k + 128 / sizeof(FLOAT);
#ifdef WINDOWS_ABI #ifdef WINDOWS_ABI
buffer_size += 160 / sizeof(FLOAT); buffer_size += 160 / sizeof(FLOAT);
@@ -479,20 +541,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif #endif


#if defined(COMPLEX) #if defined(COMPLEX)
if (!(transa & 1))
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
aa, lda, bb, incb, cc, 1, aa, lda, bb, incb, cc, 1,
buffer); buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i,
aa, lda, bb, incb, cc, 1,
buffer);
#else #else
if (!(transa & 1))
(gemv[(int)transa]) (j, k, 0, alpha, aa, lda, (gemv[(int)transa]) (j, k, 0, alpha, aa, lda,
bb, incb, cc, 1, buffer); bb, incb, cc, 1, buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha, aa, lda,
bb, incb, cc, 1, buffer);
#endif #endif
#ifdef SMP #ifdef SMP
} else { } else {
if (!(transa & 1))
(gemv_thread[(int)transa]) (j, k, alpha, aa, (gemv_thread[(int)transa]) (j, k, alpha, aa,
lda, bb, incb, cc, lda, bb, incb, cc,
1, buffer, 1, buffer,
nthreads); nthreads);
else
(gemv_thread[(int)transa]) (k, j, alpha, aa,
lda, bb, incb, cc,
1, buffer,
nthreads);


} }
#endif #endif
@@ -501,21 +577,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
} }
} else { } else {


for (i = 0; i < n; i++) {
for (i = 0; i < m; i++) {
j = i + 1; j = i + 1;


l = j; l = j;
#if defined COMPLEX #if defined COMPLEX
bb = b + i * ldb * 2; bb = b + i * ldb * 2;
if (transa) {
l = k;
if (transb & 1) {
bb = b + i * 2; bb = b + i * 2;
} }
cc = c + i * 2 * ldc; cc = c + i * 2 * ldc;
#else #else
bb = b + i * ldb; bb = b + i * ldb;
if (transa) {
l = k;
if (transb & 1) {
bb = b + i; bb = b + i;
} }
cc = c + i * ldc; cc = c + i * ldc;
@@ -527,7 +601,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
NULL, 0); NULL, 0);


if (alpha_r == ZERO && alpha_i == ZERO) if (alpha_r == ZERO && alpha_i == ZERO)
return;
continue;
#else #else
if (beta != ONE) if (beta != ONE)
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0);
@@ -537,8 +611,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif #endif
IDEBUG_START; IDEBUG_START;


FUNCTION_PROFILE_START();

buffer_size = j + k + 128 / sizeof(FLOAT); buffer_size = j + k + 128 / sizeof(FLOAT);
#ifdef WINDOWS_ABI #ifdef WINDOWS_ABI
buffer_size += 160 / sizeof(FLOAT); buffer_size += 160 / sizeof(FLOAT);
@@ -558,32 +630,41 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo,
#endif #endif


#if defined(COMPLEX) #if defined(COMPLEX)
if (!(transa & 1))
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i,
a, lda, bb, incb, cc, 1, a, lda, bb, incb, cc, 1,
buffer); buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i,
a, lda, bb, incb, cc, 1,
buffer);
#else #else
if (!(transa & 1))
(gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, (gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb,
incb, cc, 1, buffer); incb, cc, 1, buffer);
else
(gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb,
incb, cc, 1, buffer);
#endif #endif


#ifdef SMP #ifdef SMP
} else { } else {
if (!(transa & 1))
(gemv_thread[(int)transa]) (j, k, alpha, a, lda, (gemv_thread[(int)transa]) (j, k, alpha, a, lda,
bb, incb, cc, 1, bb, incb, cc, 1,
buffer, nthreads); buffer, nthreads);

else
(gemv_thread[(int)transa]) (k, j, alpha, a, lda,
bb, incb, cc, 1,
buffer, nthreads);
} }
#endif #endif


STACK_FREE(buffer); STACK_FREE(buffer);
} }
} }
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE,
args.m * args.k + args.k * args.n +
args.m * args.n, 2 * args.m * args.n * args.k);


IDEBUG_END; IDEBUG_END;


return; return;
}
}

+ 4
- 4
interface/imatcopy.c View File

@@ -149,10 +149,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,


#endif #endif


if ( *lda > *ldb )
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT);
else
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT);
if ( *rows > *cols )
msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT);
else
msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT);


b = malloc(msize); b = malloc(msize);
if ( b == NULL ) if ( b == NULL )


+ 4
- 24
interface/rotmg.c View File

@@ -96,12 +96,6 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
else else
{ {
dp2 = *dd2 * dy1; dp2 = *dd2 * dy1;
if(dp2 == ZERO)
{
dflag = -TWO;
dparam[0] = dflag;
return;
}
dp1 = *dd1 * *dx1; dp1 = *dd1 * *dx1;
dq2 = dp2 * dy1; dq2 = dp2 * dy1;
dq1 = dp1 * *dx1; dq1 = dp1 * *dx1;
@@ -113,24 +107,10 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){
dh12 = dp2 / dp1; dh12 = dp2 / dp1;


du = ONE - dh12 * dh21; du = ONE - dh12 * dh21;
if(du > ZERO)
{
dflag = ZERO;
*dd1 = *dd1 / du;
*dd2 = *dd2 / du;
*dx1 = *dx1 * du;
} else {
dflag = -ONE;

dh11 = ZERO;
dh12 = ZERO;
dh21 = ZERO;
dh22 = ZERO;

*dd1 = ZERO;
*dd2 = ZERO;
*dx1 = ZERO;
}
dflag = ZERO;
*dd1 = *dd1 / du;
*dd2 = *dd2 / du;
*dx1 = *dx1 * du;
} }
else else


+ 3
- 3
interface/zimatcopy.c View File

@@ -171,10 +171,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows,
} }
#endif #endif


if ( *lda > *ldb )
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2;
if ( *rows > *cols )
msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT) * 2;
else else
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2;
msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT) * 2;


b = malloc(msize); b = malloc(msize);
if ( b == NULL ) if ( b == NULL )


+ 0
- 1
kernel/generic/zimatcopy_cnc.c View File

@@ -40,7 +40,6 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a,


if ( rows <= 0 ) return(0); if ( rows <= 0 ) return(0);
if ( cols <= 0 ) return(0); if ( cols <= 0 ) return(0);
if ( alpha_r == 1.0 && alpha_i == 0.0 ) return (0);


aptr = a; aptr = a;
lda *= 2; lda *= 2;


+ 1
- 1
kernel/riscv64/axpby.c View File

@@ -33,7 +33,7 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT *
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix,iy; BLASLONG ix,iy;


if ( n < 0 ) return(0);
if ( n <= 0 ) return(0);


ix = 0; ix = 0;
iy = 0; iy = 0;


+ 1
- 1
kernel/riscv64/axpy.c View File

@@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix,iy; BLASLONG ix,iy;


if ( n < 0 ) return(0);
if ( n <= 0 ) return(0);
if ( da == 0.0 ) return(0); if ( da == 0.0 ) return(0);


ix = 0; ix = 0;


+ 1
- 1
kernel/riscv64/copy.c View File

@@ -41,7 +41,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
BLASLONG i=0; BLASLONG i=0;
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;


if ( n < 0 ) return(0);
if ( n <= 0 ) return(0);


while(i < n) while(i < n)
{ {


+ 1
- 1
kernel/riscv64/dot.c View File

@@ -46,7 +46,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
double dot = 0.0 ; double dot = 0.0 ;


if ( n < 0 ) return(dot);
if ( n < 1 ) return(dot);


while(i < n) while(i < n)
{ {


+ 1
- 1
kernel/riscv64/swap.c View File

@@ -41,7 +41,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x,
BLASLONG ix=0,iy=0; BLASLONG ix=0,iy=0;
FLOAT temp; FLOAT temp;


if ( n < 0 ) return(0);
if ( n <= 0 ) return(0);


while(i < n) while(i < n)
{ {


+ 1
- 1
kernel/riscv64/zaxpy.c View File

@@ -44,7 +44,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i,
BLASLONG inc_x2; BLASLONG inc_x2;
BLASLONG inc_y2; BLASLONG inc_y2;


if ( n < 0 ) return(0);
if ( n <= 0 ) return(0);
if ( da_r == 0.0 && da_i == 0.0 ) return(0); if ( da_r == 0.0 && da_i == 0.0 ) return(0);


ix = 0; ix = 0;


+ 1
- 1
kernel/riscv64/zcopy.c View File

@@ -43,7 +43,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
BLASLONG inc_x2; BLASLONG inc_x2;
BLASLONG inc_y2; BLASLONG inc_y2;


if ( n < 0 ) return(0);
if ( n <= 0 ) return(0);


inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;


+ 1
- 1
kernel/riscv64/zswap.c View File

@@ -45,7 +45,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm
BLASLONG inc_x2; BLASLONG inc_x2;
BLASLONG inc_y2; BLASLONG inc_y2;


if ( n < 0 ) return(0);
if ( n <= 0 ) return(0);


inc_x2 = 2 * inc_x; inc_x2 = 2 * inc_x;
inc_y2 = 2 * inc_y; inc_y2 = 2 * inc_y;


Loading…
Cancel
Save