Fix BLAS, BLAS-like functions and Generic RISC-V kernels (tags/v0.3.27)
@@ -289,6 +289,14 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA | |||
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | |||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||
void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||
void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); | |||
void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||
void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | |||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||
@@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double * | |||
void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | |||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | |||
void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||
float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||
void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||
double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||
void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||
float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||
void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||
double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||
int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, | |||
float *, float *, blasint *, float *, blasint *, | |||
float *, float *, blasint *); | |||
@@ -96,7 +96,7 @@ | |||
INTEGER ICAMAXTEST | |||
EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST | |||
* .. External Subroutines .. | |||
EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1 | |||
EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1 | |||
* .. Intrinsic Functions .. | |||
INTRINSIC MAX | |||
* .. Common blocks .. | |||
@@ -214,8 +214,8 @@ | |||
CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1), | |||
+ STRUE4(NP1),SFAC) | |||
ELSE IF (ICASE.EQ.8) THEN | |||
* .. CSCAL .. | |||
CALL CSCAL(N,CA,CX,INCX) | |||
* .. CSCALTEST .. | |||
CALL CSCALTEST(N,CA,CX,INCX) | |||
CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), | |||
+ SFAC) | |||
ELSE IF (ICASE.EQ.9) THEN | |||
@@ -236,14 +236,14 @@ | |||
* | |||
INCX = 1 | |||
IF (ICASE.EQ.8) THEN | |||
* CSCAL | |||
* CSCALTEST | |||
* Add a test for alpha equal to zero. | |||
CA = (0.0E0,0.0E0) | |||
DO 80 I = 1, 5 | |||
MWPCT(I) = (0.0E0,0.0E0) | |||
MWPCS(I) = (1.0E0,1.0E0) | |||
80 CONTINUE | |||
CALL CSCAL(5,CA,CX,INCX) | |||
CALL CSCALTEST(5,CA,CX,INCX) | |||
CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) | |||
ELSE IF (ICASE.EQ.9) THEN | |||
* CSSCALTEST | |||
@@ -685,7 +685,7 @@ real *sfac; | |||
static integer i__; | |||
extern /* Subroutine */ int ctest_(); | |||
static complex mwpcs[5], mwpct[5]; | |||
extern /* Subroutine */ int itest1_(), stest1_(); | |||
extern /* Subroutine */ int cscaltest_(), itest1_(), stest1_(); | |||
static complex cx[8]; | |||
extern real scnrm2test_(); | |||
static integer np1; | |||
@@ -727,7 +727,7 @@ real *sfac; | |||
stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); | |||
} else if (combla_1.icase == 8) { | |||
/* .. CSCAL .. */ | |||
cscal_(&combla_1.n, &ca, cx, &combla_1.incx); | |||
cscaltest_(&combla_1.n, &ca, cx, &combla_1.incx); | |||
ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], | |||
&ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); | |||
} else if (combla_1.icase == 9) { | |||
@@ -761,7 +761,7 @@ real *sfac; | |||
mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.; | |||
/* L80: */ | |||
} | |||
cscal_(&c__5, &ca, cx, &combla_1.incx); | |||
cscaltest_(&c__5, &ca, cx, &combla_1.incx); | |||
ctest_(&c__5, cx, mwpct, mwpcs, sfac); | |||
} else if (combla_1.icase == 9) { | |||
/* CSSCALTEST */ | |||
@@ -35,29 +35,26 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include "common.h" | |||
#ifdef FUNCTION_PROFILE | |||
#include "functable.h" | |||
#endif | |||
#ifndef COMPLEX | |||
#define SMP_THRESHOLD_MIN 65536.0 | |||
#ifdef XDOUBLE | |||
#define ERROR_NAME "QGEMT " | |||
#define ERROR_NAME "QGEMMT " | |||
#elif defined(DOUBLE) | |||
#define ERROR_NAME "DGEMT " | |||
#define ERROR_NAME "DGEMMT " | |||
#elif defined(BFLOAT16) | |||
#define ERROR_NAME "SBGEMT " | |||
#define ERROR_NAME "SBGEMMT " | |||
#else | |||
#define ERROR_NAME "SGEMT " | |||
#define ERROR_NAME "SGEMMT " | |||
#endif | |||
#else | |||
#define SMP_THRESHOLD_MIN 8192.0 | |||
#ifdef XDOUBLE | |||
#define ERROR_NAME "XGEMT " | |||
#define ERROR_NAME "XGEMMT " | |||
#elif defined(DOUBLE) | |||
#define ERROR_NAME "ZGEMT " | |||
#define ERROR_NAME "ZGEMMT " | |||
#else | |||
#define ERROR_NAME "CGEMT " | |||
#define ERROR_NAME "CGEMMT " | |||
#endif | |||
#endif | |||
@@ -68,18 +65,22 @@ | |||
#ifndef CBLAS | |||
void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
blasint * M, blasint * N, blasint * K, | |||
blasint * M, blasint * K, | |||
FLOAT * Alpha, | |||
IFLOAT * a, blasint * ldA, | |||
IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) | |||
{ | |||
blasint m, n, k; | |||
blasint m, k; | |||
blasint lda, ldb, ldc; | |||
int transa, transb, uplo; | |||
blasint info; | |||
char transA, transB, Uplo; | |||
blasint nrowa, nrowb; | |||
#if defined(COMPLEX) | |||
blasint ncolb; | |||
#endif | |||
IFLOAT *buffer; | |||
IFLOAT *aa, *bb; | |||
FLOAT *cc; | |||
@@ -92,7 +93,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
PRINT_DEBUG_NAME; | |||
m = *M; | |||
n = *N; | |||
k = *K; | |||
#if defined(COMPLEX) | |||
@@ -159,32 +159,47 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||
if (Uplo == 'L') | |||
uplo = 1; | |||
nrowa = m; | |||
if (transa & 1) nrowa = k; | |||
nrowb = k; | |||
#if defined(COMPLEX) | |||
ncolb = m; | |||
#endif | |||
if (transb & 1) { | |||
nrowb = m; | |||
#if defined(COMPLEX) | |||
ncolb = k; | |||
#endif | |||
} | |||
info = 0; | |||
if (uplo < 0) | |||
info = 14; | |||
if (ldc < m) | |||
if (ldc < MAX(1, m)) | |||
info = 13; | |||
if (ldb < MAX(1, nrowb)) | |||
info = 10; | |||
if (lda < MAX(1, nrowa)) | |||
info = 8; | |||
if (k < 0) | |||
info = 5; | |||
if (n < 0) | |||
info = 4; | |||
if (m < 0) | |||
info = 3; | |||
info = 4; | |||
if (transb < 0) | |||
info = 2; | |||
info = 3; | |||
if (transa < 0) | |||
info = 2; | |||
if (uplo < 0) | |||
info = 1; | |||
if (info) { | |||
if (info != 0) { | |||
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
return; | |||
} | |||
#else | |||
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, | |||
blasint N, blasint k, | |||
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint m, | |||
blasint k, | |||
#ifndef COMPLEX | |||
FLOAT alpha, | |||
IFLOAT * A, blasint LDA, | |||
@@ -205,17 +220,23 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
int transa, transb, uplo; | |||
blasint info; | |||
blasint m, n, lda, ldb; | |||
blasint lda, ldb; | |||
FLOAT *a, *b; | |||
#if defined(COMPLEX) | |||
blasint nrowb, ncolb; | |||
#endif | |||
XFLOAT *buffer; | |||
PRINT_DEBUG_CNAME; | |||
uplo = -1; | |||
transa = -1; | |||
transb = -1; | |||
info = 0; | |||
if (order == CblasColMajor) { | |||
if (Uplo == CblasUpper) uplo = 0; | |||
if (Uplo == CblasLower) uplo = 1; | |||
if (TransA == CblasNoTrans) | |||
transa = 0; | |||
@@ -248,9 +269,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
transb = 3; | |||
#endif | |||
m = M; | |||
n = N; | |||
a = (void *)A; | |||
b = (void *)B; | |||
lda = LDA; | |||
@@ -258,23 +276,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
info = -1; | |||
if (ldc < m) | |||
blasint nrowa; | |||
#if !defined(COMPLEX) | |||
blasint nrowb; | |||
#endif | |||
nrowa = m; | |||
if (transa & 1) nrowa = k; | |||
nrowb = k; | |||
#if defined(COMPLEX) | |||
ncolb = m; | |||
#endif | |||
if (transb & 1) { | |||
nrowb = m; | |||
#if defined(COMPLEX) | |||
ncolb = k; | |||
#endif | |||
} | |||
if (ldc < MAX(1, m)) | |||
info = 13; | |||
if (ldb < MAX(1, nrowb)) | |||
info = 10; | |||
if (lda < MAX(1, nrowa)) | |||
info = 8; | |||
if (k < 0) | |||
info = 5; | |||
if (n < 0) | |||
info = 4; | |||
if (m < 0) | |||
info = 3; | |||
info = 4; | |||
if (transb < 0) | |||
info = 2; | |||
info = 3; | |||
if (transa < 0) | |||
info = 2; | |||
if (uplo < 0) | |||
info = 1; | |||
} | |||
if (order == CblasRowMajor) { | |||
m = N; | |||
n = M; | |||
a = (void *)B; | |||
b = (void *)A; | |||
@@ -282,6 +319,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
lda = LDB; | |||
ldb = LDA; | |||
if (Uplo == CblasUpper) uplo = 0; | |||
if (Uplo == CblasLower) uplo = 1; | |||
if (TransB == CblasNoTrans) | |||
transa = 0; | |||
if (TransB == CblasTrans) | |||
@@ -315,29 +355,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
info = -1; | |||
if (ldc < m) | |||
blasint ncola; | |||
#if !defined(COMPLEX) | |||
blasint ncolb; | |||
#endif | |||
ncola = m; | |||
if (transa & 1) ncola = k; | |||
ncolb = k; | |||
#if defined(COMPLEX) | |||
nrowb = m; | |||
#endif | |||
if (transb & 1) { | |||
#if defined(COMPLEX) | |||
nrowb = k; | |||
#endif | |||
ncolb = m; | |||
} | |||
if (ldc < MAX(1,m)) | |||
info = 13; | |||
if (ldb < MAX(1, ncolb)) | |||
info = 8; | |||
if (lda < MAX(1, ncola)) | |||
info = 10; | |||
if (k < 0) | |||
info = 5; | |||
if (n < 0) | |||
info = 4; | |||
if (m < 0) | |||
info = 3; | |||
info = 4; | |||
if (transb < 0) | |||
info = 2; | |||
if (transa < 0) | |||
info = 3; | |||
if (uplo < 0) | |||
info = 1; | |||
} | |||
uplo = -1; | |||
if (Uplo == CblasUpper) | |||
uplo = 0; | |||
if (Uplo == CblasLower) | |||
uplo = 1; | |||
if (uplo < 0) | |||
info = 14; | |||
if (info >= 0) { | |||
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | |||
return; | |||
@@ -407,37 +460,48 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
#endif | |||
if ((m == 0) || (n == 0)) | |||
if (m == 0) | |||
return; | |||
IDEBUG_START; | |||
FUNCTION_PROFILE_START(); | |||
#if defined(COMPLEX) | |||
if (transb > 1){ | |||
#ifndef CBLAS | |||
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||
#else | |||
if (order == CblasColMajor) | |||
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||
if (order == CblasRowMajor) | |||
IMATCOPY_K_RNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||
#endif | |||
} | |||
#endif | |||
const blasint incb = (transb == 0) ? 1 : ldb; | |||
const blasint incb = ((transb & 1) == 0) ? 1 : ldb; | |||
if (uplo == 1) { | |||
for (i = 0; i < n; i++) { | |||
j = n - i; | |||
for (i = 0; i < m; i++) { | |||
j = m - i; | |||
l = j; | |||
#if defined(COMPLEX) | |||
aa = a + i * 2; | |||
bb = b + i * ldb * 2; | |||
if (transa) { | |||
l = k; | |||
if (transa & 1) { | |||
aa = a + lda * i * 2; | |||
bb = b + i * 2; | |||
} | |||
if (transb & 1) | |||
bb = b + i * 2; | |||
cc = c + i * 2 * ldc + i * 2; | |||
#else | |||
aa = a + i; | |||
bb = b + i * ldb; | |||
if (transa) { | |||
l = k; | |||
if (transa & 1) { | |||
aa = a + lda * i; | |||
bb = b + i; | |||
} | |||
if (transb & 1) | |||
bb = b + i; | |||
cc = c + i * ldc + i; | |||
#endif | |||
@@ -447,7 +511,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
NULL, 0); | |||
if (alpha_r == ZERO && alpha_i == ZERO) | |||
return; | |||
continue; | |||
#else | |||
if (beta != ONE) | |||
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | |||
@@ -458,8 +522,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
IDEBUG_START; | |||
FUNCTION_PROFILE_START(); | |||
buffer_size = j + k + 128 / sizeof(FLOAT); | |||
#ifdef WINDOWS_ABI | |||
buffer_size += 160 / sizeof(FLOAT); | |||
@@ -479,20 +541,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
#endif | |||
#if defined(COMPLEX) | |||
if (!(transa & 1)) | |||
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | |||
aa, lda, bb, incb, cc, 1, | |||
buffer); | |||
else | |||
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||
aa, lda, bb, incb, cc, 1, | |||
buffer); | |||
#else | |||
if (!(transa & 1)) | |||
(gemv[(int)transa]) (j, k, 0, alpha, aa, lda, | |||
bb, incb, cc, 1, buffer); | |||
else | |||
(gemv[(int)transa]) (k, j, 0, alpha, aa, lda, | |||
bb, incb, cc, 1, buffer); | |||
#endif | |||
#ifdef SMP | |||
} else { | |||
if (!(transa & 1)) | |||
(gemv_thread[(int)transa]) (j, k, alpha, aa, | |||
lda, bb, incb, cc, | |||
1, buffer, | |||
nthreads); | |||
else | |||
(gemv_thread[(int)transa]) (k, j, alpha, aa, | |||
lda, bb, incb, cc, | |||
1, buffer, | |||
nthreads); | |||
} | |||
#endif | |||
@@ -501,21 +577,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
} | |||
} else { | |||
for (i = 0; i < n; i++) { | |||
for (i = 0; i < m; i++) { | |||
j = i + 1; | |||
l = j; | |||
#if defined COMPLEX | |||
bb = b + i * ldb * 2; | |||
if (transa) { | |||
l = k; | |||
if (transb & 1) { | |||
bb = b + i * 2; | |||
} | |||
cc = c + i * 2 * ldc; | |||
#else | |||
bb = b + i * ldb; | |||
if (transa) { | |||
l = k; | |||
if (transb & 1) { | |||
bb = b + i; | |||
} | |||
cc = c + i * ldc; | |||
@@ -527,7 +601,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
NULL, 0); | |||
if (alpha_r == ZERO && alpha_i == ZERO) | |||
return; | |||
continue; | |||
#else | |||
if (beta != ONE) | |||
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | |||
@@ -537,8 +611,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
#endif | |||
IDEBUG_START; | |||
FUNCTION_PROFILE_START(); | |||
buffer_size = j + k + 128 / sizeof(FLOAT); | |||
#ifdef WINDOWS_ABI | |||
buffer_size += 160 / sizeof(FLOAT); | |||
@@ -558,32 +630,41 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||
#endif | |||
#if defined(COMPLEX) | |||
if (!(transa & 1)) | |||
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | |||
a, lda, bb, incb, cc, 1, | |||
buffer); | |||
else | |||
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||
a, lda, bb, incb, cc, 1, | |||
buffer); | |||
#else | |||
if (!(transa & 1)) | |||
(gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, | |||
incb, cc, 1, buffer); | |||
else | |||
(gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb, | |||
incb, cc, 1, buffer); | |||
#endif | |||
#ifdef SMP | |||
} else { | |||
if (!(transa & 1)) | |||
(gemv_thread[(int)transa]) (j, k, alpha, a, lda, | |||
bb, incb, cc, 1, | |||
buffer, nthreads); | |||
else | |||
(gemv_thread[(int)transa]) (k, j, alpha, a, lda, | |||
bb, incb, cc, 1, | |||
buffer, nthreads); | |||
} | |||
#endif | |||
STACK_FREE(buffer); | |||
} | |||
} | |||
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, | |||
args.m * args.k + args.k * args.n + | |||
args.m * args.n, 2 * args.m * args.n * args.k); | |||
IDEBUG_END; | |||
return; | |||
} | |||
} |
@@ -149,10 +149,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||
#endif | |||
if ( *lda > *ldb ) | |||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT); | |||
else | |||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT); | |||
if ( *rows > *cols ) | |||
msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT); | |||
else | |||
msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT); | |||
b = malloc(msize); | |||
if ( b == NULL ) | |||
@@ -96,12 +96,6 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||
else | |||
{ | |||
dp2 = *dd2 * dy1; | |||
if(dp2 == ZERO) | |||
{ | |||
dflag = -TWO; | |||
dparam[0] = dflag; | |||
return; | |||
} | |||
dp1 = *dd1 * *dx1; | |||
dq2 = dp2 * dy1; | |||
dq1 = dp1 * *dx1; | |||
@@ -113,24 +107,10 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||
dh12 = dp2 / dp1; | |||
du = ONE - dh12 * dh21; | |||
if(du > ZERO) | |||
{ | |||
dflag = ZERO; | |||
*dd1 = *dd1 / du; | |||
*dd2 = *dd2 / du; | |||
*dx1 = *dx1 * du; | |||
} else { | |||
dflag = -ONE; | |||
dh11 = ZERO; | |||
dh12 = ZERO; | |||
dh21 = ZERO; | |||
dh22 = ZERO; | |||
*dd1 = ZERO; | |||
*dd2 = ZERO; | |||
*dx1 = ZERO; | |||
} | |||
dflag = ZERO; | |||
*dd1 = *dd1 / du; | |||
*dd2 = *dd2 / du; | |||
*dx1 = *dx1 * du; | |||
} | |||
else | |||
@@ -171,10 +171,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||
} | |||
#endif | |||
if ( *lda > *ldb ) | |||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2; | |||
if ( *rows > *cols ) | |||
msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT) * 2; | |||
else | |||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2; | |||
msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT) * 2; | |||
b = malloc(msize); | |||
if ( b == NULL ) | |||
@@ -40,7 +40,6 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, | |||
if ( rows <= 0 ) return(0); | |||
if ( cols <= 0 ) return(0); | |||
if ( alpha_r == 1.0 && alpha_i == 0.0 ) return (0); | |||
aptr = a; | |||
lda *= 2; | |||
@@ -45,6 +45,11 @@ DAXPYKERNEL = ../riscv64/axpy.c | |||
CAXPYKERNEL = ../riscv64/zaxpy.c | |||
ZAXPYKERNEL = ../riscv64/zaxpy.c | |||
SAXPBYKERNEL = ../riscv64/axpby.c | |||
DAXPBYKERNEL = ../riscv64/axpby.c | |||
CAXPBYKERNEL = ../riscv64/zaxpby.c | |||
ZAXPBYKERNEL = ../riscv64/zaxpby.c | |||
SCOPYKERNEL = ../riscv64/copy.c | |||
DCOPYKERNEL = ../riscv64/copy.c | |||
CCOPYKERNEL = ../riscv64/zcopy.c | |||
@@ -33,7 +33,7 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT * | |||
BLASLONG i=0; | |||
BLASLONG ix,iy; | |||
if ( n < 0 ) return(0); | |||
if ( n <= 0 ) return(0); | |||
ix = 0; | |||
iy = 0; | |||
@@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS | |||
BLASLONG i=0; | |||
BLASLONG ix,iy; | |||
if ( n < 0 ) return(0); | |||
if ( n <= 0 ) return(0); | |||
if ( da == 0.0 ) return(0); | |||
ix = 0; | |||
@@ -41,7 +41,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
BLASLONG i=0; | |||
BLASLONG ix=0,iy=0; | |||
if ( n < 0 ) return(0); | |||
if ( n <= 0 ) return(0); | |||
while(i < n) | |||
{ | |||
@@ -46,7 +46,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
BLASLONG ix=0,iy=0; | |||
double dot = 0.0 ; | |||
if ( n < 0 ) return(dot); | |||
if ( n < 1 ) return(dot); | |||
while(i < n) | |||
{ | |||
@@ -41,7 +41,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, | |||
BLASLONG ix=0,iy=0; | |||
FLOAT temp; | |||
if ( n < 0 ) return(0); | |||
if ( n <= 0 ) return(0); | |||
while(i < n) | |||
{ | |||
@@ -44,7 +44,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||
BLASLONG inc_x2; | |||
BLASLONG inc_y2; | |||
if ( n < 0 ) return(0); | |||
if ( n <= 0 ) return(0); | |||
if ( da_r == 0.0 && da_i == 0.0 ) return(0); | |||
ix = 0; | |||
@@ -43,7 +43,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||
BLASLONG inc_x2; | |||
BLASLONG inc_y2; | |||
if ( n < 0 ) return(0); | |||
if ( n <= 0 ) return(0); | |||
inc_x2 = 2 * inc_x; | |||
inc_y2 = 2 * inc_y; | |||
@@ -45,7 +45,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm | |||
BLASLONG inc_x2; | |||
BLASLONG inc_y2; | |||
if ( n < 0 ) return(0); | |||
if ( n <= 0 ) return(0); | |||
inc_x2 = 2 * inc_x; | |||
inc_y2 = 2 * inc_y; | |||