* Fixed gemmt, imatcopy, zimatcopy_cnc functions * Fixed cblas_cscal testing in ctest * Removed rotmg unreachable code * Added zero size checks tags/v0.3.27
@@ -289,6 +289,14 @@ void cblas_zgemm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLA | |||||
void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | void cblas_zgemm3m(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, OPENBLAS_CONST blasint K, | ||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | ||||
void cblas_sgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_dgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST double alpha, OPENBLAS_CONST double *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST double *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST double beta, double *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_cgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_zgemmt(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransA, OPENBLAS_CONST enum CBLAS_TRANSPOSE TransB, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint K, | |||||
OPENBLAS_CONST void *alpha, OPENBLAS_CONST void *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST void *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST void *beta, void *C, OPENBLAS_CONST blasint ldc); | |||||
void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | void cblas_ssymm(OPENBLAS_CONST enum CBLAS_ORDER Order, OPENBLAS_CONST enum CBLAS_SIDE Side, OPENBLAS_CONST enum CBLAS_UPLO Uplo, OPENBLAS_CONST blasint M, OPENBLAS_CONST blasint N, | ||||
OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | OPENBLAS_CONST float alpha, OPENBLAS_CONST float *A, OPENBLAS_CONST blasint lda, OPENBLAS_CONST float *B, OPENBLAS_CONST blasint ldb, OPENBLAS_CONST float beta, float *C, OPENBLAS_CONST blasint ldc); | ||||
@@ -498,6 +498,15 @@ void BLASFUNC(zgemm3m)(char *, char *, blasint *, blasint *, blasint *, double * | |||||
void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | void BLASFUNC(xgemm3m)(char *, char *, blasint *, blasint *, blasint *, xdouble *, | ||||
xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | xdouble *, blasint *, xdouble *, blasint *, xdouble *, xdouble *, blasint *); | ||||
void BLASFUNC(sgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(dgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
void BLASFUNC(cgemmt)(char*, char *, char *, blasint *, blasint *, float *, | |||||
float *, blasint *, float *, blasint *, float *, float *, blasint *); | |||||
void BLASFUNC(zgemmt)(char*, char *, char *, blasint *, blasint *, double *, | |||||
double *, blasint *, double *, blasint *, double *, double *, blasint *); | |||||
int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, | int BLASFUNC(sge2mm)(char *, char *, char *, blasint *, blasint *, | ||||
float *, float *, blasint *, float *, blasint *, | float *, float *, blasint *, float *, blasint *, | ||||
float *, float *, blasint *); | float *, float *, blasint *); | ||||
@@ -96,7 +96,7 @@ | |||||
INTEGER ICAMAXTEST | INTEGER ICAMAXTEST | ||||
EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST | EXTERNAL SCASUMTEST, SCNRM2TEST, ICAMAXTEST | ||||
* .. External Subroutines .. | * .. External Subroutines .. | ||||
EXTERNAL CSCAL, CSSCALTEST, CTEST, ITEST1, STEST1 | |||||
EXTERNAL CSCALTEST, CSSCALTEST, CTEST, ITEST1, STEST1 | |||||
* .. Intrinsic Functions .. | * .. Intrinsic Functions .. | ||||
INTRINSIC MAX | INTRINSIC MAX | ||||
* .. Common blocks .. | * .. Common blocks .. | ||||
@@ -214,8 +214,8 @@ | |||||
CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1), | CALL STEST1(SCASUMTEST(N,CX,INCX),STRUE4(NP1), | ||||
+ STRUE4(NP1),SFAC) | + STRUE4(NP1),SFAC) | ||||
ELSE IF (ICASE.EQ.8) THEN | ELSE IF (ICASE.EQ.8) THEN | ||||
* .. CSCAL .. | |||||
CALL CSCAL(N,CA,CX,INCX) | |||||
* .. CSCALTEST .. | |||||
CALL CSCALTEST(N,CA,CX,INCX) | |||||
CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), | CALL CTEST(LEN,CX,CTRUE5(1,NP1,INCX),CTRUE5(1,NP1,INCX), | ||||
+ SFAC) | + SFAC) | ||||
ELSE IF (ICASE.EQ.9) THEN | ELSE IF (ICASE.EQ.9) THEN | ||||
@@ -236,14 +236,14 @@ | |||||
* | * | ||||
INCX = 1 | INCX = 1 | ||||
IF (ICASE.EQ.8) THEN | IF (ICASE.EQ.8) THEN | ||||
* CSCAL | |||||
* CSCALTEST | |||||
* Add a test for alpha equal to zero. | * Add a test for alpha equal to zero. | ||||
CA = (0.0E0,0.0E0) | CA = (0.0E0,0.0E0) | ||||
DO 80 I = 1, 5 | DO 80 I = 1, 5 | ||||
MWPCT(I) = (0.0E0,0.0E0) | MWPCT(I) = (0.0E0,0.0E0) | ||||
MWPCS(I) = (1.0E0,1.0E0) | MWPCS(I) = (1.0E0,1.0E0) | ||||
80 CONTINUE | 80 CONTINUE | ||||
CALL CSCAL(5,CA,CX,INCX) | |||||
CALL CSCALTEST(5,CA,CX,INCX) | |||||
CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) | CALL CTEST(5,CX,MWPCT,MWPCS,SFAC) | ||||
ELSE IF (ICASE.EQ.9) THEN | ELSE IF (ICASE.EQ.9) THEN | ||||
* CSSCALTEST | * CSSCALTEST | ||||
@@ -685,7 +685,7 @@ real *sfac; | |||||
static integer i__; | static integer i__; | ||||
extern /* Subroutine */ int ctest_(); | extern /* Subroutine */ int ctest_(); | ||||
static complex mwpcs[5], mwpct[5]; | static complex mwpcs[5], mwpct[5]; | ||||
extern /* Subroutine */ int itest1_(), stest1_(); | |||||
extern /* Subroutine */ int cscaltest_(), itest1_(), stest1_(); | |||||
static complex cx[8]; | static complex cx[8]; | ||||
extern real scnrm2test_(); | extern real scnrm2test_(); | ||||
static integer np1; | static integer np1; | ||||
@@ -727,7 +727,7 @@ real *sfac; | |||||
stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); | stest1_(&r__1, &strue4[np1 - 1], &strue4[np1 - 1], sfac); | ||||
} else if (combla_1.icase == 8) { | } else if (combla_1.icase == 8) { | ||||
/* .. CSCAL .. */ | /* .. CSCAL .. */ | ||||
cscal_(&combla_1.n, &ca, cx, &combla_1.incx); | |||||
cscaltest_(&combla_1.n, &ca, cx, &combla_1.incx); | |||||
ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], | ctest_(&len, cx, &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], | ||||
&ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); | &ctrue5[(np1 + combla_1.incx * 5 << 3) - 48], sfac); | ||||
} else if (combla_1.icase == 9) { | } else if (combla_1.icase == 9) { | ||||
@@ -761,7 +761,7 @@ real *sfac; | |||||
mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.; | mwpcs[i__1].r = (float)1., mwpcs[i__1].i = (float)1.; | ||||
/* L80: */ | /* L80: */ | ||||
} | } | ||||
cscal_(&c__5, &ca, cx, &combla_1.incx); | |||||
cscaltest_(&c__5, &ca, cx, &combla_1.incx); | |||||
ctest_(&c__5, cx, mwpct, mwpcs, sfac); | ctest_(&c__5, cx, mwpct, mwpcs, sfac); | ||||
} else if (combla_1.icase == 9) { | } else if (combla_1.icase == 9) { | ||||
/* CSSCALTEST */ | /* CSSCALTEST */ | ||||
@@ -35,29 +35,26 @@ | |||||
#include <stdio.h> | #include <stdio.h> | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include "common.h" | #include "common.h" | ||||
#ifdef FUNCTION_PROFILE | |||||
#include "functable.h" | |||||
#endif | |||||
#ifndef COMPLEX | #ifndef COMPLEX | ||||
#define SMP_THRESHOLD_MIN 65536.0 | #define SMP_THRESHOLD_MIN 65536.0 | ||||
#ifdef XDOUBLE | #ifdef XDOUBLE | ||||
#define ERROR_NAME "QGEMT " | |||||
#define ERROR_NAME "QGEMMT " | |||||
#elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
#define ERROR_NAME "DGEMT " | |||||
#define ERROR_NAME "DGEMMT " | |||||
#elif defined(BFLOAT16) | #elif defined(BFLOAT16) | ||||
#define ERROR_NAME "SBGEMT " | |||||
#define ERROR_NAME "SBGEMMT " | |||||
#else | #else | ||||
#define ERROR_NAME "SGEMT " | |||||
#define ERROR_NAME "SGEMMT " | |||||
#endif | #endif | ||||
#else | #else | ||||
#define SMP_THRESHOLD_MIN 8192.0 | #define SMP_THRESHOLD_MIN 8192.0 | ||||
#ifdef XDOUBLE | #ifdef XDOUBLE | ||||
#define ERROR_NAME "XGEMT " | |||||
#define ERROR_NAME "XGEMMT " | |||||
#elif defined(DOUBLE) | #elif defined(DOUBLE) | ||||
#define ERROR_NAME "ZGEMT " | |||||
#define ERROR_NAME "ZGEMMT " | |||||
#else | #else | ||||
#define ERROR_NAME "CGEMT " | |||||
#define ERROR_NAME "CGEMMT " | |||||
#endif | #endif | ||||
#endif | #endif | ||||
@@ -68,18 +65,22 @@ | |||||
#ifndef CBLAS | #ifndef CBLAS | ||||
void NAME(char *UPLO, char *TRANSA, char *TRANSB, | void NAME(char *UPLO, char *TRANSA, char *TRANSB, | ||||
blasint * M, blasint * N, blasint * K, | |||||
blasint * M, blasint * K, | |||||
FLOAT * Alpha, | FLOAT * Alpha, | ||||
IFLOAT * a, blasint * ldA, | IFLOAT * a, blasint * ldA, | ||||
IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) | IFLOAT * b, blasint * ldB, FLOAT * Beta, FLOAT * c, blasint * ldC) | ||||
{ | { | ||||
blasint m, n, k; | |||||
blasint m, k; | |||||
blasint lda, ldb, ldc; | blasint lda, ldb, ldc; | ||||
int transa, transb, uplo; | int transa, transb, uplo; | ||||
blasint info; | blasint info; | ||||
char transA, transB, Uplo; | char transA, transB, Uplo; | ||||
blasint nrowa, nrowb; | |||||
#if defined(COMPLEX) | |||||
blasint ncolb; | |||||
#endif | |||||
IFLOAT *buffer; | IFLOAT *buffer; | ||||
IFLOAT *aa, *bb; | IFLOAT *aa, *bb; | ||||
FLOAT *cc; | FLOAT *cc; | ||||
@@ -92,7 +93,6 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||||
PRINT_DEBUG_NAME; | PRINT_DEBUG_NAME; | ||||
m = *M; | m = *M; | ||||
n = *N; | |||||
k = *K; | k = *K; | ||||
#if defined(COMPLEX) | #if defined(COMPLEX) | ||||
@@ -159,32 +159,47 @@ void NAME(char *UPLO, char *TRANSA, char *TRANSB, | |||||
if (Uplo == 'L') | if (Uplo == 'L') | ||||
uplo = 1; | uplo = 1; | ||||
nrowa = m; | |||||
if (transa & 1) nrowa = k; | |||||
nrowb = k; | |||||
#if defined(COMPLEX) | |||||
ncolb = m; | |||||
#endif | |||||
if (transb & 1) { | |||||
nrowb = m; | |||||
#if defined(COMPLEX) | |||||
ncolb = k; | |||||
#endif | |||||
} | |||||
info = 0; | info = 0; | ||||
if (uplo < 0) | |||||
info = 14; | |||||
if (ldc < m) | |||||
if (ldc < MAX(1, m)) | |||||
info = 13; | info = 13; | ||||
if (ldb < MAX(1, nrowb)) | |||||
info = 10; | |||||
if (lda < MAX(1, nrowa)) | |||||
info = 8; | |||||
if (k < 0) | if (k < 0) | ||||
info = 5; | info = 5; | ||||
if (n < 0) | |||||
info = 4; | |||||
if (m < 0) | if (m < 0) | ||||
info = 3; | |||||
info = 4; | |||||
if (transb < 0) | if (transb < 0) | ||||
info = 2; | |||||
info = 3; | |||||
if (transa < 0) | if (transa < 0) | ||||
info = 2; | |||||
if (uplo < 0) | |||||
info = 1; | info = 1; | ||||
if (info) { | |||||
if (info != 0) { | |||||
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | ||||
return; | return; | ||||
} | } | ||||
#else | #else | ||||
void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | ||||
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint M, | |||||
blasint N, blasint k, | |||||
enum CBLAS_TRANSPOSE TransA, enum CBLAS_TRANSPOSE TransB, blasint m, | |||||
blasint k, | |||||
#ifndef COMPLEX | #ifndef COMPLEX | ||||
FLOAT alpha, | FLOAT alpha, | ||||
IFLOAT * A, blasint LDA, | IFLOAT * A, blasint LDA, | ||||
@@ -205,17 +220,23 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
int transa, transb, uplo; | int transa, transb, uplo; | ||||
blasint info; | blasint info; | ||||
blasint m, n, lda, ldb; | |||||
blasint lda, ldb; | |||||
FLOAT *a, *b; | FLOAT *a, *b; | ||||
#if defined(COMPLEX) | |||||
blasint nrowb, ncolb; | |||||
#endif | |||||
XFLOAT *buffer; | XFLOAT *buffer; | ||||
PRINT_DEBUG_CNAME; | PRINT_DEBUG_CNAME; | ||||
uplo = -1; | |||||
transa = -1; | transa = -1; | ||||
transb = -1; | transb = -1; | ||||
info = 0; | info = 0; | ||||
if (order == CblasColMajor) { | if (order == CblasColMajor) { | ||||
if (Uplo == CblasUpper) uplo = 0; | |||||
if (Uplo == CblasLower) uplo = 1; | |||||
if (TransA == CblasNoTrans) | if (TransA == CblasNoTrans) | ||||
transa = 0; | transa = 0; | ||||
@@ -248,9 +269,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
transb = 3; | transb = 3; | ||||
#endif | #endif | ||||
m = M; | |||||
n = N; | |||||
a = (void *)A; | a = (void *)A; | ||||
b = (void *)B; | b = (void *)B; | ||||
lda = LDA; | lda = LDA; | ||||
@@ -258,23 +276,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
info = -1; | info = -1; | ||||
if (ldc < m) | |||||
blasint nrowa; | |||||
#if !defined(COMPLEX) | |||||
blasint nrowb; | |||||
#endif | |||||
nrowa = m; | |||||
if (transa & 1) nrowa = k; | |||||
nrowb = k; | |||||
#if defined(COMPLEX) | |||||
ncolb = m; | |||||
#endif | |||||
if (transb & 1) { | |||||
nrowb = m; | |||||
#if defined(COMPLEX) | |||||
ncolb = k; | |||||
#endif | |||||
} | |||||
if (ldc < MAX(1, m)) | |||||
info = 13; | info = 13; | ||||
if (ldb < MAX(1, nrowb)) | |||||
info = 10; | |||||
if (lda < MAX(1, nrowa)) | |||||
info = 8; | |||||
if (k < 0) | if (k < 0) | ||||
info = 5; | info = 5; | ||||
if (n < 0) | |||||
info = 4; | |||||
if (m < 0) | if (m < 0) | ||||
info = 3; | |||||
info = 4; | |||||
if (transb < 0) | if (transb < 0) | ||||
info = 2; | |||||
info = 3; | |||||
if (transa < 0) | if (transa < 0) | ||||
info = 2; | |||||
if (uplo < 0) | |||||
info = 1; | info = 1; | ||||
} | } | ||||
if (order == CblasRowMajor) { | if (order == CblasRowMajor) { | ||||
m = N; | |||||
n = M; | |||||
a = (void *)B; | a = (void *)B; | ||||
b = (void *)A; | b = (void *)A; | ||||
@@ -282,6 +319,9 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
lda = LDB; | lda = LDB; | ||||
ldb = LDA; | ldb = LDA; | ||||
if (Uplo == CblasUpper) uplo = 0; | |||||
if (Uplo == CblasLower) uplo = 1; | |||||
if (TransB == CblasNoTrans) | if (TransB == CblasNoTrans) | ||||
transa = 0; | transa = 0; | ||||
if (TransB == CblasTrans) | if (TransB == CblasTrans) | ||||
@@ -315,29 +355,42 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
info = -1; | info = -1; | ||||
if (ldc < m) | |||||
blasint ncola; | |||||
#if !defined(COMPLEX) | |||||
blasint ncolb; | |||||
#endif | |||||
ncola = m; | |||||
if (transa & 1) ncola = k; | |||||
ncolb = k; | |||||
#if defined(COMPLEX) | |||||
nrowb = m; | |||||
#endif | |||||
if (transb & 1) { | |||||
#if defined(COMPLEX) | |||||
nrowb = k; | |||||
#endif | |||||
ncolb = m; | |||||
} | |||||
if (ldc < MAX(1,m)) | |||||
info = 13; | info = 13; | ||||
if (ldb < MAX(1, ncolb)) | |||||
info = 8; | |||||
if (lda < MAX(1, ncola)) | |||||
info = 10; | |||||
if (k < 0) | if (k < 0) | ||||
info = 5; | info = 5; | ||||
if (n < 0) | |||||
info = 4; | |||||
if (m < 0) | if (m < 0) | ||||
info = 3; | |||||
info = 4; | |||||
if (transb < 0) | if (transb < 0) | ||||
info = 2; | info = 2; | ||||
if (transa < 0) | if (transa < 0) | ||||
info = 3; | |||||
if (uplo < 0) | |||||
info = 1; | info = 1; | ||||
} | } | ||||
uplo = -1; | |||||
if (Uplo == CblasUpper) | |||||
uplo = 0; | |||||
if (Uplo == CblasLower) | |||||
uplo = 1; | |||||
if (uplo < 0) | |||||
info = 14; | |||||
if (info >= 0) { | if (info >= 0) { | ||||
BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | BLASFUNC(xerbla) (ERROR_NAME, &info, sizeof(ERROR_NAME)); | ||||
return; | return; | ||||
@@ -407,37 +460,48 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
#endif | #endif | ||||
if ((m == 0) || (n == 0)) | |||||
if (m == 0) | |||||
return; | return; | ||||
IDEBUG_START; | IDEBUG_START; | ||||
FUNCTION_PROFILE_START(); | |||||
#if defined(COMPLEX) | |||||
if (transb > 1){ | |||||
#ifndef CBLAS | |||||
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||||
#else | |||||
if (order == CblasColMajor) | |||||
IMATCOPY_K_CNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||||
if (order == CblasRowMajor) | |||||
IMATCOPY_K_RNC(nrowb, ncolb, (FLOAT)(1.0), (FLOAT)(0.0), b, ldb); | |||||
#endif | |||||
} | |||||
#endif | |||||
const blasint incb = (transb == 0) ? 1 : ldb; | |||||
const blasint incb = ((transb & 1) == 0) ? 1 : ldb; | |||||
if (uplo == 1) { | if (uplo == 1) { | ||||
for (i = 0; i < n; i++) { | |||||
j = n - i; | |||||
for (i = 0; i < m; i++) { | |||||
j = m - i; | |||||
l = j; | l = j; | ||||
#if defined(COMPLEX) | #if defined(COMPLEX) | ||||
aa = a + i * 2; | aa = a + i * 2; | ||||
bb = b + i * ldb * 2; | bb = b + i * ldb * 2; | ||||
if (transa) { | |||||
l = k; | |||||
if (transa & 1) { | |||||
aa = a + lda * i * 2; | aa = a + lda * i * 2; | ||||
bb = b + i * 2; | |||||
} | } | ||||
if (transb & 1) | |||||
bb = b + i * 2; | |||||
cc = c + i * 2 * ldc + i * 2; | cc = c + i * 2 * ldc + i * 2; | ||||
#else | #else | ||||
aa = a + i; | aa = a + i; | ||||
bb = b + i * ldb; | bb = b + i * ldb; | ||||
if (transa) { | |||||
l = k; | |||||
if (transa & 1) { | |||||
aa = a + lda * i; | aa = a + lda * i; | ||||
bb = b + i; | |||||
} | } | ||||
if (transb & 1) | |||||
bb = b + i; | |||||
cc = c + i * ldc + i; | cc = c + i * ldc + i; | ||||
#endif | #endif | ||||
@@ -447,7 +511,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
NULL, 0); | NULL, 0); | ||||
if (alpha_r == ZERO && alpha_i == ZERO) | if (alpha_r == ZERO && alpha_i == ZERO) | ||||
return; | |||||
continue; | |||||
#else | #else | ||||
if (beta != ONE) | if (beta != ONE) | ||||
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | ||||
@@ -458,8 +522,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
IDEBUG_START; | IDEBUG_START; | ||||
FUNCTION_PROFILE_START(); | |||||
buffer_size = j + k + 128 / sizeof(FLOAT); | buffer_size = j + k + 128 / sizeof(FLOAT); | ||||
#ifdef WINDOWS_ABI | #ifdef WINDOWS_ABI | ||||
buffer_size += 160 / sizeof(FLOAT); | buffer_size += 160 / sizeof(FLOAT); | ||||
@@ -479,20 +541,34 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
#endif | #endif | ||||
#if defined(COMPLEX) | #if defined(COMPLEX) | ||||
if (!(transa & 1)) | |||||
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | ||||
aa, lda, bb, incb, cc, 1, | aa, lda, bb, incb, cc, 1, | ||||
buffer); | buffer); | ||||
else | |||||
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||||
aa, lda, bb, incb, cc, 1, | |||||
buffer); | |||||
#else | #else | ||||
if (!(transa & 1)) | |||||
(gemv[(int)transa]) (j, k, 0, alpha, aa, lda, | (gemv[(int)transa]) (j, k, 0, alpha, aa, lda, | ||||
bb, incb, cc, 1, buffer); | bb, incb, cc, 1, buffer); | ||||
else | |||||
(gemv[(int)transa]) (k, j, 0, alpha, aa, lda, | |||||
bb, incb, cc, 1, buffer); | |||||
#endif | #endif | ||||
#ifdef SMP | #ifdef SMP | ||||
} else { | } else { | ||||
if (!(transa & 1)) | |||||
(gemv_thread[(int)transa]) (j, k, alpha, aa, | (gemv_thread[(int)transa]) (j, k, alpha, aa, | ||||
lda, bb, incb, cc, | lda, bb, incb, cc, | ||||
1, buffer, | 1, buffer, | ||||
nthreads); | nthreads); | ||||
else | |||||
(gemv_thread[(int)transa]) (k, j, alpha, aa, | |||||
lda, bb, incb, cc, | |||||
1, buffer, | |||||
nthreads); | |||||
} | } | ||||
#endif | #endif | ||||
@@ -501,21 +577,19 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
} | } | ||||
} else { | } else { | ||||
for (i = 0; i < n; i++) { | |||||
for (i = 0; i < m; i++) { | |||||
j = i + 1; | j = i + 1; | ||||
l = j; | l = j; | ||||
#if defined COMPLEX | #if defined COMPLEX | ||||
bb = b + i * ldb * 2; | bb = b + i * ldb * 2; | ||||
if (transa) { | |||||
l = k; | |||||
if (transb & 1) { | |||||
bb = b + i * 2; | bb = b + i * 2; | ||||
} | } | ||||
cc = c + i * 2 * ldc; | cc = c + i * 2 * ldc; | ||||
#else | #else | ||||
bb = b + i * ldb; | bb = b + i * ldb; | ||||
if (transa) { | |||||
l = k; | |||||
if (transb & 1) { | |||||
bb = b + i; | bb = b + i; | ||||
} | } | ||||
cc = c + i * ldc; | cc = c + i * ldc; | ||||
@@ -527,7 +601,7 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
NULL, 0); | NULL, 0); | ||||
if (alpha_r == ZERO && alpha_i == ZERO) | if (alpha_r == ZERO && alpha_i == ZERO) | ||||
return; | |||||
continue; | |||||
#else | #else | ||||
if (beta != ONE) | if (beta != ONE) | ||||
SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | SCAL_K(l, 0, 0, beta, cc, 1, NULL, 0, NULL, 0); | ||||
@@ -537,8 +611,6 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
#endif | #endif | ||||
IDEBUG_START; | IDEBUG_START; | ||||
FUNCTION_PROFILE_START(); | |||||
buffer_size = j + k + 128 / sizeof(FLOAT); | buffer_size = j + k + 128 / sizeof(FLOAT); | ||||
#ifdef WINDOWS_ABI | #ifdef WINDOWS_ABI | ||||
buffer_size += 160 / sizeof(FLOAT); | buffer_size += 160 / sizeof(FLOAT); | ||||
@@ -558,32 +630,41 @@ void CNAME(enum CBLAS_ORDER order, enum CBLAS_UPLO Uplo, | |||||
#endif | #endif | ||||
#if defined(COMPLEX) | #if defined(COMPLEX) | ||||
if (!(transa & 1)) | |||||
(gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | (gemv[(int)transa]) (j, k, 0, alpha_r, alpha_i, | ||||
a, lda, bb, incb, cc, 1, | a, lda, bb, incb, cc, 1, | ||||
buffer); | buffer); | ||||
else | |||||
(gemv[(int)transa]) (k, j, 0, alpha_r, alpha_i, | |||||
a, lda, bb, incb, cc, 1, | |||||
buffer); | |||||
#else | #else | ||||
if (!(transa & 1)) | |||||
(gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, | (gemv[(int)transa]) (j, k, 0, alpha, a, lda, bb, | ||||
incb, cc, 1, buffer); | incb, cc, 1, buffer); | ||||
else | |||||
(gemv[(int)transa]) (k, j, 0, alpha, a, lda, bb, | |||||
incb, cc, 1, buffer); | |||||
#endif | #endif | ||||
#ifdef SMP | #ifdef SMP | ||||
} else { | } else { | ||||
if (!(transa & 1)) | |||||
(gemv_thread[(int)transa]) (j, k, alpha, a, lda, | (gemv_thread[(int)transa]) (j, k, alpha, a, lda, | ||||
bb, incb, cc, 1, | bb, incb, cc, 1, | ||||
buffer, nthreads); | buffer, nthreads); | ||||
else | |||||
(gemv_thread[(int)transa]) (k, j, alpha, a, lda, | |||||
bb, incb, cc, 1, | |||||
buffer, nthreads); | |||||
} | } | ||||
#endif | #endif | ||||
STACK_FREE(buffer); | STACK_FREE(buffer); | ||||
} | } | ||||
} | } | ||||
FUNCTION_PROFILE_END(COMPSIZE * COMPSIZE, | |||||
args.m * args.k + args.k * args.n + | |||||
args.m * args.n, 2 * args.m * args.n * args.k); | |||||
IDEBUG_END; | IDEBUG_END; | ||||
return; | return; | ||||
} | |||||
} |
@@ -149,10 +149,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||||
#endif | #endif | ||||
if ( *lda > *ldb ) | |||||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT); | |||||
else | |||||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT); | |||||
if ( *rows > *cols ) | |||||
msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT); | |||||
else | |||||
msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT); | |||||
b = malloc(msize); | b = malloc(msize); | ||||
if ( b == NULL ) | if ( b == NULL ) | ||||
@@ -96,12 +96,6 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||||
else | else | ||||
{ | { | ||||
dp2 = *dd2 * dy1; | dp2 = *dd2 * dy1; | ||||
if(dp2 == ZERO) | |||||
{ | |||||
dflag = -TWO; | |||||
dparam[0] = dflag; | |||||
return; | |||||
} | |||||
dp1 = *dd1 * *dx1; | dp1 = *dd1 * *dx1; | ||||
dq2 = dp2 * dy1; | dq2 = dp2 * dy1; | ||||
dq1 = dp1 * *dx1; | dq1 = dp1 * *dx1; | ||||
@@ -113,24 +107,10 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||||
dh12 = dp2 / dp1; | dh12 = dp2 / dp1; | ||||
du = ONE - dh12 * dh21; | du = ONE - dh12 * dh21; | ||||
if(du > ZERO) | |||||
{ | |||||
dflag = ZERO; | |||||
*dd1 = *dd1 / du; | |||||
*dd2 = *dd2 / du; | |||||
*dx1 = *dx1 * du; | |||||
} else { | |||||
dflag = -ONE; | |||||
dh11 = ZERO; | |||||
dh12 = ZERO; | |||||
dh21 = ZERO; | |||||
dh22 = ZERO; | |||||
*dd1 = ZERO; | |||||
*dd2 = ZERO; | |||||
*dx1 = ZERO; | |||||
} | |||||
dflag = ZERO; | |||||
*dd1 = *dd1 / du; | |||||
*dd2 = *dd2 / du; | |||||
*dx1 = *dx1 * du; | |||||
} | } | ||||
else | else | ||||
@@ -171,10 +171,10 @@ void CNAME( enum CBLAS_ORDER CORDER, enum CBLAS_TRANSPOSE CTRANS, blasint crows, | |||||
} | } | ||||
#endif | #endif | ||||
if ( *lda > *ldb ) | |||||
msize = (size_t)(*lda) * (*ldb) * sizeof(FLOAT) * 2; | |||||
if ( *rows > *cols ) | |||||
msize = (size_t)(*rows) * (*ldb) * sizeof(FLOAT) * 2; | |||||
else | else | ||||
msize = (size_t)(*ldb) * (*ldb) * sizeof(FLOAT) * 2; | |||||
msize = (size_t)(*cols) * (*ldb) * sizeof(FLOAT) * 2; | |||||
b = malloc(msize); | b = malloc(msize); | ||||
if ( b == NULL ) | if ( b == NULL ) | ||||
@@ -40,7 +40,6 @@ int CNAME(BLASLONG rows, BLASLONG cols, FLOAT alpha_r, FLOAT alpha_i, FLOAT *a, | |||||
if ( rows <= 0 ) return(0); | if ( rows <= 0 ) return(0); | ||||
if ( cols <= 0 ) return(0); | if ( cols <= 0 ) return(0); | ||||
if ( alpha_r == 1.0 && alpha_i == 0.0 ) return (0); | |||||
aptr = a; | aptr = a; | ||||
lda *= 2; | lda *= 2; | ||||
@@ -33,7 +33,7 @@ int CNAME(BLASLONG n, FLOAT alpha, FLOAT *x, BLASLONG inc_x, FLOAT beta, FLOAT * | |||||
BLASLONG i=0; | BLASLONG i=0; | ||||
BLASLONG ix,iy; | BLASLONG ix,iy; | ||||
if ( n < 0 ) return(0); | |||||
if ( n <= 0 ) return(0); | |||||
ix = 0; | ix = 0; | ||||
iy = 0; | iy = 0; | ||||
@@ -42,7 +42,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da, FLOAT *x, BLAS | |||||
BLASLONG i=0; | BLASLONG i=0; | ||||
BLASLONG ix,iy; | BLASLONG ix,iy; | ||||
if ( n < 0 ) return(0); | |||||
if ( n <= 0 ) return(0); | |||||
if ( da == 0.0 ) return(0); | if ( da == 0.0 ) return(0); | ||||
ix = 0; | ix = 0; | ||||
@@ -41,7 +41,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
BLASLONG i=0; | BLASLONG i=0; | ||||
BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
if ( n < 0 ) return(0); | |||||
if ( n <= 0 ) return(0); | |||||
while(i < n) | while(i < n) | ||||
{ | { | ||||
@@ -46,7 +46,7 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
double dot = 0.0 ; | double dot = 0.0 ; | ||||
if ( n < 0 ) return(dot); | |||||
if ( n < 1 ) return(dot); | |||||
while(i < n) | while(i < n) | ||||
{ | { | ||||
@@ -41,7 +41,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT *x, | |||||
BLASLONG ix=0,iy=0; | BLASLONG ix=0,iy=0; | ||||
FLOAT temp; | FLOAT temp; | ||||
if ( n < 0 ) return(0); | |||||
if ( n <= 0 ) return(0); | |||||
while(i < n) | while(i < n) | ||||
{ | { | ||||
@@ -44,7 +44,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r, FLOAT da_i, | |||||
BLASLONG inc_x2; | BLASLONG inc_x2; | ||||
BLASLONG inc_y2; | BLASLONG inc_y2; | ||||
if ( n < 0 ) return(0); | |||||
if ( n <= 0 ) return(0); | |||||
if ( da_r == 0.0 && da_i == 0.0 ) return(0); | if ( da_r == 0.0 && da_i == 0.0 ) return(0); | ||||
ix = 0; | ix = 0; | ||||
@@ -43,7 +43,7 @@ int CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y) | |||||
BLASLONG inc_x2; | BLASLONG inc_x2; | ||||
BLASLONG inc_y2; | BLASLONG inc_y2; | ||||
if ( n < 0 ) return(0); | |||||
if ( n <= 0 ) return(0); | |||||
inc_x2 = 2 * inc_x; | inc_x2 = 2 * inc_x; | ||||
inc_y2 = 2 * inc_y; | inc_y2 = 2 * inc_y; | ||||
@@ -45,7 +45,7 @@ int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT dummy3, FLOAT dumm | |||||
BLASLONG inc_x2; | BLASLONG inc_x2; | ||||
BLASLONG inc_y2; | BLASLONG inc_y2; | ||||
if ( n < 0 ) return(0); | |||||
if ( n <= 0 ) return(0); | |||||
inc_x2 = 2 * inc_x; | inc_x2 = 2 * inc_x; | ||||
inc_y2 = 2 * inc_y; | inc_y2 = 2 * inc_y; | ||||