Browse Source

Merge pull request #5081 from XiWeiGu/kernel_generic_fixed_cscal_zscal

kernel/generic: Fixed cscal and zscal
tags/v0.3.30
Martin Kroeker GitHub 3 months ago
parent
commit
11ff18bb0f
No known key found for this signature in database GPG Key ID: B5690EEEBB952194
4 changed files with 570 additions and 51 deletions
  1. +2
    -2
      interface/zscal.c
  2. +40
    -49
      kernel/arm/zscal.c
  3. +474
    -0
      utest/test_gemv.c
  4. +54
    -0
      utest/test_zscal.c

+ 2
- 2
interface/zscal.c View File

@@ -98,7 +98,7 @@ void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){
if (nthreads == 1) { if (nthreads == 1) {
#endif #endif


SCAL_K(n, 0, 0, alpha[0], alpha[1], x, incx, NULL, 0, NULL, 0);
SCAL_K(n, 0, 0, alpha[0], alpha[1], x, incx, NULL, 0, NULL, 1);


#ifdef SMP #ifdef SMP
} else { } else {
@@ -108,7 +108,7 @@ void CNAME(blasint n, FLOAT alpha_r, void *vx, blasint incx){
mode = BLAS_SINGLE | BLAS_COMPLEX; mode = BLAS_SINGLE | BLAS_COMPLEX;
#endif #endif


blas_level1_thread(mode, n, 0, 0, alpha, x, incx, NULL, 0, NULL, 0, (int (*)(void))SCAL_K, nthreads);
blas_level1_thread(mode, n, 0, 0, alpha, x, incx, NULL, 0, NULL, 1, (int (*)(void))SCAL_K, nthreads);


} }
#endif #endif


+ 40
- 49
kernel/arm/zscal.c View File

@@ -27,65 +27,56 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


/************************************************************************************** /**************************************************************************************
* 2013/09/14 Saar * 2013/09/14 Saar
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
* BLASTEST float : OK
* BLASTEST double : OK
* CTEST : OK
* TEST : OK
* *
**************************************************************************************/ **************************************************************************************/


#include "common.h" #include "common.h"


// The c/zscal_k function is called not only by cblas_c/zscal but also by other upper-level interfaces.
// In certain cases, the results expected from cblas_c/zscal differ from those expected by other upper-level interfaces.
// To handle this, we use the dummy2 parameter to differentiate between them.
int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2) int CNAME(BLASLONG n, BLASLONG dummy0, BLASLONG dummy1, FLOAT da_r,FLOAT da_i, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y, FLOAT *dummy, BLASLONG dummy2)
{ {
BLASLONG i=0;
BLASLONG inc_x2;
BLASLONG ip = 0;
FLOAT temp;
BLASLONG i = 0;
BLASLONG inc_x2;
BLASLONG ip = 0;
FLOAT temp;


if ( (n <= 0) || (inc_x <= 0))
return(0);
if ((n <= 0) || (inc_x <= 0))
return(0);


inc_x2 = 2 * inc_x;
if (dummy2 == 0) {
for (i = 0; i < n; i++)
{
if (da_r == 0.0 && da_i == 0.0)
{
x[ip] = 0.0;
x[ip+1] = 0.0;
}
else
{
temp = da_r * x[ip] - da_i * x[ip+1];
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
x[ip] = temp;
}


inc_x2 = 2 * inc_x;
for ( i=0; i<n; i++ )
{
if ( da_r == 0.0 )
{
if ( da_i == 0.0 )
{
temp = 0.0;
x[ip+1] = 0.0 ;
}
else
{
temp = - da_i * x[ip+1] ;
if (isnan(x[ip]) || isinf(x[ip])) temp = NAN;
if (!isinf(x[ip+1]))
x[ip+1] = da_i * x[ip] ;
else x[ip+1] = NAN;
}
}
else
{
if ( da_i == 0.0 )
{
temp = da_r * x[ip] ;
x[ip+1] = da_r * x[ip+1];
}
else
{
temp = da_r * x[ip] - da_i * x[ip+1] ;
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;
}
}
x[ip] = temp;
ip += inc_x2;
}
return(0);
}
for (i = 0; i < n; i++)
{
temp = da_r * x[ip] - da_i * x[ip+1];
x[ip+1] = da_r * x[ip+1] + da_i * x[ip] ;


ip += inc_x2;
}

return(0);
x[ip] = temp;
ip += inc_x2;
}


return(0);
} }



+ 474
- 0
utest/test_gemv.c View File

@@ -128,3 +128,477 @@ CTEST(dgemv, 0_nan_inf_incy_2)
} }


#endif #endif

#ifdef BUILD_COMPLEX

CTEST(cgemv, 0_nan_inf)
{
    /* cgemv with alpha = 0 and beta = 0 on a Y vector pre-filled with NaN and
     * Inf: the test requires every float of Y to come back as exactly zero. */
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 1;
    char trans = 'N';
    float alpha[2] = {0.0, 0.0};
    float beta[2] = {0.0, 0.0};
    float A[17 * 17 * 4];
    float X[17 * 2];
    float Y[17 * 2];
    int k;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));

    /* Even-numbered complex elements are NaN+NaN*i, odd-numbered ones are
     * Inf+Inf*i; with N odd the last element is NaN+NaN*i. */
    for (k = 0; k < N; k++) {
        float fill = (k % 2) ? INFINITY : NAN;
        Y[2 * k] = fill;
        Y[2 * k + 1] = fill;
    }

    BLASFUNC(cgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (k = 0; k < 2 * N; k++) {
        ASSERT_TRUE(Y[k] == 0.0);
    }
}

CTEST(cgemv, 0_nan_inf_incy_2)
{
    /* Same scenario as the unit-stride case (alpha = 0, beta = 0, Y seeded
     * with NaN/Inf) but with incY = 2: the test requires the whole Y buffer,
     * including the gaps between strided elements, to be exactly zero. */
    int i;
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 2;
    float alpha[2] = {0.0, 0.0};
    float beta[2] = {0.0, 0.0};
    char trans = 'N';
    float A[17 * 17 * 4];
    /* X is a complex vector of N elements with incX = 1, i.e. 2 * N floats. */
    float X[17 * 2];
    float Y[17 * 2 * 2];
    float *ay = Y;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));
    memset(Y, 0, sizeof(Y));

    /* With incY = 2 consecutive logical elements are 4 floats apart.
     * Seed them alternately with NaN+NaN*i and Inf+Inf*i. */
    for (i = 0; i < (2 * N - 2); i += 4)
    {
        ay[0] = NAN;
        ay[1] = NAN;
        ay += 4;
        ay[0] = INFINITY;
        ay[1] = INFINITY;
        ay += 4;
    }
    /* Last (17th) logical element. */
    Y[4 * N - 4] = NAN;
    Y[4 * N - 3] = NAN;

    BLASFUNC(cgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (i = 0; i < 4 * N; i++)
    {
        ASSERT_TRUE(Y[i] == 0.0);
    }
}

CTEST(cgemv, 0_2_nan_1_inf_1)
{
    /* cgemv with alpha = 0 and beta = 0+2i. Y alternates between NaN+1i and
     * Inf+1i. The test expects NaN+NaN*i where Y held NaN+1i and NaN+Inf*i
     * where Y held Inf+1i. */
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 1;
    char trans = 'N';
    float alpha[2] = {0.0, 0.0};
    float beta[2] = {0.0, 2.0};
    float A[17 * 17 * 4];
    float X[17 * 2];
    float Y[17 * 2];
    int k;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));

    /* Real part: NaN for even-numbered elements, Inf for odd-numbered ones;
     * imaginary part is always 1. */
    for (k = 0; k < N; k++) {
        Y[2 * k] = (k % 2) ? INFINITY : NAN;
        Y[2 * k + 1] = 1.0;
    }

    BLASFUNC(cgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (k = 0; k < N; k++) {
        ASSERT_TRUE(isnan(Y[2 * k]));
        if (k % 2) {
            ASSERT_TRUE(isinf(Y[2 * k + 1]));
        } else {
            ASSERT_TRUE(isnan(Y[2 * k + 1]));
        }
    }
}

CTEST(cgemv, 0_2_nan_1_inf_1_incy_2)
{
    /* alpha = 0, beta = 0+2i, incY = 2. Strided elements of Y alternate
     * between NaN+1i and Inf+1i; the gaps between them start at zero and the
     * test requires them to stay zero. */
    int i;
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 2;
    float alpha[2] = {0.0, 0.0};
    float beta[2] = {0.0, 2.0};
    char trans = 'N';
    float A[17 * 17 * 4];
    /* X is a complex vector of N elements with incX = 1, i.e. 2 * N floats. */
    float X[17 * 2];
    float Y[17 * 2 * 2];
    float *ay = Y;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));
    memset(Y, 0, sizeof(Y));

    /* With incY = 2 consecutive logical elements are 4 floats apart. */
    for (i = 0; i < (2 * N - 2); i += 4)
    {
        ay[0] = NAN;
        ay[1] = 1.0;
        ay += 4;
        ay[0] = INFINITY;
        ay[1] = 1.0;
        ay += 4;
    }
    /* Last (17th) logical element. */
    Y[4 * N - 4] = NAN;
    Y[4 * N - 3] = 1.0;

    BLASFUNC(cgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (i = 0; i < 4 * N; i += 2) {
        if ((i >> 1) % 2) {
            /* Gap slot skipped by the stride: must be left untouched. */
            ASSERT_TRUE(Y[i] == 0.0);
            ASSERT_TRUE(Y[i + 1] == 0.0);
        }
        else {
            if ((i >> 2) % 2) {
                /* Odd-numbered logical element, seeded with Inf+1i. */
                ASSERT_TRUE(isnan(Y[i]));
                ASSERT_TRUE(isinf(Y[i + 1]));
            }
            else {
                /* Even-numbered logical element, seeded with NaN+1i. */
                ASSERT_TRUE(isnan(Y[i]));
                ASSERT_TRUE(isnan(Y[i + 1]));
            }
        }
    }
}

CTEST(cgemv, 2_0_nan_1_inf_1)
{
    /* cgemv with alpha = 0 and beta = 2+0i. Y alternates between NaN+1i and
     * Inf+1i. The test expects NaN+NaN*i where Y held NaN+1i and Inf+NaN*i
     * where Y held Inf+1i. */
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 1;
    char trans = 'N';
    float alpha[2] = {0.0, 0.0};
    float beta[2] = {2.0, 0.0};
    float A[17 * 17 * 4];
    float X[17 * 2];
    float Y[17 * 2];
    int k;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));

    /* Real part: NaN for even-numbered elements, Inf for odd-numbered ones;
     * imaginary part is always 1. */
    for (k = 0; k < N; k++) {
        Y[2 * k] = (k % 2) ? INFINITY : NAN;
        Y[2 * k + 1] = 1.0;
    }

    BLASFUNC(cgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (k = 0; k < N; k++) {
        if (k % 2) {
            ASSERT_TRUE(isinf(Y[2 * k]));
        } else {
            ASSERT_TRUE(isnan(Y[2 * k]));
        }
        ASSERT_TRUE(isnan(Y[2 * k + 1]));
    }
}

CTEST(cgemv, 2_0_nan_1_inf_1_incy_2)
{
    /* alpha = 0, beta = 2+0i, incY = 2. Strided elements of Y alternate
     * between NaN+1i and Inf+1i; the gaps between them start at zero and the
     * test requires them to stay zero. */
    int i;
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 2;
    float alpha[2] = {0.0, 0.0};
    float beta[2] = {2.0, 0.0};
    char trans = 'N';
    float A[17 * 17 * 4];
    /* X is a complex vector of N elements with incX = 1, i.e. 2 * N floats. */
    float X[17 * 2];
    float Y[17 * 2 * 2];
    float *ay = Y;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));
    memset(Y, 0, sizeof(Y));

    /* With incY = 2 consecutive logical elements are 4 floats apart. */
    for (i = 0; i < (2 * N - 2); i += 4)
    {
        ay[0] = NAN;
        ay[1] = 1.0;
        ay += 4;
        ay[0] = INFINITY;
        ay[1] = 1.0;
        ay += 4;
    }
    /* Last (17th) logical element. */
    Y[4 * N - 4] = NAN;
    Y[4 * N - 3] = 1.0;

    BLASFUNC(cgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (i = 0; i < 4 * N; i += 2) {
        if ((i >> 1) % 2) {
            /* Gap slot skipped by the stride: must be left untouched. */
            ASSERT_TRUE(Y[i] == 0.0);
            ASSERT_TRUE(Y[i + 1] == 0.0);
        }
        else {
            if ((i >> 2) % 2) {
                /* Odd-numbered logical element, seeded with Inf+1i. */
                ASSERT_TRUE(isinf(Y[i]));
                ASSERT_TRUE(isnan(Y[i + 1]));
            }
            else {
                /* Even-numbered logical element, seeded with NaN+1i. */
                ASSERT_TRUE(isnan(Y[i]));
                ASSERT_TRUE(isnan(Y[i + 1]));
            }
        }
    }
}

#endif

#ifdef BUILD_COMPLEX16

CTEST(zgemv, 0_nan_inf)
{
    /* zgemv with alpha = 0 and beta = 0 on a Y vector pre-filled with NaN and
     * Inf: the test requires every double of Y to come back as exactly zero. */
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 1;
    char trans = 'N';
    double alpha[2] = {0.0, 0.0};
    double beta[2] = {0.0, 0.0};
    double A[17 * 17 * 4];
    double X[17 * 2];
    double Y[17 * 2];
    int k;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));

    /* Even-numbered complex elements are NaN+NaN*i, odd-numbered ones are
     * Inf+Inf*i; with N odd the last element is NaN+NaN*i. */
    for (k = 0; k < N; k++) {
        double fill = (k % 2) ? INFINITY : NAN;
        Y[2 * k] = fill;
        Y[2 * k + 1] = fill;
    }

    BLASFUNC(zgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (k = 0; k < 2 * N; k++) {
        ASSERT_TRUE(Y[k] == 0.0);
    }
}

CTEST(zgemv, 0_nan_inf_incy_2)
{
    /* Same scenario as the unit-stride case (alpha = 0, beta = 0, Y seeded
     * with NaN/Inf) but with incY = 2: the test requires the whole Y buffer,
     * including the gaps between strided elements, to be exactly zero. */
    int i;
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 2;
    double alpha[2] = {0.0, 0.0};
    double beta[2] = {0.0, 0.0};
    char trans = 'N';
    double A[17 * 17 * 4];
    /* X is a complex vector of N elements with incX = 1, i.e. 2 * N doubles. */
    double X[17 * 2];
    double Y[17 * 2 * 2];
    double *ay = Y;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));
    memset(Y, 0, sizeof(Y));

    /* With incY = 2 consecutive logical elements are 4 doubles apart.
     * Seed them alternately with NaN+NaN*i and Inf+Inf*i. */
    for (i = 0; i < (2 * N - 2); i += 4)
    {
        ay[0] = NAN;
        ay[1] = NAN;
        ay += 4;
        ay[0] = INFINITY;
        ay[1] = INFINITY;
        ay += 4;
    }
    /* Last (17th) logical element. */
    Y[4 * N - 4] = NAN;
    Y[4 * N - 3] = NAN;

    BLASFUNC(zgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (i = 0; i < 4 * N; i++)
    {
        ASSERT_TRUE(Y[i] == 0.0);
    }
}

CTEST(zgemv, 0_2_nan_1_inf_1)
{
    /* zgemv with alpha = 0 and beta = 0+2i. Y alternates between NaN+1i and
     * Inf+1i. The test expects NaN+NaN*i where Y held NaN+1i and NaN+Inf*i
     * where Y held Inf+1i. */
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 1;
    char trans = 'N';
    double alpha[2] = {0.0, 0.0};
    double beta[2] = {0.0, 2.0};
    double A[17 * 17 * 4];
    double X[17 * 2];
    double Y[17 * 2];
    int k;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));

    /* Real part: NaN for even-numbered elements, Inf for odd-numbered ones;
     * imaginary part is always 1. */
    for (k = 0; k < N; k++) {
        Y[2 * k] = (k % 2) ? INFINITY : NAN;
        Y[2 * k + 1] = 1.0;
    }

    BLASFUNC(zgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (k = 0; k < N; k++) {
        ASSERT_TRUE(isnan(Y[2 * k]));
        if (k % 2) {
            ASSERT_TRUE(isinf(Y[2 * k + 1]));
        } else {
            ASSERT_TRUE(isnan(Y[2 * k + 1]));
        }
    }
}

CTEST(zgemv, 0_2_nan_1_inf_1_incy_2)
{
    /* alpha = 0, beta = 0+2i, incY = 2. Strided elements of Y alternate
     * between NaN+1i and Inf+1i; the gaps between them start at zero and the
     * test requires them to stay zero. */
    int i;
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 2;
    double alpha[2] = {0.0, 0.0};
    double beta[2] = {0.0, 2.0};
    char trans = 'N';
    double A[17 * 17 * 4];
    /* X is a complex vector of N elements with incX = 1, i.e. 2 * N doubles. */
    double X[17 * 2];
    double Y[17 * 2 * 2];
    double *ay = Y;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));
    memset(Y, 0, sizeof(Y));

    /* With incY = 2 consecutive logical elements are 4 doubles apart. */
    for (i = 0; i < (2 * N - 2); i += 4)
    {
        ay[0] = NAN;
        ay[1] = 1.0;
        ay += 4;
        ay[0] = INFINITY;
        ay[1] = 1.0;
        ay += 4;
    }
    /* Last (17th) logical element. */
    Y[4 * N - 4] = NAN;
    Y[4 * N - 3] = 1.0;

    BLASFUNC(zgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (i = 0; i < 4 * N; i += 2) {
        if ((i >> 1) % 2) {
            /* Gap slot skipped by the stride: must be left untouched. */
            ASSERT_TRUE(Y[i] == 0.0);
            ASSERT_TRUE(Y[i + 1] == 0.0);
        }
        else {
            if ((i >> 2) % 2) {
                /* Odd-numbered logical element, seeded with Inf+1i. */
                ASSERT_TRUE(isnan(Y[i]));
                ASSERT_TRUE(isinf(Y[i + 1]));
            }
            else {
                /* Even-numbered logical element, seeded with NaN+1i. */
                ASSERT_TRUE(isnan(Y[i]));
                ASSERT_TRUE(isnan(Y[i + 1]));
            }
        }
    }
}

CTEST(zgemv, 2_0_nan_1_inf_1)
{
    /* zgemv with alpha = 0 and beta = 2+0i. Y alternates between NaN+1i and
     * Inf+1i. The test expects NaN+NaN*i where Y held NaN+1i and Inf+NaN*i
     * where Y held Inf+1i. */
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 1;
    char trans = 'N';
    double alpha[2] = {0.0, 0.0};
    double beta[2] = {2.0, 0.0};
    double A[17 * 17 * 4];
    double X[17 * 2];
    double Y[17 * 2];
    int k;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));

    /* Real part: NaN for even-numbered elements, Inf for odd-numbered ones;
     * imaginary part is always 1. */
    for (k = 0; k < N; k++) {
        Y[2 * k] = (k % 2) ? INFINITY : NAN;
        Y[2 * k + 1] = 1.0;
    }

    BLASFUNC(zgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (k = 0; k < N; k++) {
        if (k % 2) {
            ASSERT_TRUE(isinf(Y[2 * k]));
        } else {
            ASSERT_TRUE(isnan(Y[2 * k]));
        }
        ASSERT_TRUE(isnan(Y[2 * k + 1]));
    }
}

CTEST(zgemv, 2_0_nan_1_inf_1_incy_2)
{
    /* alpha = 0, beta = 2+0i, incY = 2. Strided elements of Y alternate
     * between NaN+1i and Inf+1i; the gaps between them start at zero and the
     * test requires them to stay zero. */
    int i;
    blasint N = 17;
    blasint incX = 1;
    blasint incY = 2;
    double alpha[2] = {0.0, 0.0};
    double beta[2] = {2.0, 0.0};
    char trans = 'N';
    double A[17 * 17 * 4];
    /* X is a complex vector of N elements with incX = 1, i.e. 2 * N doubles. */
    double X[17 * 2];
    double Y[17 * 2 * 2];
    double *ay = Y;

    memset(A, 0, sizeof(A));
    memset(X, 0, sizeof(X));
    memset(Y, 0, sizeof(Y));

    /* With incY = 2 consecutive logical elements are 4 doubles apart. */
    for (i = 0; i < (2 * N - 2); i += 4)
    {
        ay[0] = NAN;
        ay[1] = 1.0;
        ay += 4;
        ay[0] = INFINITY;
        ay[1] = 1.0;
        ay += 4;
    }
    /* Last (17th) logical element. */
    Y[4 * N - 4] = NAN;
    Y[4 * N - 3] = 1.0;

    BLASFUNC(zgemv)(&trans, &N, &N, alpha, A, &N, X, &incX, beta, Y, &incY);

    for (i = 0; i < 4 * N; i += 2) {
        if ((i >> 1) % 2) {
            /* Gap slot skipped by the stride: must be left untouched. */
            ASSERT_TRUE(Y[i] == 0.0);
            ASSERT_TRUE(Y[i + 1] == 0.0);
        }
        else {
            if ((i >> 2) % 2) {
                /* Odd-numbered logical element, seeded with Inf+1i. */
                ASSERT_TRUE(isinf(Y[i]));
                ASSERT_TRUE(isnan(Y[i + 1]));
            }
            else {
                /* Even-numbered logical element, seeded with NaN+1i. */
                ASSERT_TRUE(isnan(Y[i]));
                ASSERT_TRUE(isnan(Y[i + 1]));
            }
        }
    }
}

#endif

+ 54
- 0
utest/test_zscal.c View File

@@ -442,6 +442,33 @@ CTEST(cscal, i_0inf_inc_2)
ASSERT_TRUE(isnan(inf[17])); ASSERT_TRUE(isnan(inf[17]));
} }


CTEST(cscal, i00_NAN)
{
    /* Scale nine complex values NaN+0i by an all-zero scalar with unit stride;
     * the test expects both parts of the first and last element to be NaN. */
    blasint N = 9;
    blasint incX = 1;
    float zero_alpha[18] = {0};
    float x[18];
    int k;

    for (k = 0; k < 9; k++) {
        x[2 * k] = NAN;
        x[2 * k + 1] = 0;
    }

    BLASFUNC(cscal)(&N, zero_alpha, x, &incX);

    ASSERT_TRUE(isnan(x[0]));
    ASSERT_TRUE(isnan(x[1]));
    ASSERT_TRUE(isnan(x[16]));
    ASSERT_TRUE(isnan(x[17]));
}

CTEST(cscal, i00_NAN_incx_2)
{
    /* Scale a buffer of complex values 0+NaN*i by an all-zero scalar with
     * incX = 2; the test expects floats 0, 1, 16 and 17 to be NaN afterwards. */
    blasint N = 9;
    blasint incX = 2;
    float zero_alpha[18] = {0};
    float x[36];
    int k;

    for (k = 0; k < 18; k++) {
        x[2 * k] = 0;
        x[2 * k + 1] = NAN;
    }

    BLASFUNC(cscal)(&N, zero_alpha, x, &incX);

    ASSERT_TRUE(isnan(x[0]));
    ASSERT_TRUE(isnan(x[1]));
    ASSERT_TRUE(isnan(x[16]));
    ASSERT_TRUE(isnan(x[17]));
}

#endif #endif


#ifdef BUILD_COMPLEX16 #ifdef BUILD_COMPLEX16
@@ -588,4 +615,31 @@ CTEST(zscal, i_0inf_inc_2)
ASSERT_TRUE(isnan(inf[17])); ASSERT_TRUE(isnan(inf[17]));
} }


CTEST(zscal, i00_NAN)
{
    /* Scale nine complex values NaN+0i by an all-zero scalar with unit stride;
     * the test expects both parts of the first and last element to be NaN. */
    blasint N = 9;
    blasint incX = 1;
    double zero_alpha[18] = {0};
    double x[18];
    int k;

    for (k = 0; k < 9; k++) {
        x[2 * k] = NAN;
        x[2 * k + 1] = 0;
    }

    BLASFUNC(zscal)(&N, zero_alpha, x, &incX);

    ASSERT_TRUE(isnan(x[0]));
    ASSERT_TRUE(isnan(x[1]));
    ASSERT_TRUE(isnan(x[16]));
    ASSERT_TRUE(isnan(x[17]));
}

CTEST(zscal, i00_NAN_incx_2)
{
    /* Scale a buffer of complex values 0+NaN*i by an all-zero scalar with
     * incX = 2; the test expects doubles 0, 1, 16 and 17 to be NaN afterwards. */
    blasint N = 9;
    blasint incX = 2;
    double zero_alpha[18] = {0};
    double x[36];
    int k;

    for (k = 0; k < 18; k++) {
        x[2 * k] = 0;
        x[2 * k + 1] = NAN;
    }

    BLASFUNC(zscal)(&N, zero_alpha, x, &incX);

    ASSERT_TRUE(isnan(x[0]));
    ASSERT_TRUE(isnan(x[1]));
    ASSERT_TRUE(isnan(x[16]));
    ASSERT_TRUE(isnan(x[17]));
}

#endif #endif

Loading…
Cancel
Save