@@ -11,8 +11,8 @@ endif | |||||
ifeq ($(CORE), ARMV7) | ifeq ($(CORE), ARMV7) | ||||
ifeq ($(OSNAME), Android) | ifeq ($(OSNAME), Android) | ||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a | |||||
CCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch | |||||
FCOMMON_OPT += -marm -mfpu=neon -mfloat-abi=hard -march=armv7-a -Wl,--no-warn-mismatch | |||||
else | else | ||||
CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | CCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | ||||
FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | FCOMMON_OPT += -marm -mfpu=vfpv3 -mfloat-abi=hard -march=armv7-a | ||||
@@ -29,5 +29,3 @@ ifeq ($(CORE), ARMV5) | |||||
CCOMMON_OPT += -marm -march=armv5 | CCOMMON_OPT += -marm -march=armv5 | ||||
FCOMMON_OPT += -marm -march=armv5 | FCOMMON_OPT += -marm -march=armv5 | ||||
endif | endif | ||||
@@ -296,12 +296,14 @@ endif | |||||
ifneq ($(OSNAME), WINNT) | ifneq ($(OSNAME), WINNT) | ||||
ifneq ($(OSNAME), CYGWIN_NT) | ifneq ($(OSNAME), CYGWIN_NT) | ||||
ifneq ($(OSNAME), Interix) | ifneq ($(OSNAME), Interix) | ||||
ifneq ($(OSNAME), Android) | |||||
ifdef SMP | ifdef SMP | ||||
EXTRALIB += -lpthread | EXTRALIB += -lpthread | ||||
endif | endif | ||||
endif | endif | ||||
endif | endif | ||||
endif | endif | ||||
endif | |||||
# ifeq logical or | # ifeq logical or | ||||
ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix)) | ifeq ($(OSNAME), $(filter $(OSNAME),WINNT CYGWIN_NT Interix)) | ||||
@@ -379,7 +381,7 @@ FCOMMON_OPT += -m128bit-long-double | |||||
endif | endif | ||||
ifeq ($(C_COMPILER), CLANG) | ifeq ($(C_COMPILER), CLANG) | ||||
EXPRECISION = 1 | EXPRECISION = 1 | ||||
CCOMMON_OPT += -DEXPRECISION | |||||
CCOMMON_OPT += -DEXPRECISION | |||||
FCOMMON_OPT += -m128bit-long-double | FCOMMON_OPT += -m128bit-long-double | ||||
endif | endif | ||||
endif | endif | ||||
@@ -393,7 +395,7 @@ endif | |||||
ifeq ($(USE_OPENMP), 1) | ifeq ($(USE_OPENMP), 1) | ||||
#check | |||||
#check | |||||
ifeq ($(USE_THREAD), 0) | ifeq ($(USE_THREAD), 0) | ||||
$(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.) | $(error OpenBLAS: Cannot set both USE_OPENMP=1 and USE_THREAD=0. The USE_THREAD=0 is only for building single thread version.) | ||||
endif | endif | ||||
@@ -1185,4 +1187,3 @@ SUNPATH = /opt/sunstudio12.1 | |||||
else | else | ||||
SUNPATH = /opt/SUNWspro | SUNPATH = /opt/SUNWspro | ||||
endif | endif | ||||
@@ -34,6 +34,13 @@ | |||||
#ifndef _LAPACKE_CONFIG_H_ | #ifndef _LAPACKE_CONFIG_H_ | ||||
#define _LAPACKE_CONFIG_H_ | #define _LAPACKE_CONFIG_H_ | ||||
// For Android prior to API 21 (no <complex> include) | |||||
#if defined(__ANDROID__) | |||||
#if __ANDROID_API__ < 21 | |||||
#define LAPACK_COMPLEX_STRUCTURE | |||||
#endif | |||||
#endif | |||||
#ifdef __cplusplus | #ifdef __cplusplus | ||||
#if defined(LAPACK_COMPLEX_CPP) | #if defined(LAPACK_COMPLEX_CPP) | ||||
#include <complex> | #include <complex> | ||||
@@ -46,7 +46,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
BLASLONG n, lda; | BLASLONG n, lda; | ||||
FLOAT *a; | FLOAT *a; | ||||
FLOAT temp[2]; | |||||
FLOAT temp; | |||||
BLASLONG i; | BLASLONG i; | ||||
n = args -> n; | n = args -> n; | ||||
@@ -64,12 +64,11 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
a + i * COMPSIZE, lda, NULL, 0, NULL, 0); | a + i * COMPSIZE, lda, NULL, 0, NULL, 0); | ||||
if (i < n - 1) { | if (i < n - 1) { | ||||
temp[0] = DOTC_K(n - i - 1, | |||||
temp = CREAL(DOTC_K(n - i - 1, | |||||
a + (i + 1 + i * lda) * COMPSIZE, 1, | a + (i + 1 + i * lda) * COMPSIZE, 1, | ||||
a + (i + 1 + i * lda) * COMPSIZE, 1); | |||||
GET_IMAGE(temp[1]); | |||||
a + (i + 1 + i * lda) * COMPSIZE, 1)); | |||||
*(a + (i + i * lda) * COMPSIZE + 0) += temp[0]; | |||||
*(a + (i + i * lda) * COMPSIZE + 0) += temp; | |||||
*(a + (i + i * lda) * COMPSIZE + 1) = ZERO; | *(a + (i + i * lda) * COMPSIZE + 1) = ZERO; | ||||
GEMV_U(n - i - 1, i, 0, dp1, ZERO, | GEMV_U(n - i - 1, i, 0, dp1, ZERO, | ||||
@@ -46,7 +46,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
BLASLONG n, lda; | BLASLONG n, lda; | ||||
FLOAT *a; | FLOAT *a; | ||||
FLOAT temp[2]; | |||||
FLOAT temp; | |||||
BLASLONG i; | BLASLONG i; | ||||
n = args -> n; | n = args -> n; | ||||
@@ -64,10 +64,9 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
a + i * lda * COMPSIZE, 1, NULL, 0, NULL, 0); | a + i * lda * COMPSIZE, 1, NULL, 0, NULL, 0); | ||||
if (i < n - 1) { | if (i < n - 1) { | ||||
temp[0] = DOTC_K(n - i - 1, a + (i + (i + 1) * lda) * COMPSIZE, lda, a + (i + (i + 1) * lda) * COMPSIZE, lda); | |||||
GET_IMAGE(temp[1]); | |||||
temp = CREAL(DOTC_K(n - i - 1, a + (i + (i + 1) * lda) * COMPSIZE, lda, a + (i + (i + 1) * lda) * COMPSIZE, lda)); | |||||
*(a + (i + i * lda) * COMPSIZE + 0) += temp[0]; | |||||
*(a + (i + i * lda) * COMPSIZE + 0) += temp; | |||||
*(a + (i + i * lda) * COMPSIZE + 1) = ZERO; | *(a + (i + i * lda) * COMPSIZE + 1) = ZERO; | ||||
GEMV_O(i, n - i - 1, 0, dp1, ZERO, | GEMV_O(i, n - i - 1, 0, dp1, ZERO, | ||||
@@ -51,7 +51,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
BLASLONG n, lda; | BLASLONG n, lda; | ||||
FLOAT *a; | FLOAT *a; | ||||
FLOAT ajj[2]; | |||||
FLOAT ajj; | |||||
FLOAT *aoffset; | FLOAT *aoffset; | ||||
BLASLONG i, j; | BLASLONG i, j; | ||||
@@ -68,18 +68,17 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
for (j = 0; j < n; j++) { | for (j = 0; j < n; j++) { | ||||
ajj[0] = DOTC_K(j, a + j * 2, lda, a + j * 2, lda); | |||||
GET_IMAGE(ajj[1]); | |||||
ajj = CREAL(DOTC_K(j, a + j * 2, lda, a + j * 2, lda)); | |||||
ajj[0] = *(aoffset + j * 2) - ajj[0]; | |||||
ajj = *(aoffset + j * 2) - ajj; | |||||
if (ajj[0] <= 0){ | |||||
*(aoffset + j * 2 + 0) = ajj[0]; | |||||
if (ajj <= 0){ | |||||
*(aoffset + j * 2 + 0) = ajj; | |||||
*(aoffset + j * 2 + 1) = ZERO; | *(aoffset + j * 2 + 1) = ZERO; | ||||
return j + 1; | return j + 1; | ||||
} | } | ||||
ajj[0] = SQRT(ajj[0]); | |||||
*(aoffset + j * 2 + 0) = ajj[0]; | |||||
ajj = SQRT(ajj); | |||||
*(aoffset + j * 2 + 0) = ajj; | |||||
*(aoffset + j * 2 + 1) = ZERO; | *(aoffset + j * 2 + 1) = ZERO; | ||||
i = n - j - 1; | i = n - j - 1; | ||||
@@ -90,7 +89,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
a + j * 2, lda, | a + j * 2, lda, | ||||
aoffset + (j + 1) * 2, 1, sb); | aoffset + (j + 1) * 2, 1, sb); | ||||
SCAL_K(i, 0, 0, ONE / ajj[0], ZERO, | |||||
SCAL_K(i, 0, 0, ONE / ajj, ZERO, | |||||
aoffset + (j + 1) * 2, 1, NULL, 0, NULL, 0); | aoffset + (j + 1) * 2, 1, NULL, 0, NULL, 0); | ||||
} | } | ||||
@@ -51,7 +51,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
BLASLONG n, lda; | BLASLONG n, lda; | ||||
FLOAT *a; | FLOAT *a; | ||||
FLOAT ajj[2]; | |||||
FLOAT ajj; | |||||
BLASLONG i, j; | BLASLONG i, j; | ||||
n = args -> n; | n = args -> n; | ||||
@@ -65,19 +65,18 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
for (j = 0; j < n; j++) { | for (j = 0; j < n; j++) { | ||||
ajj[0] = DOTC_K(j, a, 1, a, 1); | |||||
GET_IMAGE(ajj[1]); | |||||
ajj = CREAL(DOTC_K(j, a, 1, a, 1)); | |||||
ajj[0] = *(a + j * 2) - ajj[0]; | |||||
ajj = *(a + j * 2) - ajj; | |||||
if (ajj[0] <= 0){ | |||||
*(a + j * 2 + 0) = ajj[0]; | |||||
if (ajj <= 0){ | |||||
*(a + j * 2 + 0) = ajj; | |||||
*(a + j * 2 + 1) = ZERO; | *(a + j * 2 + 1) = ZERO; | ||||
return j + 1; | return j + 1; | ||||
} | } | ||||
ajj[0] = SQRT(ajj[0]); | |||||
*(a + j * 2 + 0) = ajj[0]; | |||||
ajj = SQRT(ajj); | |||||
*(a + j * 2 + 0) = ajj; | |||||
*(a + j * 2 + 1) = ZERO; | *(a + j * 2 + 1) = ZERO; | ||||
i = n - j - 1; | i = n - j - 1; | ||||
@@ -88,7 +87,7 @@ blasint CNAME(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n, FLOAT *sa, | |||||
a, 1, | a, 1, | ||||
a + (j + lda) * 2, lda, sb); | a + (j + lda) * 2, lda, sb); | ||||
SCAL_K(i, 0, 0, ONE / ajj[0], ZERO, | |||||
SCAL_K(i, 0, 0, ONE / ajj, ZERO, | |||||
a + (j + lda) * 2, lda, NULL, 0, NULL, 0); | a + (j + lda) * 2, lda, NULL, 0, NULL, 0); | ||||
} | } | ||||