@@ -1,7 +1,8 @@ | |||||
OpenBLAS ChangeLog | OpenBLAS ChangeLog | ||||
==================================================================== | ==================================================================== | ||||
Version 0.1 alpha2(in development) | |||||
Version 0.1 alpha2 | |||||
23-Jun-2011 | |||||
common: | common: | ||||
* Fixed blasint undefined bug in <cblas.h> file. Other software | * Fixed blasint undefined bug in <cblas.h> file. Other software | ||||
can now include this header successfully (Refs issue #13 on github) | can now include this header successfully (Refs issue #13 on github) | ||||
@@ -31,6 +32,8 @@ x86/x86_64: | |||||
MIPS64: | MIPS64: | ||||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. | * Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. | ||||
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2) | |||||
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3) | |||||
==================================================================== | ==================================================================== | ||||
Version 0.1 alpha1 | Version 0.1 alpha1 | ||||
@@ -74,7 +74,7 @@ ifeq ($(OSNAME), Darwin) | |||||
endif | endif | ||||
ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
$(MAKE) -C exports dll | $(MAKE) -C exports dll | ||||
# -ln -fs $(LIBDLLNAME) libopenblas.dll | |||||
-ln -fs $(LIBDLLNAME) libopenblas.dll | |||||
endif | endif | ||||
ifeq ($(OSNAME), CYGWIN_NT) | ifeq ($(OSNAME), CYGWIN_NT) | ||||
$(MAKE) -C exports dll | $(MAKE) -C exports dll | ||||
@@ -72,6 +72,7 @@ Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD ve | |||||
9.Known Issues | 9.Known Issues | ||||
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit | * The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit | ||||
is 64. On 32 bits, it is 32. | is 64. On 32 bits, it is 32. | ||||
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) can compile the code successfully. | |||||
10. Specification of Git Branches | 10. Specification of Git Branches | ||||
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). | We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). | ||||
@@ -79,4 +80,4 @@ Now, there are 4 branches in github.com. | |||||
* The master branch. This is the main branch that reflects a production-ready state. | * The master branch. This is the main branch that reflects a production-ready state. | ||||
* The develop branch. This is the main branch that reflects a state with the latest delivered development changes for the next release. | * The develop branch. This is the main branch that reflects a state with the latest delivered development changes for the next release. | ||||
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature into the develop branch in the future. | * The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature into the develop branch in the future. | ||||
* The gh-pages branch. This is for web pages | |||||
* The gh-pages branch. This is for web pages |
@@ -220,6 +220,11 @@ REALNAME: ;\ | |||||
#define BUFFER_SIZE ( 8 << 20) | #define BUFFER_SIZE ( 8 << 20) | ||||
#if defined(LOONGSON3A) | |||||
#define PAGESIZE (16UL << 10) | |||||
#define FIXED_PAGESIZE (16UL << 10) | |||||
#endif | |||||
#ifndef PAGESIZE | #ifndef PAGESIZE | ||||
#define PAGESIZE (64UL << 10) | #define PAGESIZE (64UL << 10) | ||||
#endif | #endif | ||||
@@ -38,7 +38,7 @@ | |||||
#include <stdio.h> | #include <stdio.h> | ||||
#include <stdlib.h> | #include <stdlib.h> | ||||
#include <sys/mman.h> | |||||
//#include <sys/mman.h> | |||||
#include "common.h" | #include "common.h" | ||||
#ifndef USE_OPENMP | #ifndef USE_OPENMP | ||||
@@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME) | |||||
zip : dll | zip : dll | ||||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) | zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) | ||||
dll : libgoto2.dll | |||||
dll : ../$(LIBDLLNAME) | |||||
#libgoto2.dll | |||||
dll2 : libgoto2_shared.dll | dll2 : libgoto2_shared.dll | ||||
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||||
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||||
$(RANLIB) ../$(LIBNAME) | $(RANLIB) ../$(LIBNAME) | ||||
ifeq ($(BINARY32), 1) | ifeq ($(BINARY32), 1) | ||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | --entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | ||||
-lib /machine:i386 /def:libgoto2.def | -lib /machine:i386 /def:libgoto2.def | ||||
else | else | ||||
$(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | --entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | ||||
-lib /machine:X64 /def:libgoto2.def | -lib /machine:X64 /def:libgoto2.def | ||||
endif | endif | ||||
@@ -91,15 +91,37 @@ ifndef ZGEMM_BETA | |||||
ZGEMM_BETA = ../generic/zgemm_beta.c | ZGEMM_BETA = ../generic/zgemm_beta.c | ||||
endif | endif | ||||
ifndef STRSMKERNEL_LN | |||||
STRSMKERNEL_LN = trsm_kernel_LN.S | STRSMKERNEL_LN = trsm_kernel_LN.S | ||||
endif | |||||
ifndef STRSMKERNEL_LT | |||||
STRSMKERNEL_LT = trsm_kernel_LT.S | STRSMKERNEL_LT = trsm_kernel_LT.S | ||||
endif | |||||
ifndef STRSMKERNEL_RN | |||||
STRSMKERNEL_RN = trsm_kernel_LT.S | STRSMKERNEL_RN = trsm_kernel_LT.S | ||||
endif | |||||
ifndef STRSMKERNEL_RT | |||||
STRSMKERNEL_RT = trsm_kernel_RT.S | STRSMKERNEL_RT = trsm_kernel_RT.S | ||||
endif | |||||
ifndef DTRSMKERNEL_LN | |||||
DTRSMKERNEL_LN = trsm_kernel_LN.S | DTRSMKERNEL_LN = trsm_kernel_LN.S | ||||
endif | |||||
ifndef DTRSMKERNEL_LT | |||||
DTRSMKERNEL_LT = trsm_kernel_LT.S | DTRSMKERNEL_LT = trsm_kernel_LT.S | ||||
endif | |||||
ifndef DTRSMKERNEL_RN | |||||
DTRSMKERNEL_RN = trsm_kernel_LT.S | DTRSMKERNEL_RN = trsm_kernel_LT.S | ||||
endif | |||||
ifndef DTRSMKERNEL_RT | |||||
DTRSMKERNEL_RT = trsm_kernel_RT.S | DTRSMKERNEL_RT = trsm_kernel_RT.S | ||||
endif | |||||
CTRSMKERNEL_LN = ztrsm_kernel_LT.S | CTRSMKERNEL_LN = ztrsm_kernel_LT.S | ||||
CTRSMKERNEL_LT = ztrsm_kernel_LT.S | CTRSMKERNEL_LT = ztrsm_kernel_LT.S | ||||
@@ -1,2 +1,24 @@ | |||||
SAXPYKERNEL=axpy_loongson3a.S | SAXPYKERNEL=axpy_loongson3a.S | ||||
DAXPYKERNEL=daxpy_loongson3a_simd.S | DAXPYKERNEL=daxpy_loongson3a_simd.S | ||||
SGEMMKERNEL = sgemm_kernel_loongson3a.S | |||||
SGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
SGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
SGEMMONCOPYOBJ = sgemm_oncopy.o | |||||
SGEMMOTCOPYOBJ = sgemm_otcopy.o | |||||
DGEMMKERNEL = gemm_kernel_loongson3a.S | |||||
DGEMMONCOPY = ../generic/gemm_ncopy_4.c | |||||
DGEMMOTCOPY = ../generic/gemm_tcopy_4.c | |||||
DGEMMONCOPYOBJ = dgemm_oncopy.o | |||||
DGEMMOTCOPYOBJ = dgemm_otcopy.o | |||||
STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c | |||||
DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c | |||||
DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c | |||||
DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c | |||||
DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c |
@@ -1480,27 +1480,29 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define GEMM_DEFAULT_OFFSET_B 0 | #define GEMM_DEFAULT_OFFSET_B 0 | ||||
#define GEMM_DEFAULT_ALIGN 0x03fffUL | #define GEMM_DEFAULT_ALIGN 0x03fffUL | ||||
#define SGEMM_DEFAULT_UNROLL_M 2 | |||||
#define SGEMM_DEFAULT_UNROLL_N 8 | |||||
#define DGEMM_DEFAULT_UNROLL_M 2 | |||||
#define DGEMM_DEFAULT_UNROLL_N 8 | |||||
#define SGEMM_DEFAULT_UNROLL_M 4 | |||||
#define SGEMM_DEFAULT_UNROLL_N 4 | |||||
#define DGEMM_DEFAULT_UNROLL_M 4 | |||||
#define DGEMM_DEFAULT_UNROLL_N 4 | |||||
#define CGEMM_DEFAULT_UNROLL_M 1 | #define CGEMM_DEFAULT_UNROLL_M 1 | ||||
#define CGEMM_DEFAULT_UNROLL_N 4 | #define CGEMM_DEFAULT_UNROLL_N 4 | ||||
#define ZGEMM_DEFAULT_UNROLL_M 1 | #define ZGEMM_DEFAULT_UNROLL_M 1 | ||||
#define ZGEMM_DEFAULT_UNROLL_N 4 | #define ZGEMM_DEFAULT_UNROLL_N 4 | ||||
#define SGEMM_DEFAULT_P 108 | |||||
#define DGEMM_DEFAULT_P 112 | |||||
#define SGEMM_DEFAULT_P 32 | |||||
#define DGEMM_DEFAULT_P 32 | |||||
#define CGEMM_DEFAULT_P 108 | #define CGEMM_DEFAULT_P 108 | ||||
#define ZGEMM_DEFAULT_P 112 | #define ZGEMM_DEFAULT_P 112 | ||||
#define SGEMM_DEFAULT_Q 288 | |||||
#define DGEMM_DEFAULT_Q 144 | |||||
#define SGEMM_DEFAULT_Q 116 | |||||
#define DGEMM_DEFAULT_Q 116 | |||||
#define CGEMM_DEFAULT_Q 144 | #define CGEMM_DEFAULT_Q 144 | ||||
#define ZGEMM_DEFAULT_Q 72 | #define ZGEMM_DEFAULT_Q 72 | ||||
#define SGEMM_DEFAULT_R 2000 | |||||
#define DGEMM_DEFAULT_R 2000 | |||||
#define SGEMM_DEFAULT_R 1000 | |||||
#define DGEMM_DEFAULT_R 1000 | |||||
#define CGEMM_DEFAULT_R 2000 | #define CGEMM_DEFAULT_R 2000 | ||||
#define ZGEMM_DEFAULT_R 2000 | #define ZGEMM_DEFAULT_R 2000 | ||||