@@ -1,8 +1,13 @@ | |||
*.obj | |||
*.lib | |||
*.dll | |||
*.def | |||
*.o | |||
lapack-3.1.1 | |||
lapack-3.1.1.tgz | |||
*.so | |||
*.a | |||
.svn | |||
*~ | |||
config.h | |||
Makefile.conf | |||
@@ -1,13 +1,40 @@ | |||
OpenBLAS ChangeLog | |||
==================================================================== | |||
Version 0.1 alpha2(in development) | |||
Version 0.1 alpha2 | |||
23-Jun-2011 | |||
common: | |||
* | |||
* Fixed blasint undefined bug in <cblas.h> file. Other software | |||
could include this header successfully(Refs issue #13 on github) | |||
* Fixed the SEGFAULT bug on 64 cores. On SMP server, the number | |||
of CPUs or cores should be less than or equal to 64.(Refs issue #14 | |||
on github) | |||
* Support "void goto_set_num_threads(int num_threads)" and "void | |||
openblas_set_num_threads(int num_threads)" when USE_OPENMP=1 | |||
* Added extern "C" to support C++. Thank Tasio for the patch(Refs | |||
issue #21 on github) | |||
* Provided an error message when the arch is not supported.(Refs | |||
issue #19 on github) | |||
* Fixed issue #23. Fixed a bug of f_check script about generating link flags. | |||
* Added openblas_set_num_threads for Fortran. | |||
* Fixed #25 a wrong result of rotmg. | |||
* Fixed a bug about detecting underscore prefix in c_check. | |||
* Print the wall time (cycles) with enabling FUNCTION_PROFILE | |||
* Fixed #35 a build bug with NO_LAPACK=1 & DYNAMIC_ARCH=1 | |||
* Added install target. You can use "make install". (Refs #20) | |||
x86/x86_64: | |||
* | |||
* Fixed #28 a wrong result of dsdot on x86_64. | |||
* Fixed #32 a SEGFAULT bug of zdotc with gcc-4.6. | |||
* Fixed #33 ztrmm bug on Nehalem. | |||
* Worked around #27, the low performance axpy issue with small input size & multithreads. | |||
MIPS64: | |||
* | |||
* Fixed #28 a wrong result of dsdot on Loongson3A/MIPS64. | |||
* Optimized single/double precision BLAS Level3 on Loongson3A/MIPS64. (Refs #2) | |||
* Optimized single/double precision axpy function on Loongson3A/MIPS64. (Refs #3) | |||
==================================================================== | |||
Version 0.1 alpha1 | |||
20-Mar-2011 | |||
@@ -15,6 +15,10 @@ ifdef SANITY_CHECK | |||
BLASDIRS += reference | |||
endif | |||
ifndef PREFIX | |||
PREFIX = /opt/OpenBLAS | |||
endif | |||
SUBDIRS = $(BLASDIRS) | |||
ifneq ($(NO_LAPACK), 1) | |||
SUBDIRS += lapack | |||
@@ -22,8 +26,8 @@ endif | |||
SUBDIRS_ALL = $(SUBDIRS) test ctest utest exports benchmark ../laswp ../bench | |||
.PHONY : all libs netlib test ctest shared | |||
.NOTPARALLEL : all libs prof lapack-test | |||
.PHONY : all libs netlib test ctest shared install | |||
.NOTPARALLEL : all libs prof lapack-test install | |||
all :: libs netlib tests shared | |||
@echo | |||
@@ -70,7 +74,7 @@ ifeq ($(OSNAME), Darwin) | |||
endif | |||
ifeq ($(OSNAME), WINNT) | |||
$(MAKE) -C exports dll | |||
# -ln -fs $(LIBDLLNAME) libopenblas.dll | |||
-ln -fs $(LIBDLLNAME) libopenblas.dll | |||
endif | |||
ifeq ($(OSNAME), CYGWIN_NT) | |||
$(MAKE) -C exports dll | |||
@@ -96,18 +100,26 @@ endif | |||
endif | |||
libs : | |||
ifeq ($(CORE), UNKOWN) | |||
$(error OpenBLAS: Detecting CPU failed. Please set TARGET explicitly, e.g. make TARGET=your_cpu_target. Please read README for the detail.) | |||
endif | |||
-ln -fs $(LIBNAME) libopenblas.$(LIBSUFFIX) | |||
for d in $(SUBDIRS) ; \ | |||
do if test -d $$d; then \ | |||
$(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
fi; \ | |||
done | |||
#Save the config files for installation | |||
cp Makefile.conf Makefile.conf_last | |||
cp config.h config_last.h | |||
ifdef DYNAMIC_ARCH | |||
$(MAKE) -C kernel commonlibs || exit 1 | |||
for d in $(DYNAMIC_CORE) ; \ | |||
do $(MAKE) GOTOBLAS_MAKEFILE= -C kernel TARGET_CORE=$$d kernel || exit 1 ;\ | |||
done | |||
echo DYNAMIC_ARCH=1 >> Makefile.conf_last | |||
endif | |||
touch lib.grd | |||
prof : prof_blas prof_lapack | |||
@@ -227,19 +239,23 @@ lapack-test : | |||
dummy : | |||
# Installation is handled by a dedicated makefile; this rule only forwards
# the goal (its own name) to it.
install :
	$(MAKE) -f Makefile.install $(@F)
clean :: | |||
@for d in $(SUBDIRS_ALL) ; \ | |||
do if test -d $$d; then \ | |||
$(MAKE) -C $$d $(@F) || exit 1 ; \ | |||
fi; \ | |||
done | |||
ifdef DYNAMIC_ARCH | |||
#ifdef DYNAMIC_ARCH | |||
@$(MAKE) -C kernel clean | |||
endif | |||
#endif | |||
@rm -f *.$(LIBSUFFIX) *.so *~ *.exe getarch getarch_2nd *.dll *.lib *.$(SUFFIX) *.dwf libopenblas.$(LIBSUFFIX) libopenblas_p.$(LIBSUFFIX) *.lnk myconfig.h | |||
@rm -f Makefile.conf config.h Makefile_kernel.conf config_kernel.h st* *.dylib | |||
@if test -d lapack-3.1.1; then \ | |||
echo deleting lapack-3.1.1; \ | |||
rm -rf lapack-3.1.1 ;\ | |||
fi | |||
@rm -f *.grd Makefile.conf_last config_last.h | |||
@echo Done. |
@@ -0,0 +1,65 @@ | |||
# Installs the headers and libraries produced by a previous build into
# $(PREFIX). Run via the "install" goal after the library has been built.
TOPDIR	= .

# Makefile.system guards an expensive detection step with
# "ifndef GOTOBLAS_MAKEFILE"; defining it here skips that step.
export GOTOBLAS_MAKEFILE = 1

# Configuration saved by the last build. Optional ("-include") so that a
# missing build is reported by the lib.grd rule below instead of here.
-include $(TOPDIR)/Makefile.conf_last
include ./Makefile.system

# Never install into an empty path: if nothing supplied PREFIX, fall back to
# the default directory named in the README.
PREFIX ?= /opt/OpenBLAS

.PHONY : install
.NOTPARALLEL : install

# lib.grd is created at the end of a successful library build. If it is
# absent this recipe is expanded, and $(error) aborts the installation.
lib.grd :
	$(error OpenBLAS: Please run "make" firstly)

# Every symlink below targets the bare file name, so it resolves inside
# $(PREFIX) itself. This keeps the links valid when PREFIX is a relative
# path and when the installed tree is later moved.
install : 	lib.grd
# Stop immediately if the destination cannot be created.
	@mkdir -p $(PREFIX)
	@echo Generating openblas_config.h in $(PREFIX)
# Headers: openblas_config.h = include guard + saved config + version + template
	@echo \#ifndef OPENBLAS_CONFIG_H > $(PREFIX)/openblas_config.h
	@echo \#define OPENBLAS_CONFIG_H >> $(PREFIX)/openblas_config.h
	@cat config_last.h >> $(PREFIX)/openblas_config.h
	@echo \#define VERSION \" OpenBLAS $(VERSION) \" >> $(PREFIX)/openblas_config.h
	@cat openblas_config_template.h >> $(PREFIX)/openblas_config.h
	@echo \#endif >> $(PREFIX)/openblas_config.h

	@echo Generating f77blas.h in $(PREFIX)
	@echo \#ifndef OPENBLAS_F77BLAS_H > $(PREFIX)/f77blas.h
	@echo \#define OPENBLAS_F77BLAS_H >> $(PREFIX)/f77blas.h
	@echo \#include \"openblas_config.h\" >> $(PREFIX)/f77blas.h
	@cat common_interface.h >> $(PREFIX)/f77blas.h
	@echo \#endif >> $(PREFIX)/f77blas.h

# The installed cblas.h must include openblas_config.h instead of common.h.
	@echo Generating cblas.h in $(PREFIX)
	@sed 's/common/openblas_config/g' cblas.h > $(PREFIX)/cblas.h

# Static library
	@echo Copy the static library to $(PREFIX)
	@cp $(LIBNAME) $(PREFIX)
	@-ln -fs $(notdir $(LIBNAME)) $(PREFIX)/libopenblas.$(LIBSUFFIX)

# Shared library. Errors are ignored ("-") on purpose: the shared library
# may not have been built on this platform.
	@echo Copy the shared library to $(PREFIX)
ifneq ($(filter Linux FreeBSD NetBSD,$(OSNAME)),)
	-cp $(LIBSONAME) $(PREFIX)
	-ln -fs $(notdir $(LIBSONAME)) $(PREFIX)/libopenblas.so
endif
ifeq ($(OSNAME), Darwin)
	-cp $(LIBDYNNAME) $(PREFIX)
	-ln -fs $(notdir $(LIBDYNNAME)) $(PREFIX)/libopenblas.dylib
endif
ifneq ($(filter WINNT CYGWIN_NT,$(OSNAME)),)
	-cp $(LIBDLLNAME) $(PREFIX)
	-ln -fs $(notdir $(LIBDLLNAME)) $(PREFIX)/libopenblas.dll
endif

	@echo Install OK!
@@ -91,6 +91,9 @@ VERSION = 0.1alpha2 | |||
# SANITY_CHECK to compare the result with reference BLAS. | |||
# UTEST_CHECK = 1 | |||
# The installation directory. | |||
# PREFIX = /opt/OpenBLAS | |||
# Common Optimization Flag; -O2 is enough. | |||
# DEBUG = 1 | |||
@@ -30,6 +30,10 @@ ifdef TARGET | |||
GETARCH_FLAGS += -DFORCE_$(TARGET) | |||
endif | |||
ifdef INTERFACE64 | |||
GETARCH_FLAGS += -DUSE64BITINT | |||
endif | |||
# This operation is expensive, so execution should be once. | |||
ifndef GOTOBLAS_MAKEFILE | |||
export GOTOBLAS_MAKEFILE = 1 | |||
@@ -185,7 +189,7 @@ ifeq ($(C_COMPILER), INTEL) | |||
CCOMMON_OPT += -wd981 | |||
endif | |||
ifdef USE_OPENMP | |||
ifeq ($(USE_OPENMP), 1) | |||
ifeq ($(C_COMPILER), GCC) | |||
CCOMMON_OPT += -fopenmp | |||
endif | |||
@@ -489,7 +493,8 @@ endif | |||
ifdef BINARY64 | |||
ifdef INTERFACE64 | |||
CCOMMON_OPT += -DUSE64BITINT | |||
CCOMMON_OPT += | |||
#-DUSE64BITINT | |||
endif | |||
endif | |||
@@ -510,6 +515,10 @@ ifeq ($(DYNAMIC_ARCH), 1) | |||
CCOMMON_OPT += -DDYNAMIC_ARCH | |||
endif | |||
ifeq ($(NO_LAPACK), 1) | |||
CCOMMON_OPT += -DNO_LAPACK | |||
endif | |||
ifdef SMP | |||
CCOMMON_OPT += -DSMP_SERVER | |||
@@ -8,7 +8,9 @@ Download from project homepage. http://xianyi.github.com/OpenBLAS/ | |||
Or, | |||
check out codes from git://github.com/xianyi/OpenBLAS.git | |||
1)Normal compile | |||
Please read GotoBLAS_02QuickInstall.txt or type "make" | |||
(a) type "make" to detect the CPU automatically. | |||
or | |||
(b) type "make TARGET=xxx" to set target CPU, e.g. "make TARGET=NEHALEM". The full target list is in file TargetList.txt. | |||
2)Cross compile | |||
Please set CC and FC with the cross toolchains. Then, set HOSTCC with your host C compiler. At last, set TARGET explicitly. | |||
@@ -20,6 +22,11 @@ make BINARY=64 CC=mips64el-unknown-linux-gnu-gcc FC=mips64el-unknown-linux-gnu-g | |||
3)Debug version | |||
make DEBUG=1 | |||
4)Install to the directory (Optional) | |||
e.g. | |||
make install PREFIX=your_installation_directory | |||
The default directory is /opt/OpenBLAS | |||
3.Support CPU & OS | |||
Please read GotoBLAS_01Readme.txt | |||
@@ -39,13 +46,17 @@ export GOTO_NUM_THREADS=4 | |||
or | |||
export OMP_NUM_THREADS=4 | |||
The priorities are OPENBLAS_NUM_THREAD > GOTO_NUM_THREADS > OMP_NUM_THREADS. | |||
The priorities are OPENBLAS_NUM_THREADS > GOTO_NUM_THREADS > OMP_NUM_THREADS. | |||
If you compile this lib with USE_OPENMP=1, you should only set OMP_NUM_THREADS environment variable. | |||
4.2 Set the number of threads with calling functions. for example, | |||
void goto_set_num_threads(int num_threads); | |||
or | |||
void openblas_set_num_threads(int num_threads); | |||
If you compile this lib with USE_OPENMP=1, you should use the above functions, too. | |||
5.Report Bugs | |||
Please add an issue in https://github.com/xianyi/OpenBLAS/issues | |||
@@ -56,4 +67,17 @@ Optimization on ICT Loongson 3A CPU | |||
OpenBLAS users mailing list: http://list.rdcps.ac.cn/mailman/listinfo/openblas | |||
8.ChangeLog | |||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. | |||
Please see Changelog.txt to obtain the differences between GotoBLAS2 1.13 BSD version. | |||
9.Known Issues | |||
* The number of CPUs/Cores should be less than or equal to 8*sizeof(unsigned long). On 64 bits, the limit | |||
is 64. On 32 bits, it is 32. | |||
* This library is not compatible with EKOPath Compiler Suite 4.0.10 (http://www.pathscale.com/ekopath-compiler-suite). However, Path64 (https://github.com/path64/compiler) could compile the codes successfully. | |||
10. Specification of Git Branches | |||
We used the git branching model in this article (http://nvie.com/posts/a-successful-git-branching-model/). | |||
Now, there are 4 branches in github.com. | |||
* The master branch. This is a main branch to reflect a production-ready state. | |||
* The develop branch. This is a main branch to reflect a state with the latest delivered development changes for the next release. | |||
* The loongson3a branch. This is a feature branch. We develop Loongson3A codes on this branch. We will merge this feature to develop branch in future. | |||
* The gh-pages branch. This is for web pages |
@@ -0,0 +1,57 @@ | |||
Force Target Examples: | |||
make TARGET=NEHALEM | |||
make TARGET=LOONGSON3A BINARY=64 | |||
make TARGET=ISTANBUL | |||
Supported List: | |||
1.X86/X86_64 | |||
a)Intel CPU: | |||
P2 | |||
COPPERMINE | |||
KATMAI | |||
NORTHWOOD | |||
PRESCOTT | |||
BANIAS | |||
YONAH | |||
CORE2 | |||
PENRYN | |||
DUNNINGTON | |||
NEHALEM | |||
ATOM | |||
b)AMD CPU: | |||
ATHLON | |||
OPTERON | |||
OPTERON_SSE3 | |||
BARCELONA | |||
SHANGHAI | |||
ISTANBUL | |||
c)VIA CPU: | |||
SSE_GENERIC | |||
VIAC3 | |||
NANO | |||
2.Power CPU: | |||
POWER4 | |||
POWER5 | |||
POWER6 | |||
PPCG4 | |||
PPC970 | |||
PPC970MP | |||
PPC440 | |||
PPC440FP2 | |||
CELL | |||
3.MIPS64 CPU: | |||
SICORTEX | |||
LOONGSON3A | |||
4.IA64 CPU: | |||
ITANIUM2 | |||
5.SPARC CPU: | |||
SPARC | |||
SPARCV7 | |||
@@ -149,7 +149,7 @@ $binformat = bin64 if ($data =~ /BINARY_64/); | |||
$data = `$compiler_name -S ctest1.c && grep globl ctest1.s | head -n 1 && rm -f ctest1.s`; | |||
$data =~ /globl\ ([_\.]*)(.*)/; | |||
$data =~ /globl\s([_\.]*)(.*)/; | |||
$need_fu = $1; | |||
@@ -1,6 +1,14 @@ | |||
#ifndef CBLAS_H | |||
#define CBLAS_H | |||
#ifdef __cplusplus | |||
extern "C" { | |||
/* Assume C declarations for C++ */ | |||
#endif /* __cplusplus */ | |||
#include <stddef.h> | |||
#include "common.h" | |||
#define CBLAS_INDEX size_t | |||
enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102}; | |||
@@ -270,4 +278,10 @@ void cblas_zher2k(enum CBLAS_ORDER Order, enum CBLAS_UPLO Uplo, enum CBLAS_TRANS | |||
double *alpha, double *A, blasint lda, double *B, blasint ldb, double beta, double *C, blasint ldc); | |||
void cblas_xerbla(blasint p, char *rout, char *form, ...); | |||
#ifdef __cplusplus | |||
} | |||
#endif /* __cplusplus */ | |||
#endif |
@@ -39,6 +39,11 @@ | |||
#ifndef COMMON_H | |||
#define COMMON_H | |||
#ifdef __cplusplus | |||
extern "C" { | |||
/* Assume C declarations for C++ */ | |||
#endif /* __cplusplus */ | |||
#ifndef _GNU_SOURCE | |||
#define _GNU_SOURCE | |||
#endif | |||
@@ -607,4 +612,9 @@ extern int gotoblas_profile; | |||
#define PRINT_DEBUG_NAME if (readenv("GOTO_DEBUG")) fprintf(stderr, "GotoBLAS : %s\n", CHAR_NAME) | |||
#endif | |||
#ifdef __cplusplus | |||
} | |||
#endif /* __cplusplus */ | |||
#endif |
@@ -60,4 +60,8 @@ float _Complex BLASFUNC_REF(cdotc) (blasint *, float *, blasint *, float *, | |||
double _Complex BLASFUNC_REF(zdotu) (blasint *, double *, blasint *, double *, blasint *); | |||
double _Complex BLASFUNC_REF(zdotc) (blasint *, double *, blasint *, double *, blasint *); | |||
void BLASFUNC_REF(drotmg)(double *, double *, double *, double *, double *); | |||
double BLASFUNC_REF(dsdot)(blasint *, float *, blasint *, float *, blasint*); | |||
#endif |
@@ -1302,24 +1302,25 @@ int get_coretype(void){ | |||
case 13: | |||
return CORE_DUNNINGTON; | |||
} | |||
break; | |||
case 2: | |||
switch (model) { | |||
case 5: | |||
//Intel Core (Clarkdale) / Core (Arrandale) | |||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale) | |||
// Xeon (Clarkdale), 32nm | |||
return CORE_NEHALEM; | |||
case 12: | |||
//Xeon Processor 5600 (Westmere-EP) | |||
return CORE_NEHALEM; | |||
} | |||
break; | |||
break; | |||
case 2: | |||
switch (model) { | |||
case 5: | |||
//Intel Core (Clarkdale) / Core (Arrandale) | |||
// Pentium (Clarkdale) / Pentium Mobile (Arrandale) | |||
// Xeon (Clarkdale), 32nm | |||
return CORE_NEHALEM; | |||
case 12: | |||
//Xeon Processor 5600 (Westmere-EP) | |||
return CORE_NEHALEM; | |||
} | |||
break; | |||
} | |||
break; | |||
case 15: | |||
if (model <= 0x2) return CORE_NORTHWOOD; | |||
return CORE_PRESCOTT; | |||
if (model <= 0x2) return CORE_NORTHWOOD; | |||
else return CORE_PRESCOTT; | |||
} | |||
} | |||
@@ -6,7 +6,7 @@ COMMONOBJS = memory.$(SUFFIX) xerbla.$(SUFFIX) c_abs.$(SUFFIX) z_abs.$(SUFFIX) | |||
COMMONOBJS += slamch.$(SUFFIX) slamc3.$(SUFFIX) dlamch.$(SUFFIX) dlamc3.$(SUFFIX) | |||
ifdef SMP | |||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) | |||
COMMONOBJS += blas_server.$(SUFFIX) divtable.$(SUFFIX) blasL1thread.$(SUFFIX) openblas_set_num_threads.$(SUFFIX) | |||
ifndef NO_AFFINITY | |||
COMMONOBJS += init.$(SUFFIX) | |||
endif | |||
@@ -100,6 +100,9 @@ memory.$(SUFFIX) : $(MEMORY) ../../common.h ../../param.h | |||
blas_server.$(SUFFIX) : $(BLAS_SERVER) ../../common.h ../../common_thread.h ../../param.h | |||
$(CC) $(CFLAGS) -c $< -o $(@F) | |||
openblas_set_num_threads.$(SUFFIX) : openblas_set_num_threads.c | |||
$(CC) $(CFLAGS) -c $< -o $(@F) | |||
blasL1thread.$(SUFFIX) : blas_l1_thread.c ../../common.h ../../common_thread.h | |||
$(CC) $(CFLAGS) -c $< -o $(@F) | |||
@@ -38,7 +38,7 @@ | |||
#include <stdio.h> | |||
#include <stdlib.h> | |||
#include <sys/mman.h> | |||
//#include <sys/mman.h> | |||
#include "common.h" | |||
#ifndef USE_OPENMP | |||
@@ -49,6 +49,26 @@ | |||
int blas_server_avail = 0; | |||
void goto_set_num_threads(int num_threads) { | |||
if (num_threads < 1) num_threads = blas_num_threads; | |||
if (num_threads > MAX_CPU_NUMBER) num_threads = MAX_CPU_NUMBER; | |||
if (num_threads > blas_num_threads) { | |||
blas_num_threads = num_threads; | |||
} | |||
blas_cpu_number = num_threads; | |||
omp_set_num_threads(blas_cpu_number); | |||
} | |||
/* Same operation as goto_set_num_threads(), offered under the OpenBLAS name. */
void openblas_set_num_threads(int num_threads)
{
  goto_set_num_threads(num_threads);
}
int blas_thread_init(void){ | |||
blas_get_cpu_number(); | |||
@@ -172,13 +172,20 @@ static inline int rcount(unsigned long number) { | |||
return count; | |||
} | |||
/*** | |||
Known issue: The number of CPUs/cores should be less | |||
than or equal to 8*sizeof(unsigned long). On 64 bits, | |||
the limit is 64. On 32 bits, it is 32. | |||
***/ | |||
static inline unsigned long get_cpumap(int node) { | |||
int infile; | |||
unsigned long affinity; | |||
char name[160]; | |||
char cpumap[160]; | |||
char *p, *dummy; | |||
int i=0; | |||
sprintf(name, CPUMAP_NAME, node); | |||
infile = open(name, O_RDONLY); | |||
@@ -187,13 +194,19 @@ static inline unsigned long get_cpumap(int node) { | |||
if (infile != -1) { | |||
read(infile, name, sizeof(name)); | |||
read(infile, cpumap, sizeof(cpumap)); | |||
p = cpumap; | |||
while (*p != '\n' && i<160){ | |||
if(*p != ',') { | |||
name[i++]=*p; | |||
} | |||
p++; | |||
} | |||
p = name; | |||
while ((*p == '0') || (*p == ',')) p++; | |||
// while ((*p == '0') || (*p == ',')) p++; | |||
affinity = strtol(p, &dummy, 16); | |||
affinity = strtoul(p, &dummy, 16); | |||
close(infile); | |||
} | |||
@@ -347,7 +360,13 @@ static void disable_hyperthread(void) { | |||
unsigned long share; | |||
int cpu; | |||
common -> avail = (1UL << common -> num_procs) - 1; | |||
if(common->num_procs > 64){ | |||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->num_procs); | |||
exit(1); | |||
}else if(common->num_procs == 64){ | |||
common -> avail = 0xFFFFFFFFFFFFFFFFUL; | |||
}else | |||
common -> avail = (1UL << common -> num_procs) - 1; | |||
#ifdef DEBUG | |||
fprintf(stderr, "\nAvail CPUs : %04lx.\n", common -> avail); | |||
@@ -376,7 +395,13 @@ static void disable_affinity(void) { | |||
fprintf(stderr, "CPU mask : %04lx.\n\n", *(unsigned long *)&cpu_orig_mask[0]); | |||
#endif | |||
lprocmask = (1UL << common -> final_num_procs) - 1; | |||
if(common->final_num_procs > 64){ | |||
fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(64). Terminated.\n", common->final_num_procs); | |||
exit(1); | |||
}else if(common->final_num_procs == 64){ | |||
lprocmask = 0xFFFFFFFFFFFFFFFFUL; | |||
}else | |||
lprocmask = (1UL << common -> final_num_procs) - 1; | |||
#ifndef USE_OPENMP | |||
lprocmask &= *(unsigned long *)&cpu_orig_mask[0]; | |||
@@ -0,0 +1,45 @@ | |||
/***************************************************************************** | |||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ICSAS | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the ISCAS nor the names of its contributors may | |||
be used to endorse or promote products derived from this software | |||
without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
**********************************************************************************/ | |||
#include "common.h" | |||
#if defined(SMP_SERVER) && defined(OS_LINUX)

/* Defined in another translation unit of the library. */
extern void openblas_set_num_threads(int num_threads);

/*
 * By-reference variant of openblas_set_num_threads(): reads the thread count
 * through the pointer and forwards it. NAME is a macro that is not defined in
 * this file -- presumably supplied by the build as the exported symbol name
 * for Fortran callers; confirm against the interface Makefile.
 */
void NAME(int *num_threads) {
  int count = *num_threads;

  openblas_set_num_threads(count);
}

#endif /* SMP_SERVER && OS_LINUX */
@@ -74,20 +74,21 @@ void gotoblas_profile_quit(void) { | |||
if (cycles > 0) { | |||
fprintf(stderr, "\n\t====== BLAS Profiling Result =======\n\n"); | |||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle\n"); | |||
fprintf(stderr, " Function No. of Calls Time Consumption Efficiency Bytes/cycle Wall Time(Cycles)\n"); | |||
for (i = 0; i < MAX_PROF_TABLE; i ++) { | |||
if (function_profile_table[i].calls) { | |||
#ifndef OS_WINDOWS | |||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f\n", | |||
fprintf(stderr, "%-12s : %10Ld %8.2f%% %10.3f%% %8.2f %Ld\n", | |||
#else | |||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f\n", | |||
fprintf(stderr, "%-12s : %10lld %8.2f%% %10.3f%% %8.2f %lld\n", | |||
#endif | |||
func_table[i], | |||
function_profile_table[i].calls, | |||
(double)function_profile_table[i].cycles / (double)cycles * 100., | |||
(double)function_profile_table[i].fops / (double)function_profile_table[i].tcycles * 100., | |||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles | |||
(double)function_profile_table[i].area / (double)function_profile_table[i].cycles, | |||
function_profile_table[i].cycles | |||
); | |||
} | |||
} | |||
@@ -53,18 +53,19 @@ dyn : $(LIBDYNNAME) | |||
zip : dll | |||
zip $(LIBZIPNAME) $(LIBDLLNAME) $(LIBNAME) | |||
dll : libgoto2.dll | |||
dll : ../$(LIBDLLNAME) | |||
#libgoto2.dll | |||
dll2 : libgoto2_shared.dll | |||
libgoto2.dll : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||
../$(LIBDLLNAME) : ../$(LIBNAME) libgoto2.def dllinit.$(SUFFIX) | |||
$(RANLIB) ../$(LIBNAME) | |||
ifeq ($(BINARY32), 1) | |||
$(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||
--entry _dllinit@12 -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | |||
-lib /machine:i386 /def:libgoto2.def | |||
else | |||
$(DLLWRAP) -o $(@F) --def libgoto2.def \ | |||
$(DLLWRAP) -o ../$(LIBDLLNAME) --def libgoto2.def \ | |||
--entry _dllinit -s dllinit.$(SUFFIX) --dllname $(@F) ../$(LIBNAME) $(FEXTRALIB) | |||
-lib /machine:X64 /def:libgoto2.def | |||
endif | |||
@@ -84,7 +85,7 @@ libgoto_hpl.def : gensymbol | |||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) > $(@F) | |||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def | |||
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o $(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||
$(PREFIX)gcc $(CFLAGS) -all_load -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | |||
symbol.$(SUFFIX) : symbol.S | |||
$(CC) $(CFLAGS) -c -o $(@F) $^ | |||
@@ -274,6 +274,7 @@ if ($link ne "") { | |||
&& ($flags !~ /kernel32/) | |||
&& ($flags !~ /advapi32/) | |||
&& ($flags !~ /shell32/) | |||
&& ($flags !~ /^\-l$/) | |||
) { | |||
$linker_l .= $flags . " "; | |||
} | |||
@@ -604,30 +604,41 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
#ifndef POWER | |||
#define POWER | |||
#endif | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#if defined(__i386__) || (__x86_64__) | |||
#include "cpuid_x86.c" | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#ifdef __ia64__ | |||
#include "cpuid_ia64.c" | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#ifdef __alpha | |||
#include "cpuid_alpha.c" | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#ifdef POWER | |||
#include "cpuid_power.c" | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#ifdef sparc | |||
#include "cpuid_sparc.c" | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#ifdef __mips__ | |||
#include "cpuid_mips.c" | |||
#define OPENBLAS_SUPPORTED | |||
#endif | |||
#ifndef OPENBLAS_SUPPORTED | |||
#error "This arch/CPU is not supported by OpenBLAS." | |||
#endif | |||
#else | |||
@@ -30,6 +30,10 @@ int main(int argc, char **argv) { | |||
printf("#define DLOCAL_BUFFER_SIZE\t%ld\n", (DGEMM_DEFAULT_Q * DGEMM_DEFAULT_UNROLL_N * 2 * 1 * sizeof(double))); | |||
printf("#define CLOCAL_BUFFER_SIZE\t%ld\n", (CGEMM_DEFAULT_Q * CGEMM_DEFAULT_UNROLL_N * 4 * 2 * sizeof(float))); | |||
printf("#define ZLOCAL_BUFFER_SIZE\t%ld\n", (ZGEMM_DEFAULT_Q * ZGEMM_DEFAULT_UNROLL_N * 2 * 2 * sizeof(double))); | |||
#ifdef USE64BITINT | |||
printf("#define USE64BITINT\n"); | |||
#endif | |||
} | |||
return 0; | |||
@@ -85,7 +85,11 @@ void CNAME(blasint n, FLOAT alpha, FLOAT *x, blasint incx, FLOAT *y, blasint inc | |||
//In that case, the threads would be dependent. | |||
if (incx == 0 || incy == 0) | |||
nthreads = 1; | |||
//Temporarily walk around the low performance issue with small imput size & multithreads. | |||
if (n <= 10000) | |||
nthreads = 1; | |||
if (nthreads == 1) { | |||
#endif | |||
@@ -49,6 +49,7 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ | |||
BLASLONG n = *N; | |||
BLASLONG incx = *INCX; | |||
BLASLONG incy = *INCY; | |||
double ret = 0.0; | |||
PRINT_DEBUG_NAME; | |||
@@ -61,19 +62,21 @@ double NAME(blasint *N, float *x, blasint *INCX, float *y, blasint *INCY){ | |||
if (incx < 0) x -= (n - 1) * incx; | |||
if (incy < 0) y -= (n - 1) * incy; | |||
return DSDOT_K(n, x, incx, y, incy); | |||
ret=DSDOT_K(n, x, incx, y, incy); | |||
FUNCTION_PROFILE_END(1, n, n); | |||
IDEBUG_END; | |||
return 0; | |||
return ret; | |||
} | |||
#else | |||
double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ | |||
double ret = 0.0; | |||
PRINT_DEBUG_CNAME; | |||
@@ -86,13 +89,13 @@ double CNAME(blasint n, float *x, blasint incx, float *y, blasint incy){ | |||
if (incx < 0) x -= (n - 1) * incx; | |||
if (incy < 0) y -= (n - 1) * incy; | |||
return DSDOT_K(n, x, incx, y, incy); | |||
ret=DSDOT_K(n, x, incx, y, incy); | |||
FUNCTION_PROFILE_END(1, n, n); | |||
IDEBUG_END; | |||
return 0; | |||
return ret; | |||
} | |||
@@ -7,6 +7,12 @@ | |||
#define GAMSQ 16777216.e0 | |||
#define RGAMSQ 5.9604645e-8 | |||
#ifdef DOUBLE | |||
#define ABS(x) fabs(x) | |||
#else | |||
#define ABS(x) fabsf(x) | |||
#endif | |||
#ifndef CBLAS | |||
void NAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT *DY1, FLOAT *dparam){ | |||
@@ -47,7 +53,7 @@ void CNAME(FLOAT *dd1, FLOAT *dd2, FLOAT *dx1, FLOAT dy1, FLOAT *dparam){ | |||
dq2 = dp2 * dy1; | |||
dq1 = dp1 * *dx1; | |||
if (! (abs(dq1) > abs(dq2))) goto L40; | |||
if (! (ABS(dq1) > ABS(dq2))) goto L40; | |||
dh21 = -(dy1) / *dx1; | |||
dh12 = dp2 / dp1; | |||
@@ -140,7 +146,7 @@ L150: | |||
goto L130; | |||
L160: | |||
if (! (abs(*dd2) <= RGAMSQ)) { | |||
if (! (ABS(*dd2) <= RGAMSQ)) { | |||
goto L190; | |||
} | |||
if (*dd2 == ZERO) { | |||
@@ -157,7 +163,7 @@ L180: | |||
goto L160; | |||
L190: | |||
if (! (abs(*dd2) >= GAMSQ)) { | |||
if (! (ABS(*dd2) >= GAMSQ)) { | |||
goto L220; | |||
} | |||
igo = 3; | |||
@@ -53,6 +53,11 @@ SBLASOBJS += setparam$(TSUFFIX).$(SUFFIX) | |||
CCOMMON_OPT += -DTS=$(TSUFFIX) | |||
endif | |||
KERNEL_INTERFACE = ../common_level1.h ../common_level2.h ../common_level3.h | |||
ifneq ($(NO_LAPACK), 1) | |||
KERNEL_INTERFACE += ../common_lapack.h | |||
endif | |||
ifeq ($(ARCH), x86) | |||
COMMONOBJS += cpuid.$(SUFFIX) | |||
endif | |||
@@ -88,9 +93,10 @@ setparam$(TSUFFIX).$(SUFFIX): setparam$(TSUFFIX).c kernel$(TSUFFIX).h | |||
setparam$(TSUFFIX).c : setparam-ref.c | |||
sed 's/TS/$(TSUFFIX)/g' $< > $(@F) | |||
kernel$(TSUFFIX).h : ../common_level1.h ../common_level2.h ../common_level3.h ../common_lapack.h | |||
kernel$(TSUFFIX).h : $(KERNEL_INTERFACE) | |||
sed 's/\ *(/$(TSUFFIX)(/g' $^ > $(@F) | |||
cpuid.$(SUFFIX): $(KERNELDIR)/cpuid.S | |||
$(CC) -c $(CFLAGS) $< -o $(@F) | |||
@@ -112,10 +118,10 @@ lsame.$(PSUFFIX): $(KERNELDIR)/$(LSAME_KERNEL) | |||
cpuid.$(PSUFFIX): $(KERNELDIR)/cpuid.S | |||
$(CC) -c $(PFLAGS) $< -o $(@F) | |||
ifdef DYNAMIC_ARCH | |||
#ifdef DYNAMIC_ARCH | |||
clean :: | |||
@rm -f setparam_*.c kernel_*.h setparam.h kernel.h | |||
endif | |||
#endif | |||
include $(TOPDIR)/Makefile.tail |
@@ -668,7 +668,7 @@ $(KDIR)qdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)qdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNEL | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -DXDOUBLE $< -o $@ | |||
$(KDIR)dsdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)dsdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE -DDSDOT $< -o $@ | |||
$(KDIR)sdot_k$(TSUFFIX).$(SUFFIX) $(KDIR)sdot_k$(TPSUFFIX).$(PSUFFIX) : $(KERNELDIR)/$(SDOTKERNEL) | |||
$(CC) -c $(CFLAGS) -UCOMPLEX -UDOUBLE $< -o $@ | |||
@@ -300,7 +300,11 @@ | |||
.align 3 | |||
.L999: | |||
j $31 | |||
ADD s1, s1, s2 | |||
#ifdef DSDOT | |||
cvt.d.s s1, s1 | |||
#endif | |||
j $31 | |||
NOP | |||
EPILOGUE |
@@ -101,7 +101,11 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
ssymm_outcopyTS, ssymm_oltcopyTS, | |||
#ifndef NO_LAPACK | |||
sneg_tcopyTS, slaswp_ncopyTS, | |||
#else | |||
NULL,NULL, | |||
#endif | |||
0, 0, 0, | |||
DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N, MAX(DGEMM_DEFAULT_UNROLL_M, DGEMM_DEFAULT_UNROLL_N), | |||
@@ -147,7 +151,11 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
dsymm_outcopyTS, dsymm_oltcopyTS, | |||
#ifndef NO_LAPACK | |||
dneg_tcopyTS, dlaswp_ncopyTS, | |||
#else | |||
NULL, NULL, | |||
#endif | |||
#ifdef EXPRECISION | |||
@@ -195,7 +203,11 @@ gotoblas_t TABLE_NAME = { | |||
#endif | |||
qsymm_outcopyTS, qsymm_oltcopyTS, | |||
#ifndef NO_LAPACK | |||
qneg_tcopyTS, qlaswp_ncopyTS, | |||
#else | |||
NULL, NULL, | |||
#endif | |||
#endif | |||
@@ -286,7 +298,11 @@ gotoblas_t TABLE_NAME = { | |||
chemm3m_oucopyrTS, chemm3m_olcopyrTS, | |||
chemm3m_oucopyiTS, chemm3m_olcopyiTS, | |||
#ifndef NO_LAPACK | |||
cneg_tcopyTS, claswp_ncopyTS, | |||
#else | |||
NULL, NULL, | |||
#endif | |||
0, 0, 0, | |||
ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N, MAX(ZGEMM_DEFAULT_UNROLL_M, ZGEMM_DEFAULT_UNROLL_N), | |||
@@ -375,7 +391,11 @@ gotoblas_t TABLE_NAME = { | |||
zhemm3m_oucopyrTS, zhemm3m_olcopyrTS, | |||
zhemm3m_oucopyiTS, zhemm3m_olcopyiTS, | |||
#ifndef NO_LAPACK | |||
zneg_tcopyTS, zlaswp_ncopyTS, | |||
#else | |||
NULL, NULL, | |||
#endif | |||
#ifdef EXPRECISION | |||
@@ -466,7 +486,11 @@ gotoblas_t TABLE_NAME = { | |||
xhemm3m_oucopyrTS, xhemm3m_olcopyrTS, | |||
xhemm3m_oucopyiTS, xhemm3m_olcopyiTS, | |||
#ifndef NO_LAPACK | |||
xneg_tcopyTS, xlaswp_ncopyTS, | |||
#else | |||
NULL, NULL, | |||
#endif | |||
#endif | |||
@@ -1541,5 +1541,8 @@ | |||
popl %ebx | |||
popl %esi | |||
popl %edi | |||
/*remove the hidden return value address from the stack.*/ | |||
popl %ecx | |||
xchgl %ecx, 0(%esp) | |||
ret | |||
EPILOGUE |
@@ -1286,6 +1286,10 @@ | |||
haddps %xmm0, %xmm0 | |||
#endif | |||
#ifdef DSDOT | |||
cvtss2sd %xmm0, %xmm0 | |||
#endif | |||
RESTOREREGISTERS | |||
ret | |||
@@ -544,7 +544,7 @@ | |||
jg .L11 | |||
#if defined(TRMMKERNEL) && !defined(LEFT) | |||
addq $1, KK | |||
addq $4, KK | |||
#endif | |||
leaq (C, LDC, 4), C | |||
@@ -594,7 +594,7 @@ | |||
jg .L11 | |||
#if defined(TRMMKERNEL) && !defined(LEFT) | |||
addq $1, KK | |||
addq $4, KK | |||
#endif | |||
leaq (C, LDC, 4), C | |||
@@ -0,0 +1,21 @@ | |||
/*This is only for "make install" target.*/ | |||
/* BLASFUNC(FUNC) expands to the symbol name used for routine FUNC:
 * FUNC with a trailing underscore when NEEDBUNDERSCORE is defined,
 * FUNC unchanged otherwise. */
#ifdef NEEDBUNDERSCORE | |||
#define BLASFUNC(FUNC) FUNC##_ | |||
#else | |||
#define BLASFUNC(FUNC) FUNC | |||
#endif | |||
/* BLASLONG / BLASULONG: signed and unsigned "long" integer types.
 * When both OS_WINDOWS and __64BIT__ are defined they are long long;
 * in every other configuration they are plain long.
 * NOTE(review): presumably because long is only 32 bits wide on 64-bit
 * Windows - confirm against the build configuration. */
#if defined(OS_WINDOWS) && defined(__64BIT__) | |||
typedef long long BLASLONG; | |||
typedef unsigned long long BLASULONG; | |||
#else | |||
typedef long BLASLONG; | |||
typedef unsigned long BLASULONG; | |||
#endif | |||
/* blasint: integer type of the BLAS interface. It is BLASLONG when
 * USE64BITINT is defined and int otherwise. */
#ifdef USE64BITINT | |||
typedef BLASLONG blasint; | |||
#else | |||
typedef int blasint; | |||
#endif |
@@ -128,6 +128,8 @@ CBLASOBJS = $(CBLAS1OBJS) $(CBLAS2OBJS) $(CBLAS3OBJS) | |||
ZBLASOBJS = $(ZBLAS1OBJS) $(ZBLAS2OBJS) $(ZBLAS3OBJS) | |||
XBLASOBJS = $(XBLAS1OBJS) $(XBLAS2OBJS) $(XBLAS3OBJS) | |||
ifneq ($(NO_LAPACK), 1) | |||
SBLASOBJS += \ | |||
sgetf2f.$(SUFFIX) sgetrff.$(SUFFIX) slauu2f.$(SUFFIX) slauumf.$(SUFFIX) \ | |||
spotf2f.$(SUFFIX) spotrff.$(SUFFIX) strti2f.$(SUFFIX) strtrif.$(SUFFIX) \ | |||
@@ -160,6 +162,7 @@ XBLASOBJS += | |||
xpotf2f.$(SUFFIX) xpotrff.$(SUFFIX) xtrti2f.$(SUFFIX) xtrtrif.$(SUFFIX) \ | |||
xlaswpf.$(SUFFIX) xgetrsf.$(SUFFIX) xgesvf.$(SUFFIX) xpotrif.$(SUFFIX) \ | |||
endif | |||
include $(TOPDIR)/Makefile.tail | |||
@@ -5,12 +5,12 @@ include $(TOPDIR)/Makefile.system | |||
# Name of the unit-test executable; it is the output of the link rule below
# and the program started by run_test.
TARGET=openblas_utest | |||
# Path of the CUnit static library handed to the link command.
CUNIT_LIB=/usr/local/lib/libcunit.a | |||
# Object files that make up the test program.
# NOTE(review): OBJS is assigned twice in a row; the second assignment (which
# adds test_rotmg.o and test_dsdot.o) overrides the first. These look like the
# before/after lines of a patch - confirm which one is intended.
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o | |||
OBJS=main.o test_rot.o test_swap.o test_axpy.o test_dotu.o test_rotmg.o test_dsdot.o | |||
# "all" depends on run_test, so building it links and runs the tests.
all : run_test | |||
# Link the test objects ($^) against ../$(LIBNAME), CUnit and $(EXTRALIB).
# NOTE(review): two link commands are listed, one driven by $(CC) and one by
# $(FC); presumably the old and new line of a patch - confirm which applies.
$(TARGET): $(OBJS) | |||
$(CC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) | |||
$(FC) -o $@ $^ ../$(LIBNAME) $(CUNIT_LIB) $(EXTRALIB) | |||
# Execute the test binary from the current directory.
run_test: $(TARGET) | |||
./$(TARGET) | |||
@@ -57,4 +57,8 @@ void test_caxpy_inc_0(void); | |||
void test_zdotu_n_1(void); | |||
void test_zdotu_offset_1(void); | |||
void test_drotmg(void); | |||
void test_dsdot_n_1(void); | |||
#endif |
@@ -54,7 +54,10 @@ CU_TestInfo test_level1[]={ | |||
{"Testing zdotu with n == 1",test_zdotu_n_1}, | |||
{"Testing zdotu with input x & y offset == 1",test_zdotu_offset_1}, | |||
{"Testing drotmg",test_drotmg}, | |||
{"Testing dsdot with n == 1",test_dsdot_n_1}, | |||
CU_TEST_INFO_NULL, | |||
}; | |||
@@ -0,0 +1,50 @@ | |||
/***************************************************************************** | |||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the ISCAS nor the names of its contributors may | |||
be used to endorse or promote products derived from this software | |||
without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
**********************************************************************************/ | |||
#include "common_utest.h" | |||
void test_dsdot_n_1() | |||
{ | |||
float x= 0.172555164; | |||
float y= -0.0138700781; | |||
int incx=1; | |||
int incy=1; | |||
int n=1; | |||
double res1=0.0f, res2=0.0f; | |||
res1=BLASFUNC(dsdot)(&n, &x, &incx, &y, &incy); | |||
res2=BLASFUNC_REF(dsdot)(&n, &x, &incx, &y, &incy); | |||
CU_ASSERT_DOUBLE_EQUAL(res1, res2, CHECK_EPS); | |||
} |
@@ -0,0 +1,60 @@ | |||
/***************************************************************************** | |||
Copyright (c) 2011, Lab of Parallel Software and Computational Science,ISCAS | |||
All rights reserved. | |||
Redistribution and use in source and binary forms, with or without | |||
modification, are permitted provided that the following conditions are | |||
met: | |||
1. Redistributions of source code must retain the above copyright | |||
notice, this list of conditions and the following disclaimer. | |||
2. Redistributions in binary form must reproduce the above copyright | |||
notice, this list of conditions and the following disclaimer in | |||
the documentation and/or other materials provided with the | |||
distribution. | |||
3. Neither the name of the ISCAS nor the names of its contributors may | |||
be used to endorse or promote products derived from this software | |||
without specific prior written permission. | |||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
**********************************************************************************/ | |||
#include "common_utest.h" | |||
void test_drotmg() | |||
{ | |||
double te_d1, tr_d1; | |||
double te_d2, tr_d2; | |||
double te_x1, tr_x1; | |||
double te_y1, tr_y1; | |||
double te_param[5],tr_param[5]; | |||
int i=0; | |||
te_d1= tr_d1=0.21149573940783739; | |||
te_d2= tr_d2=0.046892057172954082; | |||
te_x1= tr_x1=-0.42272687517106533; | |||
te_y1= tr_y1=0.42211309121921659; | |||
//OpenBLAS | |||
BLASFUNC(drotmg)(&te_d1, &te_d2, &te_x1, &te_y1, te_param); | |||
//reference | |||
BLASFUNC_REF(drotmg)(&tr_d1, &tr_d2, &tr_x1, &tr_y1, tr_param); | |||
CU_ASSERT_DOUBLE_EQUAL(te_d1, tr_d1, CHECK_EPS); | |||
CU_ASSERT_DOUBLE_EQUAL(te_d2, tr_d2, CHECK_EPS); | |||
CU_ASSERT_DOUBLE_EQUAL(te_x1, tr_x1, CHECK_EPS); | |||
CU_ASSERT_DOUBLE_EQUAL(te_y1, tr_y1, CHECK_EPS); | |||
for(i=0; i<5; i++){ | |||
CU_ASSERT_DOUBLE_EQUAL(te_param[i], tr_param[i], CHECK_EPS); | |||
} | |||
} |