@@ -324,14 +324,16 @@ ifeq ($(ARCH), x86) | |||||
DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | DYNAMIC_CORE = KATMAI COPPERMINE NORTHWOOD PRESCOTT BANIAS \ | ||||
CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | CORE2 PENRYN DUNNINGTON NEHALEM ATHLON OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | ||||
ifneq ($(NO_AVX), 1) | ifneq ($(NO_AVX), 1) | ||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||||
DYNAMIC_CORE += SANDYBRIDGE | |||||
#BULLDOZER PILEDRIVER | |||||
endif | endif | ||||
endif | endif | ||||
ifeq ($(ARCH), x86_64) | ifeq ($(ARCH), x86_64) | ||||
DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | DYNAMIC_CORE = PRESCOTT CORE2 PENRYN DUNNINGTON NEHALEM OPTERON OPTERON_SSE3 BARCELONA BOBCAT ATOM NANO | ||||
ifneq ($(NO_AVX), 1) | ifneq ($(NO_AVX), 1) | ||||
DYNAMIC_CORE += SANDYBRIDGE BULLDOZER PILEDRIVER | |||||
DYNAMIC_CORE += SANDYBRIDGE | |||||
#BULLDOZER PILEDRIVER | |||||
endif | endif | ||||
endif | endif | ||||
@@ -895,6 +897,7 @@ export CC | |||||
export FC | export FC | ||||
export BU | export BU | ||||
export FU | export FU | ||||
export NEED2UNDERSCORES | |||||
export USE_THREAD | export USE_THREAD | ||||
export NUM_THREADS | export NUM_THREADS | ||||
export NUM_CORES | export NUM_CORES | ||||
@@ -105,8 +105,8 @@ | |||||
#define CORE_NANO 19 | #define CORE_NANO 19 | ||||
#define CORE_SANDYBRIDGE 20 | #define CORE_SANDYBRIDGE 20 | ||||
#define CORE_BOBCAT 21 | #define CORE_BOBCAT 21 | ||||
#define CORE_BULLDOZER 22 | |||||
#define CORE_PILEDRIVER 23 | |||||
#define CORE_BULLDOZER CORE_BARCELONA | |||||
#define CORE_PILEDRIVER CORE_BARCELONA | |||||
#define CORE_HASWELL CORE_SANDYBRIDGE | #define CORE_HASWELL CORE_SANDYBRIDGE | ||||
#define HAVE_SSE (1 << 0) | #define HAVE_SSE (1 << 0) | ||||
@@ -198,8 +198,8 @@ typedef struct { | |||||
#define CPUTYPE_NANO 43 | #define CPUTYPE_NANO 43 | ||||
#define CPUTYPE_SANDYBRIDGE 44 | #define CPUTYPE_SANDYBRIDGE 44 | ||||
#define CPUTYPE_BOBCAT 45 | #define CPUTYPE_BOBCAT 45 | ||||
#define CPUTYPE_BULLDOZER 46 | |||||
#define CPUTYPE_PILEDRIVER 47 | |||||
#define CPUTYPE_BULLDOZER CPUTYPE_BARCELONA | |||||
#define CPUTYPE_PILEDRIVER CPUTYPE_BARCELONA | |||||
// this define is because BLAS doesn't have haswell specific optimizations yet | // this define is because BLAS doesn't have haswell specific optimizations yet | ||||
#define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE | #define CPUTYPE_HASWELL CPUTYPE_SANDYBRIDGE | ||||
@@ -63,14 +63,16 @@ extern gotoblas_t gotoblas_BARCELONA; | |||||
extern gotoblas_t gotoblas_BOBCAT; | extern gotoblas_t gotoblas_BOBCAT; | ||||
#ifndef NO_AVX | #ifndef NO_AVX | ||||
extern gotoblas_t gotoblas_SANDYBRIDGE; | extern gotoblas_t gotoblas_SANDYBRIDGE; | ||||
extern gotoblas_t gotoblas_BULLDOZER; | |||||
extern gotoblas_t gotoblas_PILEDRIVER; | |||||
//extern gotoblas_t gotoblas_BULLDOZER; | |||||
//extern gotoblas_t gotoblas_PILEDRIVER; | |||||
#else | #else | ||||
//Use NEHALEM kernels for sandy bridge | //Use NEHALEM kernels for sandy bridge | ||||
#define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | #define gotoblas_SANDYBRIDGE gotoblas_NEHALEM | ||||
#endif | |||||
#define gotoblas_BULLDOZER gotoblas_BARCELONA | #define gotoblas_BULLDOZER gotoblas_BARCELONA | ||||
#define gotoblas_PILEDRIVER gotoblas_BARCELONA | #define gotoblas_PILEDRIVER gotoblas_BARCELONA | ||||
#endif | |||||
//Use sandy bridge kernels for haswell. | //Use sandy bridge kernels for haswell. | ||||
#define gotoblas_HASWELL gotoblas_SANDYBRIDGE | #define gotoblas_HASWELL gotoblas_SANDYBRIDGE | ||||
@@ -18,6 +18,10 @@ ifndef NO_LAPACKE | |||||
NO_LAPACKE = 0 | NO_LAPACKE = 0 | ||||
endif | endif | ||||
ifndef NEED2UNDERSCORES | |||||
NEED2UNDERSCORES=0 | |||||
endif | |||||
ifeq ($(OSNAME), WINNT) | ifeq ($(OSNAME), WINNT) | ||||
ifeq ($(F_COMPILER), GFORTRAN) | ifeq ($(F_COMPILER), GFORTRAN) | ||||
EXTRALIB += -lgfortran | EXTRALIB += -lgfortran | ||||
@@ -94,13 +98,13 @@ libgoto2_shared.dll : ../$(LIBNAME) libgoto2_shared.def | |||||
-Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB) | -Wl,--out-implib,libgoto2_shared.lib $(FEXTRALIB) | ||||
libopenblas.def : gensymbol | libopenblas.def : gensymbol | ||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||||
libgoto2_shared.def : gensymbol | libgoto2_shared.def : gensymbol | ||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||||
perl ./gensymbol win2k $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||||
libgoto_hpl.def : gensymbol | libgoto_hpl.def : gensymbol | ||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||||
perl ./gensymbol win2khpl $(ARCH) dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||||
$(LIBDYNNAME) : ../$(LIBNAME) osx.def | $(LIBDYNNAME) : ../$(LIBNAME) osx.def | ||||
$(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | $(CC) $(CFLAGS) -all_load -headerpad_max_install_names -install_name $(CURDIR)/../$(LIBDYNNAME) -dynamiclib -o ../$(LIBDYNNAME) $< -Wl,-exported_symbols_list,osx.def $(FEXTRALIB) | ||||
@@ -187,23 +191,23 @@ static : ../$(LIBNAME) | |||||
rm -f goto.$(SUFFIX) | rm -f goto.$(SUFFIX) | ||||
linux.def : gensymbol ../Makefile.system ../getarch.c | linux.def : gensymbol ../Makefile.system ../getarch.c | ||||
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||||
perl ./gensymbol linux $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||||
osx.def : gensymbol ../Makefile.system ../getarch.c | osx.def : gensymbol ../Makefile.system ../getarch.c | ||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||||
perl ./gensymbol osx $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||||
aix.def : gensymbol ../Makefile.system ../getarch.c | aix.def : gensymbol ../Makefile.system ../getarch.c | ||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > $(@F) | |||||
perl ./gensymbol aix $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > $(@F) | |||||
symbol.S : gensymbol | symbol.S : gensymbol | ||||
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > symbol.S | |||||
perl ./gensymbol win2kasm noarch dummy $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > symbol.S | |||||
test : linktest.c | test : linktest.c | ||||
$(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | $(CC) $(CFLAGS) $(LDFLAGS) -w -o linktest linktest.c ../$(LIBSONAME) -lm && echo OK. | ||||
rm -f linktest | rm -f linktest | ||||
linktest.c : gensymbol ../Makefile.system ../getarch.c | linktest.c : gensymbol ../Makefile.system ../getarch.c | ||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) > linktest.c | |||||
perl ./gensymbol linktest $(ARCH) $(BU) $(EXPRECISION) $(NO_CBLAS) $(NO_LAPACK) $(NO_LAPACKE) $(NEED2UNDERSCORES) > linktest.c | |||||
clean :: | clean :: | ||||
@rm -f *.def *.dylib __.SYMDEF* | @rm -f *.def *.dylib __.SYMDEF* | ||||
@@ -114,8 +114,8 @@ | |||||
# ALLAUX -- Auxiliary routines called from all precisions | # ALLAUX -- Auxiliary routines called from all precisions | ||||
# already provided by @blasobjs: xerbla, lsame | # already provided by @blasobjs: xerbla, lsame | ||||
ilaenv, ieeeck, lsamen, xerbla_array, iparmq, | |||||
ilaprec, ilatrans, ilauplo, iladiag, chla_transtype, | |||||
ilaenv, ieeeck, lsamen, iparmq, | |||||
ilaprec, ilatrans, ilauplo, iladiag, | |||||
ilaver, slamch, slamc3, | ilaver, slamch, slamc3, | ||||
# SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. | # SCLAUX -- Auxiliary routines called from both REAL and COMPLEX. | ||||
@@ -2672,12 +2672,25 @@ | |||||
#LAPACKE_zlagsy_work, | #LAPACKE_zlagsy_work, | ||||
); | ); | ||||
#These functions may need 2 underscores. | |||||
@lapack_embeded_underscore_objs=(xerbla_array, chla_transtype,); | |||||
if ($ARGV[5] == 1) { | if ($ARGV[5] == 1) { | ||||
#NO_LAPACK=1 | #NO_LAPACK=1 | ||||
@underscore_objs = (@blasobjs, @misc_underscore_objs); | @underscore_objs = (@blasobjs, @misc_underscore_objs); | ||||
} elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" || | } elsif (-d "../lapack-3.1.1" || -d "../lapack-3.4.0" || -d "../lapack-3.4.1" || | ||||
-d "../lapack-3.4.2" || -d "../lapack-netlib") { | -d "../lapack-3.4.2" || -d "../lapack-netlib") { | ||||
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); | |||||
if ($ARGV[7] == 0){ | |||||
# NEED2UNDERSCORES=0 | |||||
# Don't need 2 underscores | |||||
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs, @lapack_embeded_underscore_objs); | |||||
}else{ | |||||
# Need 2 underscores | |||||
@underscore_objs = (@blasobjs, @lapackobjs, @lapackobjs2, @misc_underscore_objs); | |||||
@need_2underscore_objs = (@lapack_embeded_underscore_objs); | |||||
}; | |||||
} else { | } else { | ||||
@underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); | @underscore_objs = (@blasobjs, @lapackobjs, @misc_underscore_objs); | ||||
} | } | ||||
@@ -2729,6 +2742,10 @@ if ($ARGV[0] eq "linux"){ | |||||
print $objs, $bu, "\n"; | print $objs, $bu, "\n"; | ||||
} | } | ||||
foreach $objs (@need_2underscore_objs) { | |||||
print $objs, $bu, $bu, "\n"; | |||||
} | |||||
# if ($ARGV[4] == 0) { | # if ($ARGV[4] == 0) { | ||||
foreach $objs (@no_underscore_objs) { | foreach $objs (@no_underscore_objs) { | ||||
print $objs, "\n"; | print $objs, "\n"; | ||||
@@ -2750,6 +2767,10 @@ if ($ARGV[0] eq "osx"){ | |||||
print "_", $objs, $bu, "\n"; | print "_", $objs, $bu, "\n"; | ||||
} | } | ||||
foreach $objs (@need_2underscore_objs) { | |||||
print "_", $objs, $bu, $bu, "\n"; | |||||
} | |||||
# if ($ARGV[4] == 0) { | # if ($ARGV[4] == 0) { | ||||
foreach $objs (@no_underscore_objs) { | foreach $objs (@no_underscore_objs) { | ||||
print "_", $objs, "\n"; | print "_", $objs, "\n"; | ||||
@@ -2767,6 +2788,10 @@ if ($ARGV[0] eq "aix"){ | |||||
print $objs, $bu, "\n"; | print $objs, $bu, "\n"; | ||||
} | } | ||||
foreach $objs (@need_2underscore_objs) { | |||||
print $objs, $bu, $bu, "\n"; | |||||
} | |||||
# if ($ARGV[4] == 0) { | # if ($ARGV[4] == 0) { | ||||
foreach $objs (@no_underscore_objs) { | foreach $objs (@no_underscore_objs) { | ||||
print $objs, "\n"; | print $objs, "\n"; | ||||
@@ -2791,6 +2816,17 @@ if ($ARGV[0] eq "win2k"){ | |||||
print "\t$uppercase=$objs", "_ \@", $count, "\n"; | print "\t$uppercase=$objs", "_ \@", $count, "\n"; | ||||
$count ++; | $count ++; | ||||
} | } | ||||
foreach $objs (@need_2underscore_objs) { | |||||
$uppercase = $objs; | |||||
$uppercase =~ tr/[a-z]/[A-Z]/; | |||||
print "\t$objs=$objs","__ \@", $count, "\n"; | |||||
$count ++; | |||||
print "\t",$objs, "__=$objs","__ \@", $count, "\n"; | |||||
$count ++; | |||||
print "\t$uppercase=$objs", "__ \@", $count, "\n"; | |||||
$count ++; | |||||
} | |||||
#for misc_common_objs | #for misc_common_objs | ||||
foreach $objs (@misc_common_objs) { | foreach $objs (@misc_common_objs) { | ||||
@@ -2852,6 +2888,18 @@ if ($ARGV[0] eq "microsoft"){ | |||||
print "\t$uppercase\_ = $objs","_\n"; | print "\t$uppercase\_ = $objs","_\n"; | ||||
$count ++; | $count ++; | ||||
} | } | ||||
foreach $objs (@need_2underscore_objs) { | |||||
$uppercase = $objs; | |||||
$uppercase =~ tr/[a-z]/[A-Z]/; | |||||
print "\t$objs=$objs","__ \@", $count, "\n"; | |||||
$count ++; | |||||
print "\t",$objs, "__=$objs","__ \@", $count, "\n"; | |||||
$count ++; | |||||
print "\t$uppercase=$objs", "__ \@", $count, "\n"; | |||||
$count ++; | |||||
} | |||||
exit(0); | exit(0); | ||||
} | } | ||||
@@ -2868,6 +2916,16 @@ if ($ARGV[0] eq "win2kasm"){ | |||||
print "_", $uppercase, "_:\n"; | print "_", $uppercase, "_:\n"; | ||||
print "\tjmp\t_", $objs, "_\n"; | print "\tjmp\t_", $objs, "_\n"; | ||||
} | } | ||||
foreach $objs (@need_2underscore_objs) { | |||||
$uppercase = $objs; | |||||
$uppercase =~ tr/[a-z]/[A-Z]/; | |||||
print "\t.align 16\n"; | |||||
print "\t.globl _", $uppercase, "__\n"; | |||||
print "_", $uppercase, "__:\n"; | |||||
print "\tjmp\t_", $objs, "__\n"; | |||||
} | |||||
exit(0); | exit(0); | ||||
} | } | ||||
@@ -2880,6 +2938,11 @@ if ($ARGV[0] eq "linktest"){ | |||||
foreach $objs (@underscore_objs) { | foreach $objs (@underscore_objs) { | ||||
print $objs, $bu, "();\n" if $objs ne "xerbla"; | print $objs, $bu, "();\n" if $objs ne "xerbla"; | ||||
} | } | ||||
foreach $objs (@need_2underscore_objs) { | |||||
print $objs, $bu, $bu, "();\n"; | |||||
} | |||||
# if ($ARGV[4] == 0) { | # if ($ARGV[4] == 0) { | ||||
foreach $objs (@no_underscore_objs) { | foreach $objs (@no_underscore_objs) { | ||||
print $objs, "();\n"; | print $objs, "();\n"; | ||||
@@ -114,6 +114,12 @@ if ($compiler eq "") { | |||||
$vendor = IBM; | $vendor = IBM; | ||||
$openmp = "-openmp"; | $openmp = "-openmp"; | ||||
} | } | ||||
# For a name with an embedded underscore, e.g. zho_ge, the compiler may append 2 underscores. | |||||
$data = `$compiler -O2 -S ftest3.f > /dev/null 2>&1 && cat ftest3.s && rm -f ftest3.s`; | |||||
if ($data =~ /zho_ge__/) { | |||||
$need2bu = 1; | |||||
} | |||||
} | } | ||||
if ($vendor eq "") { | if ($vendor eq "") { | ||||
@@ -245,6 +251,8 @@ if ($link ne "") { | |||||
$link =~ s/\-rpath\s+/\-rpath\@/g; | $link =~ s/\-rpath\s+/\-rpath\@/g; | ||||
$link =~ s/\-rpath-link\s+/\-rpath-link\@/g; | |||||
@flags = split(/[\s\,\n]/, $link); | @flags = split(/[\s\,\n]/, $link); | ||||
# remove leading and trailing quotes from each flag. | # remove leading and trailing quotes from each flag. | ||||
@flags = map {s/^['"]|['"]$//g; $_} @flags; | @flags = map {s/^['"]|['"]$//g; $_} @flags; | ||||
@@ -265,7 +273,15 @@ if ($link ne "") { | |||||
$linker_L .= "-Wl,". $flags . " "; | $linker_L .= "-Wl,". $flags . " "; | ||||
} | } | ||||
if ($flags =~ /^\-rpath/) { | |||||
if ($flags =~ /^\-rpath\@/) { | |||||
$flags =~ s/\@/\,/g; | |||||
if ($vendor eq "PGI") { | |||||
$flags =~ s/lib$/libso/; | |||||
} | |||||
$linker_L .= "-Wl,". $flags . " " ; | |||||
} | |||||
if ($flags =~ /^\-rpath-link\@/) { | |||||
$flags =~ s/\@/\,/g; | $flags =~ s/\@/\,/g; | ||||
if ($vendor eq "PGI") { | if ($vendor eq "PGI") { | ||||
$flags =~ s/lib$/libso/; | $flags =~ s/lib$/libso/; | ||||
@@ -309,6 +325,9 @@ print MAKEFILE "NOFORTRAN=1\n" if $nofortran == 1; | |||||
print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne ""; | print CONFFILE "#define BUNDERSCORE\t$bu\n" if $bu ne ""; | ||||
print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne ""; | print CONFFILE "#define NEEDBUNDERSCORE\t1\n" if $bu ne ""; | ||||
print CONFFILE "#define NEED2UNDERSCORES\t1\n" if $need2bu ne ""; | |||||
print MAKEFILE "NEED2UNDERSCORES=1\n" if $need2bu ne ""; | |||||
if (($linker_l ne "") || ($linker_a ne "")) { | if (($linker_l ne "") || ($linker_a ne "")) { | ||||
print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n"; | print MAKEFILE "FEXTRALIB=$linker_L $linker_l $linker_a\n"; | ||||
@@ -0,0 +1,6 @@ | |||||
! Probe function whose name contains an embedded underscore.
! It takes no arguments and returns a double complex zero.
! NOTE(review): presumably this is the ftest3.f source that f_check
! compiles to assembly and scans for "zho_ge__", to detect Fortran
! compilers that append two underscores to such names -- confirm.
double complex function zho_ge() | |||||
zho_ge = (0.0d0,0.0d0) | |||||
return | |||||
end |
@@ -354,7 +354,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define CORENAME "OPTERON" | #define CORENAME "OPTERON" | ||||
#endif | #endif | ||||
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) | |||||
#if defined(FORCE_BARCELONA) || defined(FORCE_SHANGHAI) || defined(FORCE_ISTANBUL) || defined (FORCE_PILEDRIVER) || defined (FORCE_BULLDOZER) | |||||
#define FORCE | #define FORCE | ||||
#define FORCE_INTEL | #define FORCE_INTEL | ||||
#define ARCHITECTURE "X86" | #define ARCHITECTURE "X86" | ||||
@@ -384,7 +384,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define CORENAME "BOBCAT" | #define CORENAME "BOBCAT" | ||||
#endif | #endif | ||||
#if defined (FORCE_BULLDOZER) | |||||
#if 0 | |||||
#define FORCE | #define FORCE | ||||
#define FORCE_INTEL | #define FORCE_INTEL | ||||
#define ARCHITECTURE "X86" | #define ARCHITECTURE "X86" | ||||
@@ -400,7 +400,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
#define CORENAME "BULLDOZER" | #define CORENAME "BULLDOZER" | ||||
#endif | #endif | ||||
#if defined (FORCE_PILEDRIVER) | |||||
#if 0 | |||||
#define FORCE | #define FORCE | ||||
#define FORCE_INTEL | #define FORCE_INTEL | ||||
#define ARCHITECTURE "X86" | #define ARCHITECTURE "X86" | ||||
@@ -8,7 +8,7 @@ | |||||
int main(int argc, char **argv) { | int main(int argc, char **argv) { | ||||
if ( (argc <= 1) || (argc >= 2) && (*argv[1] == '0')) { | |||||
if ( (argc <= 1) || ((argc >= 2) && (*argv[1] == '0'))) { | |||||
printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M); | printf("SGEMM_UNROLL_M=%d\n", SGEMM_DEFAULT_UNROLL_M); | ||||
printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N); | printf("SGEMM_UNROLL_N=%d\n", SGEMM_DEFAULT_UNROLL_N); | ||||
printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M); | printf("DGEMM_UNROLL_M=%d\n", DGEMM_DEFAULT_UNROLL_M); | ||||
@@ -60,7 +60,7 @@ static blasint (*trtri_parallel[])(blas_arg_t *, BLASLONG *, BLASLONG *, FLOAT * | |||||
}; | }; | ||||
#endif | #endif | ||||
extern void dtrtri_lapack_(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); | |||||
extern void BLASFUNC(dtrtrilapack)(char *UPLO, char *DIAG, int *N, double *a, int *ldA, int *Info); | |||||
int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ | int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *Info){ | ||||
@@ -137,7 +137,10 @@ int NAME(char *UPLO, char *DIAG, blasint *N, FLOAT *a, blasint *ldA, blasint *In | |||||
// double trtri_U single thread error | // double trtri_U single thread error | ||||
// call dtrtri from lapack as a workaround. | // call dtrtri from lapack as a workaround. | ||||
if(uplo==0){ | if(uplo==0){ | ||||
dtrtri_lapack_(UPLO, DIAG, N, a, ldA, Info); | |||||
BLASFUNC(dtrtrilapack)(UPLO, DIAG, N, a, ldA, Info); | |||||
#ifndef PPC440 | |||||
blas_memory_free(buffer); | |||||
#endif | |||||
return 0; | return 0; | ||||
} | } | ||||
#endif | #endif | ||||
@@ -107,7 +107,7 @@ | |||||
*> \ingroup doubleOTHERcomputational | *> \ingroup doubleOTHERcomputational | ||||
* | * | ||||
* ===================================================================== | * ===================================================================== | ||||
SUBROUTINE DTRTRI_LAPACK( UPLO, DIAG, N, A, LDA, INFO ) | |||||
SUBROUTINE DTRTRILAPACK( UPLO, DIAG, N, A, LDA, INFO ) | |||||
* | * | ||||
* -- LAPACK computational routine (version 3.4.0) -- | * -- LAPACK computational routine (version 3.4.0) -- | ||||
* -- LAPACK is a software package provided by Univ. of Tennessee, -- | * -- LAPACK is a software package provided by Univ. of Tennessee, -- | ||||