@@ -17,24 +17,28 @@ goto :: slinpack.goto dlinpack.goto clinpack.goto zlinpack.goto \ | |||||
scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \ | scholesky.goto dcholesky.goto ccholesky.goto zcholesky.goto \ | ||||
sgemm.goto dgemm.goto cgemm.goto zgemm.goto \ | sgemm.goto dgemm.goto cgemm.goto zgemm.goto \ | ||||
strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \ | strmm.goto dtrmm.goto ctrmm.goto ztrmm.goto \ | ||||
strsm.goto dtrsm.goto ctrsm.goto ztrsm.goto \ | |||||
ssymm.goto dsymm.goto csymm.goto zsymm.goto | ssymm.goto dsymm.goto csymm.goto zsymm.goto | ||||
# acml: aggregate (double-colon) target listing every *.acml benchmark executable,
# one per routine and precision prefix (s/d/c/z); this hunk adds the trsm entries.
acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | acml :: slinpack.acml dlinpack.acml clinpack.acml zlinpack.acml \ | ||||
scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \ | scholesky.acml dcholesky.acml ccholesky.acml zcholesky.acml \ | ||||
sgemm.acml dgemm.acml cgemm.acml zgemm.acml \ | sgemm.acml dgemm.acml cgemm.acml zgemm.acml \ | ||||
strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \ | strmm.acml dtrmm.acml ctrmm.acml ztrmm.acml \ | ||||
strsm.acml dtrsm.acml ctrsm.acml ztrsm.acml \ | |||||
ssymm.acml dsymm.acml csymm.acml zsymm.acml | ssymm.acml dsymm.acml csymm.acml zsymm.acml | ||||
# atlas: aggregate (double-colon) target listing every *.atlas benchmark executable,
# one per routine and precision prefix (s/d/c/z); this hunk adds the trsm entries.
atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \ | atlas :: slinpack.atlas dlinpack.atlas clinpack.atlas zlinpack.atlas \ | ||||
scholesky.atlas dcholesky.atlas ccholesky.atlas zcholesky.atlas \ | scholesky.atlas dcholesky.atlas ccholesky.atlas zcholesky.atlas \ | ||||
sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \ | sgemm.atlas dgemm.atlas cgemm.atlas zgemm.atlas \ | ||||
strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \ | strmm.atlas dtrmm.atlas ctrmm.atlas ztrmm.atlas \ | ||||
strsm.atlas dtrsm.atlas ctrsm.atlas ztrsm.atlas \ | |||||
ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas | ssymm.atlas dsymm.atlas csymm.atlas zsymm.atlas | ||||
# mkl: aggregate (double-colon) target listing every *.mkl benchmark executable,
# one per routine and precision prefix (s/d/c/z); this hunk adds the trsm entries.
mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | mkl :: slinpack.mkl dlinpack.mkl clinpack.mkl zlinpack.mkl \ | ||||
scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \ | scholesky.mkl dcholesky.mkl ccholesky.mkl zcholesky.mkl \ | ||||
sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \ | sgemm.mkl dgemm.mkl cgemm.mkl zgemm.mkl \ | ||||
strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \ | strmm.mkl dtrmm.mkl ctrmm.mkl ztrmm.mkl \ | ||||
strsm.mkl dtrsm.mkl ctrsm.mkl ztrsm.mkl \ | |||||
ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl | ssymm.mkl dsymm.mkl csymm.mkl zsymm.mkl | ||||
# all: build the benchmark sets of all four libraries (goto, atlas, acml, mkl).
all :: goto atlas acml mkl | all :: goto atlas acml mkl | ||||
@@ -316,6 +320,61 @@ ztrmm.mkl : ztrmm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | -$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | ||||
##################################### Strsm #################################################### | |||||
# Link the strsm benchmark object (trsm.c built with COMPLEX and DOUBLE both
# undefined) once per BLAS library. The .goto variant links ../$(LIBNAME) and
# must succeed; the acml/atlas/mkl recipes start with '-' so make ignores a
# failed link (presumably because those libraries may not be installed).
strsm.goto : strsm.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
strsm.acml : strsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
strsm.atlas : strsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
strsm.mkl : strsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Dtrsm #################################################### | |||||
# Link the dtrsm benchmark object (trsm.c built with DOUBLE defined, COMPLEX
# undefined) once per BLAS library. The .goto variant links ../$(LIBNAME) and
# must succeed; the acml/atlas/mkl recipes start with '-' so make ignores a
# failed link (presumably because those libraries may not be installed).
dtrsm.goto : dtrsm.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
dtrsm.acml : dtrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
dtrsm.atlas : dtrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
dtrsm.mkl : dtrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Ctrsm #################################################### | |||||
# Link the ctrsm benchmark object (trsm.c built with COMPLEX defined, DOUBLE
# undefined) once per BLAS library. The .goto variant links ../$(LIBNAME) and
# must succeed; the acml/atlas/mkl recipes start with '-' so make ignores a
# failed link (presumably because those libraries may not be installed).
ctrsm.goto : ctrsm.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
ctrsm.acml : ctrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
ctrsm.atlas : ctrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
ctrsm.mkl : ctrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
##################################### Ztrsm #################################################### | |||||
# Link the ztrsm benchmark object (trsm.c built with COMPLEX and DOUBLE both
# defined) once per BLAS library. The .goto variant links ../$(LIBNAME) and
# must succeed; the acml/atlas/mkl recipes start with '-' so make ignores a
# failed link (presumably because those libraries may not be installed).
ztrsm.goto : ztrsm.$(SUFFIX) ../$(LIBNAME) | |||||
$(CC) $(CFLAGS) -o $(@F) $^ $(CEXTRALIB) $(EXTRALIB) -lm | |||||
ztrsm.acml : ztrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBACML) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
ztrsm.atlas : ztrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBATLAS) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
ztrsm.mkl : ztrsm.$(SUFFIX) | |||||
-$(CC) $(CFLAGS) -o $(@F) $^ $(LIBMKL) $(CEXTRALIB) $(EXTRALIB) $(FEXTRALIB) | |||||
################################################################################################### | ################################################################################################### | ||||
@@ -380,7 +439,17 @@ ctrmm.$(SUFFIX) : trmm.c | |||||
# ztrmm object: trmm.c compiled with both -DCOMPLEX and -DDOUBLE
# (presumably selecting the z variant inside trmm.c, which is not shown here).
ztrmm.$(SUFFIX) : trmm.c | ztrmm.$(SUFFIX) : trmm.c | ||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | $(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | ||||
# TRSM benchmark objects: the single source trsm.c is compiled four times.
# The -D/-U combination of COMPLEX and DOUBLE picks the routine bound to the
# TRSM macro in trsm.c: s (neither), d (DOUBLE), c (COMPLEX), z (both).
strsm.$(SUFFIX) : trsm.c | |||||
$(CC) $(CFLAGS) -c -UCOMPLEX -UDOUBLE -o $(@F) $^ | |||||
dtrsm.$(SUFFIX) : trsm.c | |||||
$(CC) $(CFLAGS) -c -UCOMPLEX -DDOUBLE -o $(@F) $^ | |||||
ctrsm.$(SUFFIX) : trsm.c | |||||
$(CC) $(CFLAGS) -c -DCOMPLEX -UDOUBLE -o $(@F) $^ | |||||
ztrsm.$(SUFFIX) : trsm.c | |||||
$(CC) $(CFLAGS) -c -DCOMPLEX -DDOUBLE -o $(@F) $^ | |||||
clean :: | clean :: | ||||
@@ -0,0 +1,202 @@ | |||||
/*************************************************************************** | |||||
Copyright (c) 2014, The OpenBLAS Project | |||||
All rights reserved. | |||||
Redistribution and use in source and binary forms, with or without | |||||
modification, are permitted provided that the following conditions are | |||||
met: | |||||
1. Redistributions of source code must retain the above copyright | |||||
notice, this list of conditions and the following disclaimer. | |||||
2. Redistributions in binary form must reproduce the above copyright | |||||
notice, this list of conditions and the following disclaimer in | |||||
the documentation and/or other materials provided with the | |||||
distribution. | |||||
3. Neither the name of the OpenBLAS project nor the names of | |||||
its contributors may be used to endorse or promote products | |||||
derived from this software without specific prior written permission. | |||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |||||
ARE DISCLAIMED. IN NO EVENT SHALL THE OPENBLAS PROJECT OR CONTRIBUTORS BE | |||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |||||
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |||||
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |||||
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE | |||||
USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
*****************************************************************************/ | |||||
#include <stdio.h> | |||||
#include <stdlib.h> | |||||
#ifdef __CYGWIN32__ | |||||
#include <sys/time.h> | |||||
#endif | |||||
#include "common.h" | |||||
/*
 * Bind TRSM to the routine under test according to the COMPLEX / DOUBLE
 * compile-time flags:
 *   neither          -> strsm
 *   DOUBLE only      -> dtrsm
 *   COMPLEX only     -> ctrsm
 *   COMPLEX + DOUBLE -> ztrsm
 * BLASFUNC is not defined in this file; presumably it comes from common.h.
 */
#undef TRSM | |||||
#ifndef COMPLEX | |||||
#ifdef DOUBLE | |||||
#define TRSM BLASFUNC(dtrsm) | |||||
#else | |||||
#define TRSM BLASFUNC(strsm) | |||||
#endif | |||||
#else | |||||
#ifdef DOUBLE | |||||
#define TRSM BLASFUNC(ztrsm) | |||||
#else | |||||
#define TRSM BLASFUNC(ctrsm) | |||||
#endif | |||||
#endif | |||||
#if defined(__WIN32__) || defined(__WIN64__) | |||||
#ifndef DELTA_EPOCH_IN_MICROSECS | |||||
#define DELTA_EPOCH_IN_MICROSECS 11644473600000000ULL | |||||
#endif | |||||
int gettimeofday(struct timeval *tv, void *tz){ | |||||
FILETIME ft; | |||||
unsigned __int64 tmpres = 0; | |||||
static int tzflag; | |||||
if (NULL != tv) | |||||
{ | |||||
GetSystemTimeAsFileTime(&ft); | |||||
tmpres |= ft.dwHighDateTime; | |||||
tmpres <<= 32; | |||||
tmpres |= ft.dwLowDateTime; | |||||
/*converting file time to unix epoch*/ | |||||
tmpres /= 10; /*convert into microseconds*/ | |||||
tmpres -= DELTA_EPOCH_IN_MICROSECS; | |||||
tv->tv_sec = (long)(tmpres / 1000000UL); | |||||
tv->tv_usec = (long)(tmpres % 1000000UL); | |||||
} | |||||
return 0; | |||||
} | |||||
#endif | |||||
#if !defined(__WIN32__) && !defined(__WIN64__) && !defined(__CYGWIN32__) && 0 | |||||
/*
 * Allocate a buffer from a System V shared-memory segment requested with
 * SHM_HUGETLB.
 *
 * size - number of bytes wanted; the request passed to shmget() is
 *        (size + HUGE_PAGESIZE) masked with ~(HUGE_PAGESIZE - 1).
 *
 * Returns the address at which the segment was attached. If shmget() or
 * shmat() fails, a message is printed and the process exits with status 1.
 * The segment is marked for removal (IPC_RMID) right after it is attached.
 */
static void *huge_malloc(BLASLONG size){

  void *mem;
  int segment_id;

#ifndef SHM_HUGETLB
#define SHM_HUGETLB 04000
#endif

  segment_id = shmget(IPC_PRIVATE,
                      (size + HUGE_PAGESIZE) & ~(HUGE_PAGESIZE - 1),
                      SHM_HUGETLB | IPC_CREAT | 0600);

  if (segment_id < 0) {
    printf( "Memory allocation failed(shmget).\n");
    exit(1);
  }

  mem = shmat(segment_id, NULL, SHM_RND);

  if ((BLASLONG)mem == -1) {
    printf( "Memory allocation failed(shmat).\n");
    exit(1);
  }

  /* Mark the segment for removal now that it is attached. */
  shmctl(segment_id, IPC_RMID, 0);

  return mem;
}
#define malloc huge_malloc | |||||
#endif | |||||
int MAIN__(int argc, char *argv[]){ | |||||
FLOAT *a, *b; | |||||
FLOAT alpha[] = {1.0, 1.0}; | |||||
FLOAT beta [] = {1.0, 1.0}; | |||||
char *p; | |||||
char side ='L'; | |||||
char uplo ='U'; | |||||
char trans='N'; | |||||
char diag ='U'; | |||||
if ((p = getenv("OPENBLAS_SIDE"))) side=*p; | |||||
if ((p = getenv("OPENBLAS_UPLO"))) uplo=*p; | |||||
if ((p = getenv("OPENBLAS_TRANS"))) trans=*p; | |||||
if ((p = getenv("OPENBLAS_DIAG"))) diag=*p; | |||||
blasint m, i, j; | |||||
int from = 1; | |||||
int to = 200; | |||||
int step = 1; | |||||
struct timeval start, stop; | |||||
double time1; | |||||
argc--;argv++; | |||||
if (argc > 0) { from = atol(*argv); argc--; argv++;} | |||||
if (argc > 0) { to = MAX(atol(*argv), from); argc--; argv++;} | |||||
if (argc > 0) { step = atol(*argv); argc--; argv++;} | |||||
fprintf(stderr, "From : %3d To : %3d Step = %3d Side = %c Uplo = %c Trans = %c Diag = %c\n", from, to, step,side,uplo,trans,diag); | |||||
if (( a = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
if (( b = (FLOAT *)malloc(sizeof(FLOAT) * to * to * COMPSIZE)) == NULL){ | |||||
fprintf(stderr,"Out of Memory!!\n");exit(1); | |||||
} | |||||
#ifdef linux | |||||
srandom(getpid()); | |||||
#endif | |||||
fprintf(stderr, " SIZE Flops\n"); | |||||
for(m = from; m <= to; m += step) | |||||
{ | |||||
fprintf(stderr, " %6d : ", (int)m); | |||||
for(j = 0; j < m; j++){ | |||||
for(i = 0; i < m * COMPSIZE; i++){ | |||||
a[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
b[i + j * m * COMPSIZE] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
} | |||||
} | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
TRSM (&side, &uplo, &trans, &diag, &m, &m, alpha, a, &m, b, &m); | |||||
gettimeofday( &stop, (struct timezone *)0); | |||||
time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
gettimeofday( &start, (struct timezone *)0); | |||||
fprintf(stderr, | |||||
" %10.2f MFlops\n", | |||||
COMPSIZE * COMPSIZE * 1. * (double)m * (double)m * (double)m / time1 * 1.e-6); | |||||
} | |||||
return 0; | |||||
} | |||||
/* Expose MAIN__ as a weak `main`; the alias is declared with MAIN__'s own
   int-returning signature so the two types agree. */
int main(int argc, char *argv[]) __attribute__((weak, alias("MAIN__")));