Browse Source

Merge pull request #34 from xianyi/develop

rebase
tags/v0.3.9
Martin Kroeker GitHub 5 years ago
parent
commit
531c6b96d6
No known key found for this signature in database GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 151 additions and 27 deletions
  1. +1
    -0
      Makefile.system
  2. +22
    -22
      benchmark/copy.c
  3. +1
    -1
      cmake/arch.cmake
  4. +23
    -0
      cmake/prebuild.cmake
  5. +21
    -2
      cpuid_arm64.c
  6. +12
    -1
      driver/others/dynamic_arm64.c
  7. +16
    -0
      getarch.c
  8. +3
    -0
      kernel/arm64/KERNEL.EMAG8180
  9. +24
    -0
      kernel/mips64/KERNEL
  10. +23
    -0
      kernel/power/KERNEL
  11. +2
    -0
      lapack-netlib/SRC/dcombssq.f
  12. +2
    -0
      lapack-netlib/SRC/scombssq.f
  13. +1
    -1
      param.h

+ 1
- 0
Makefile.system View File

@@ -558,6 +558,7 @@ DYNAMIC_CORE += FALKOR
DYNAMIC_CORE += THUNDERX DYNAMIC_CORE += THUNDERX
DYNAMIC_CORE += THUNDERX2T99 DYNAMIC_CORE += THUNDERX2T99
DYNAMIC_CORE += TSV110 DYNAMIC_CORE += TSV110
DYNAMIC_CORE += EMAG8180
endif endif


ifeq ($(ARCH), zarch) ifeq ($(ARCH), zarch)


+ 22
- 22
benchmark/copy.c View File

@@ -129,7 +129,10 @@ int main(int argc, char *argv[]){
int step = 1; int step = 1;


struct timeval start, stop; struct timeval start, stop;
double time1,timeg;
double time1 = 0.0, timeg = 0.0;
long nanos = 0;
time_t seconds = 0;
struct timespec time_start = { 0, 0 }, time_end = { 0, 0 };


argc--;argv++; argc--;argv++;


@@ -163,35 +166,32 @@ int main(int argc, char *argv[]){
timeg=0; timeg=0;


fprintf(stderr, " %6d : ", (int)m); fprintf(stderr, " %6d : ", (int)m);
for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}


for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}


for (l=0; l<loops; l++) for (l=0; l<loops; l++)
{ {
clock_gettime(CLOCK_REALTIME, &time_start);
COPY (&m, x, &inc_x, y, &inc_y );
clock_gettime(CLOCK_REALTIME, &time_end);


for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){
x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
nanos = time_end.tv_nsec - time_start.tv_nsec;
seconds = time_end.tv_sec - time_start.tv_sec;


for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){
y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5;
}
gettimeofday( &start, (struct timezone *)0);
time1 = seconds + nanos / 1.e9;
timeg += time1;
}


COPY (&m, x, &inc_x, y, &inc_y );
timeg /= loops;


gettimeofday( &stop, (struct timezone *)0);

time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6;

timeg += time1;

}

timeg /= loops;

fprintf(stderr,
" %10.2f MBytes %10.6f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg);
fprintf(stderr,
" %10.2f MBytes %12.9f sec\n",
COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg / 1.e6, timeg);


} }




+ 1
- 1
cmake/arch.cmake View File

@@ -45,7 +45,7 @@ endif ()


if (DYNAMIC_ARCH) if (DYNAMIC_ARCH)
if (ARM64) if (ARM64)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110)
set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180)
endif () endif ()
if (POWER) if (POWER)


+ 23
- 0
cmake/prebuild.cmake View File

@@ -332,6 +332,29 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS
set(ZGEMM_UNROLL_M 4) set(ZGEMM_UNROLL_M 4)
set(ZGEMM_UNROLL_N 4) set(ZGEMM_UNROLL_N 4)
set(SYMV_P 16) set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "EMAG8180")
  # Settings used when TCORE is EMAG8180: cache and DTB defines are appended
  # to the target config file, then the GEMM unroll factors and SYMV_P are set.
  # L2_SIZE is 262144 so that it agrees with the EMAG8180 entries in
  # cpuid_arm64.c and getarch.c (the earlier 5262144 had a stray leading digit).
  file(APPEND ${TARGET_CONF_TEMP}
    "#define ARMV8\n"
    "#define L1_CODE_SIZE\t32768\n"
    "#define L1_CODE_LINESIZE\t64\n"
    "#define L1_CODE_ASSOCIATIVE\t4\n"
    "#define L1_DATA_SIZE\t32768\n"
    "#define L1_DATA_LINESIZE\t64\n"
    "#define L1_DATA_ASSOCIATIVE\t4\n"
    "#define L2_SIZE\t262144\n"
    "#define L2_LINESIZE\t64\n"
    "#define L2_ASSOCIATIVE\t8\n"
    "#define DTB_DEFAULT_ENTRIES\t64\n"
    "#define DTB_SIZE\t4096\n")
  set(SGEMM_UNROLL_M 16)
  set(SGEMM_UNROLL_N 4)
  set(DGEMM_UNROLL_M 8)
  set(DGEMM_UNROLL_N 4)
  set(CGEMM_UNROLL_M 8)
  set(CGEMM_UNROLL_N 4)
  set(ZGEMM_UNROLL_M 4)
  set(ZGEMM_UNROLL_N 4)
  set(SYMV_P 16)
elseif ("${TCORE}" STREQUAL "POWER6") elseif ("${TCORE}" STREQUAL "POWER6")
file(APPEND ${TARGET_CONF_TEMP} file(APPEND ${TARGET_CONF_TEMP}
"#define L1_DATA_SIZE 32768\n" "#define L1_DATA_SIZE 32768\n"


+ 21
- 2
cpuid_arm64.c View File

@@ -41,6 +41,8 @@
#define CPU_THUNDERX2T99 8 #define CPU_THUNDERX2T99 8
//Hisilicon //Hisilicon
#define CPU_TSV110 9 #define CPU_TSV110 9
// Ampere
#define CPU_EMAG8180 10


static char *cpuname[] = { static char *cpuname[] = {
"UNKNOWN", "UNKNOWN",
@@ -52,7 +54,8 @@ static char *cpuname[] = {
"FALKOR", "FALKOR",
"THUNDERX", "THUNDERX",
"THUNDERX2T99", "THUNDERX2T99",
"TSV110"
"TSV110",
"EMAG8180"
}; };


static char *cpuname_lower[] = { static char *cpuname_lower[] = {
@@ -65,7 +68,8 @@ static char *cpuname_lower[] = {
"falkor", "falkor",
"thunderx", "thunderx",
"thunderx2t99", "thunderx2t99",
"tsv110"
"tsv110",
"emag8180"
}; };


int get_feature(char *search) int get_feature(char *search)
@@ -152,6 +156,9 @@ int detect(void)
// HiSilicon // HiSilicon
else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01")) else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01"))
return CPU_TSV110; return CPU_TSV110;
// Ampere
else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000"))
return CPU_EMAG8180;
} }


p = (char *) NULL ; p = (char *) NULL ;
@@ -335,6 +342,18 @@ void get_cpuconfig(void)
printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_DEFAULT_ENTRIES 64 \n");
printf("#define DTB_SIZE 4096 \n"); printf("#define DTB_SIZE 4096 \n");
break; break;

case CPU_EMAG8180:
// Minimum parameters for ARMv8 (based on A53)
printf("#define EMAG8180\n");
printf("#define L1_CODE_SIZE 32768\n");
printf("#define L1_DATA_SIZE 32768\n");
printf("#define L1_DATA_LINESIZE 64\n");
printf("#define L2_SIZE 262144\n");
printf("#define L2_LINESIZE 64\n");
printf("#define DTB_DEFAULT_ENTRIES 64\n");
printf("#define DTB_SIZE 4096\n");

} }
get_cpucount(); get_cpucount();
} }


+ 12
- 1
driver/others/dynamic_arm64.c View File

@@ -51,10 +51,11 @@ extern gotoblas_t gotoblas_FALKOR;
extern gotoblas_t gotoblas_THUNDERX; extern gotoblas_t gotoblas_THUNDERX;
extern gotoblas_t gotoblas_THUNDERX2T99; extern gotoblas_t gotoblas_THUNDERX2T99;
extern gotoblas_t gotoblas_TSV110; extern gotoblas_t gotoblas_TSV110;
extern gotoblas_t gotoblas_EMAG8180;


extern void openblas_warning(int verbose, const char * msg); extern void openblas_warning(int verbose, const char * msg);


#define NUM_CORETYPES 9
#define NUM_CORETYPES 10


/* /*
* In case asm/hwcap.h is outdated on the build system, make sure * In case asm/hwcap.h is outdated on the build system, make sure
@@ -78,6 +79,7 @@ static char *corename[] = {
"thunderx", "thunderx",
"thunderx2t99", "thunderx2t99",
"tsv110", "tsv110",
"emag8180",
"unknown" "unknown"
}; };


@@ -91,6 +93,7 @@ char *gotoblas_corename(void) {
if (gotoblas == &gotoblas_THUNDERX) return corename[ 6]; if (gotoblas == &gotoblas_THUNDERX) return corename[ 6];
if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7]; if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
if (gotoblas == &gotoblas_TSV110) return corename[ 8]; if (gotoblas == &gotoblas_TSV110) return corename[ 8];
if (gotoblas == &gotoblas_EMAG8180) return corename[ 9];
return corename[NUM_CORETYPES]; return corename[NUM_CORETYPES];
} }


@@ -119,6 +122,7 @@ static gotoblas_t *force_coretype(char *coretype) {
case 6: return (&gotoblas_THUNDERX); case 6: return (&gotoblas_THUNDERX);
case 7: return (&gotoblas_THUNDERX2T99); case 7: return (&gotoblas_THUNDERX2T99);
case 8: return (&gotoblas_TSV110); case 8: return (&gotoblas_TSV110);
case 9: return (&gotoblas_EMAG8180);
} }
snprintf(message, 128, "Core not found: %s\n", coretype); snprintf(message, 128, "Core not found: %s\n", coretype);
openblas_warning(1, message); openblas_warning(1, message);
@@ -189,6 +193,13 @@ static gotoblas_t *get_coretype(void) {
return &gotoblas_TSV110; return &gotoblas_TSV110;
} }
break; break;
case 0x50: // Ampere
// Only part 0x000 is mapped to a core table here; any other part value
// leaves the inner switch and reaches the break below without returning.
// NOTE(review): 0x50 is presumably the CPU implementer ID (the outer
// switch expression is not visible in this hunk) - confirm.
switch (part)
{
case 0x000: // Skylark/EMAG8180
return &gotoblas_EMAG8180;
}
break;
case 0x51: // Qualcomm case 0x51: // Qualcomm
switch (part) switch (part)
{ {


+ 16
- 0
getarch.c View File

@@ -1093,6 +1093,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#else #else
#endif #endif


/*
 * Forced-target configuration for EMAG8180, active only when
 * FORCE_EMAG8180 is defined. It sets the architecture and
 * sub-architecture names, the library/core names, and ARCHCONFIG: a
 * string of -D options carrying the core define, cache geometry,
 * DTB_* settings and feature flags.
 * NOTE(review): L1 associativity is 8 here but 4 in the EMAG8180 entry
 * of cmake/prebuild.cmake - confirm which value is correct.
 */
#ifdef FORCE_EMAG8180
#define ARMV8
#define FORCE
#define ARCHITECTURE "ARM64"
#define SUBARCHITECTURE "EMAG8180"
#define SUBDIRNAME "arm64"
#define ARCHCONFIG "-DEMAG8180 " \
"-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \
"-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \
"-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \
"-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \
"-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \
"-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8"
#define LIBNAME "emag8180"
#define CORENAME "EMAG8180"
#endif /* FORCE_EMAG8180 */


#ifdef FORCE_ZARCH_GENERIC #ifdef FORCE_ZARCH_GENERIC
#define FORCE #define FORCE


+ 3
- 0
kernel/arm64/KERNEL.EMAG8180 View File

@@ -0,0 +1,3 @@
# EMAG8180 takes all of its kernel assignments from the CORTEXA57 kernel file.
include $(KERNELDIR)/KERNEL.CORTEXA57



+ 24
- 0
kernel/mips64/KERNEL View File

@@ -167,3 +167,27 @@ endif


CGEMM3MKERNEL = zgemm3m_kernel.S CGEMM3MKERNEL = zgemm3m_kernel.S
ZGEMM3MKERNEL = zgemm3m_kernel.S ZGEMM3MKERNEL = zgemm3m_kernel.S

# Fallback kernel sources. Each variable below is assigned only when it is
# not already set (ifndef is true for an undefined or empty variable), so a
# value chosen earlier takes precedence. The three *MINKERNEL variables
# default to imin.S and the three *MAXKERNEL variables to imax.S.
ifndef ISMINKERNEL
ISMINKERNEL = imin.S
endif

ifndef IDMINKERNEL
IDMINKERNEL = imin.S
endif

ifndef IQMINKERNEL
IQMINKERNEL = imin.S
endif

ifndef ISMAXKERNEL
ISMAXKERNEL = imax.S
endif

ifndef IDMAXKERNEL
IDMAXKERNEL = imax.S
endif

ifndef IQMAXKERNEL
IQMAXKERNEL = imax.S
endif

+ 23
- 0
kernel/power/KERNEL View File

@@ -50,3 +50,26 @@ ifndef DSDOTKERNEL
DSDOTKERNEL = ../generic/dot.c DSDOTKERNEL = ../generic/dot.c
endif endif


# Fallback kernel sources. Each variable below is assigned only when it is
# not already set (ifndef is true for an undefined or empty variable), so a
# value chosen earlier takes precedence. The three *MINKERNEL variables
# default to imin.S and the three *MAXKERNEL variables to imax.S.
ifndef ISMINKERNEL
ISMINKERNEL = imin.S
endif

ifndef IDMINKERNEL
IDMINKERNEL = imin.S
endif

ifndef IQMINKERNEL
IQMINKERNEL = imin.S
endif

ifndef ISMAXKERNEL
ISMAXKERNEL = imax.S
endif

ifndef IDMAXKERNEL
IDMAXKERNEL = imax.S
endif

ifndef IQMAXKERNEL
IQMAXKERNEL = imax.S
endif

+ 2
- 0
lapack-netlib/SRC/dcombssq.f View File

@@ -80,6 +80,8 @@
IF( V1( 1 ).GE.V2( 1 ) ) THEN IF( V1( 1 ).GE.V2( 1 ) ) THEN
IF( V1( 1 ).NE.ZERO ) THEN IF( V1( 1 ).NE.ZERO ) THEN
V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 )
ELSE
V1( 2 ) = V1( 2 ) + V2( 2 )
END IF END IF
ELSE ELSE
V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 )


+ 2
- 0
lapack-netlib/SRC/scombssq.f View File

@@ -80,6 +80,8 @@
IF( V1( 1 ).GE.V2( 1 ) ) THEN IF( V1( 1 ).GE.V2( 1 ) ) THEN
IF( V1( 1 ).NE.ZERO ) THEN IF( V1( 1 ).NE.ZERO ) THEN
V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 )
ELSE
V1( 2 ) = V1( 2 ) + V2( 2 )
END IF END IF
ELSE ELSE
V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 )


+ 1
- 1
param.h View File

@@ -2603,7 +2603,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#if defined(CORTEXA53) || defined(CORTEXA57) || \ #if defined(CORTEXA53) || defined(CORTEXA57) || \
defined(CORTEXA72) || defined(CORTEXA73) || \ defined(CORTEXA72) || defined(CORTEXA73) || \
defined(FALKOR) || defined(TSV110)
defined(FALKOR) || defined(TSV110) || defined(EMAG8180)


#define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_M 16
#define SGEMM_DEFAULT_UNROLL_N 4 #define SGEMM_DEFAULT_UNROLL_N 4


Loading…
Cancel
Save