| @@ -558,6 +558,7 @@ DYNAMIC_CORE += FALKOR | |||
| DYNAMIC_CORE += THUNDERX | |||
| DYNAMIC_CORE += THUNDERX2T99 | |||
| DYNAMIC_CORE += TSV110 | |||
| DYNAMIC_CORE += EMAG8180 | |||
| endif | |||
| ifeq ($(ARCH), zarch) | |||
| @@ -129,7 +129,10 @@ int main(int argc, char *argv[]){ | |||
| int step = 1; | |||
| struct timeval start, stop; | |||
| double time1,timeg; | |||
| double time1 = 0.0, timeg = 0.0; | |||
| long nanos = 0; | |||
| time_t seconds = 0; | |||
| struct timespec time_start = { 0, 0 }, time_end = { 0, 0 }; | |||
| argc--;argv++; | |||
| @@ -163,35 +166,32 @@ int main(int argc, char *argv[]){ | |||
| timeg=0; | |||
| fprintf(stderr, " %6d : ", (int)m); | |||
| for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| for (l=0; l<loops; l++) | |||
| { | |||
| clock_gettime(CLOCK_REALTIME, &time_start); | |||
| COPY (&m, x, &inc_x, y, &inc_y ); | |||
| clock_gettime(CLOCK_REALTIME, &time_end); | |||
| for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| nanos = time_end.tv_nsec - time_start.tv_nsec; | |||
| seconds = time_end.tv_sec - time_start.tv_sec; | |||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||
| } | |||
| gettimeofday( &start, (struct timezone *)0); | |||
| time1 = seconds + nanos / 1.e9; | |||
| timeg += time1; | |||
| } | |||
| COPY (&m, x, &inc_x, y, &inc_y ); | |||
| timeg /= loops; | |||
| gettimeofday( &stop, (struct timezone *)0); | |||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||
| timeg += time1; | |||
| } | |||
| timeg /= loops; | |||
| fprintf(stderr, | |||
| " %10.2f MBytes %10.6f sec\n", | |||
| COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||
| fprintf(stderr, | |||
| " %10.2f MBytes %12.9f sec\n", | |||
| COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg / 1.e6, timeg); | |||
| } | |||
| @@ -45,7 +45,7 @@ endif () | |||
| if (DYNAMIC_ARCH) | |||
| if (ARM64) | |||
| set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110) | |||
| set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180) | |||
| endif () | |||
| if (POWER) | |||
| @@ -332,6 +332,29 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS | |||
| set(ZGEMM_UNROLL_M 4) | |||
| set(ZGEMM_UNROLL_N 4) | |||
| set(SYMV_P 16) | |||
| elseif ("${TCORE}" STREQUAL "EMAG8180") | |||
| # Cache/TLB description and GEMM unroll factors written out for the EMAG8180 target. | |||
| # L2_SIZE is 262144 bytes (256 KiB), the same value the cpuid and getarch tables use for this core. | |||
| file(APPEND ${TARGET_CONF_TEMP} | |||
| "#define ARMV8\n" | |||
| "#define L1_CODE_SIZE\t32768\n" | |||
| "#define L1_CODE_LINESIZE\t64\n" | |||
| "#define L1_CODE_ASSOCIATIVE\t4\n" | |||
| "#define L1_DATA_SIZE\t32768\n" | |||
| "#define L1_DATA_LINESIZE\t64\n" | |||
| "#define L1_DATA_ASSOCIATIVE\t4\n" | |||
| "#define L2_SIZE\t262144\n" | |||
| "#define L2_LINESIZE\t64\n" | |||
| "#define L2_ASSOCIATIVE\t8\n" | |||
| "#define DTB_DEFAULT_ENTRIES\t64\n" | |||
| "#define DTB_SIZE\t4096\n") | |||
| set(SGEMM_UNROLL_M 16) | |||
| set(SGEMM_UNROLL_N 4) | |||
| set(DGEMM_UNROLL_M 8) | |||
| set(DGEMM_UNROLL_N 4) | |||
| set(CGEMM_UNROLL_M 8) | |||
| set(CGEMM_UNROLL_N 4) | |||
| set(ZGEMM_UNROLL_M 4) | |||
| set(ZGEMM_UNROLL_N 4) | |||
| set(SYMV_P 16) | |||
| elseif ("${TCORE}" STREQUAL "POWER6") | |||
| file(APPEND ${TARGET_CONF_TEMP} | |||
| "#define L1_DATA_SIZE 32768\n" | |||
| @@ -41,6 +41,8 @@ | |||
| #define CPU_THUNDERX2T99 8 | |||
| //Hisilicon | |||
| #define CPU_TSV110 9 | |||
| // Ampere | |||
| #define CPU_EMAG8180 10 | |||
| static char *cpuname[] = { | |||
| "UNKNOWN", | |||
| @@ -52,7 +54,8 @@ static char *cpuname[] = { | |||
| "FALKOR", | |||
| "THUNDERX", | |||
| "THUNDERX2T99", | |||
| "TSV110" | |||
| "TSV110", | |||
| "EMAG8180" | |||
| }; | |||
| static char *cpuname_lower[] = { | |||
| @@ -65,7 +68,8 @@ static char *cpuname_lower[] = { | |||
| "falkor", | |||
| "thunderx", | |||
| "thunderx2t99", | |||
| "tsv110" | |||
| "tsv110", | |||
| "emag8180" | |||
| }; | |||
| int get_feature(char *search) | |||
| @@ -152,6 +156,9 @@ int detect(void) | |||
| // HiSilicon | |||
| else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01")) | |||
| return CPU_TSV110; | |||
| // Ampere | |||
| else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000")) | |||
| return CPU_EMAG8180; | |||
| } | |||
| p = (char *) NULL ; | |||
| @@ -335,6 +342,18 @@ void get_cpuconfig(void) | |||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | |||
| printf("#define DTB_SIZE 4096 \n"); | |||
| break; | |||
| case CPU_EMAG8180: | |||
| // Minimum parameters for ARMv8 (based on A53) | |||
| printf("#define EMAG8180\n"); | |||
| printf("#define L1_CODE_SIZE 32768\n"); | |||
| printf("#define L1_DATA_SIZE 32768\n"); | |||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||
| printf("#define L2_SIZE 262144\n"); | |||
| printf("#define L2_LINESIZE 64\n"); | |||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||
| printf("#define DTB_SIZE 4096\n"); | |||
| // Explicit break, matching the preceding case, so a case added after this one is not reached by fall-through. | |||
| break; | |||
| } | |||
| get_cpucount(); | |||
| } | |||
| @@ -51,10 +51,11 @@ extern gotoblas_t gotoblas_FALKOR; | |||
| extern gotoblas_t gotoblas_THUNDERX; | |||
| extern gotoblas_t gotoblas_THUNDERX2T99; | |||
| extern gotoblas_t gotoblas_TSV110; | |||
| extern gotoblas_t gotoblas_EMAG8180; | |||
| extern void openblas_warning(int verbose, const char * msg); | |||
| #define NUM_CORETYPES 9 | |||
| #define NUM_CORETYPES 10 | |||
| /* | |||
| * In case asm/hwcap.h is outdated on the build system, make sure | |||
| @@ -78,6 +79,7 @@ static char *corename[] = { | |||
| "thunderx", | |||
| "thunderx2t99", | |||
| "tsv110", | |||
| "emag8180", | |||
| "unknown" | |||
| }; | |||
| @@ -91,6 +93,7 @@ char *gotoblas_corename(void) { | |||
| if (gotoblas == &gotoblas_THUNDERX) return corename[ 6]; | |||
| if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7]; | |||
| if (gotoblas == &gotoblas_TSV110) return corename[ 8]; | |||
| if (gotoblas == &gotoblas_EMAG8180) return corename[ 9]; | |||
| return corename[NUM_CORETYPES]; | |||
| } | |||
| @@ -119,6 +122,7 @@ static gotoblas_t *force_coretype(char *coretype) { | |||
| case 6: return (&gotoblas_THUNDERX); | |||
| case 7: return (&gotoblas_THUNDERX2T99); | |||
| case 8: return (&gotoblas_TSV110); | |||
| case 9: return (&gotoblas_EMAG8180); | |||
| } | |||
| snprintf(message, 128, "Core not found: %s\n", coretype); | |||
| openblas_warning(1, message); | |||
| @@ -189,6 +193,13 @@ static gotoblas_t *get_coretype(void) { | |||
| return &gotoblas_TSV110; | |||
| } | |||
| break; | |||
| case 0x50: // Ampere | |||
| // Only part 0x000 is mapped to a dedicated kernel table; any other part number | |||
| // leaves the inner switch without returning and reaches the break below. | |||
| switch (part) | |||
| { | |||
| case 0x000: // Skylark/EMAG8180 | |||
| return &gotoblas_EMAG8180; | |||
| } | |||
| break; | |||
| case 0x51: // Qualcomm | |||
| switch (part) | |||
| { | |||
| @@ -1093,6 +1093,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #else | |||
| #endif | |||
| /* Forced-target settings used when FORCE_EMAG8180 is defined: marks the target as ARMV8, | |||
| * names the architecture/subarchitecture/library, and packs the cache, TLB and feature | |||
| * settings into ARCHCONFIG as -D options. | |||
| * NOTE(review): the L1 associativity here is 8; confirm it agrees with the other | |||
| * EMAG8180 descriptions in the build system. */ | |||
| #ifdef FORCE_EMAG8180 | |||
| #define ARMV8 | |||
| #define FORCE | |||
| #define ARCHITECTURE "ARM64" | |||
| #define SUBARCHITECTURE "EMAG8180" | |||
| #define SUBDIRNAME "arm64" | |||
| #define ARCHCONFIG "-DEMAG8180 " \ | |||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||
| "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ | |||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||
| #define LIBNAME "emag8180" | |||
| #define CORENAME "EMAG8180" | |||
| #endif | |||
| #ifdef FORCE_ZARCH_GENERIC | |||
| #define FORCE | |||
| @@ -0,0 +1,3 @@ | |||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||
| @@ -167,3 +167,27 @@ endif | |||
| CGEMM3MKERNEL = zgemm3m_kernel.S | |||
| ZGEMM3MKERNEL = zgemm3m_kernel.S | |||
| # Fallbacks: set the I{S,D,Q}MIN kernel variables to imin.S and the I{S,D,Q}MAX | |||
| # kernel variables to imax.S, but only when they have not been defined already. | |||
| ifndef ISMINKERNEL | |||
| ISMINKERNEL = imin.S | |||
| endif | |||
| ifndef IDMINKERNEL | |||
| IDMINKERNEL = imin.S | |||
| endif | |||
| ifndef IQMINKERNEL | |||
| IQMINKERNEL = imin.S | |||
| endif | |||
| ifndef ISMAXKERNEL | |||
| ISMAXKERNEL = imax.S | |||
| endif | |||
| ifndef IDMAXKERNEL | |||
| IDMAXKERNEL = imax.S | |||
| endif | |||
| ifndef IQMAXKERNEL | |||
| IQMAXKERNEL = imax.S | |||
| endif | |||
| @@ -50,3 +50,26 @@ ifndef DSDOTKERNEL | |||
| DSDOTKERNEL = ../generic/dot.c | |||
| endif | |||
| # Fallbacks: set the I{S,D,Q}MIN kernel variables to imin.S and the I{S,D,Q}MAX | |||
| # kernel variables to imax.S, but only when they have not been defined already. | |||
| ifndef ISMINKERNEL | |||
| ISMINKERNEL = imin.S | |||
| endif | |||
| ifndef IDMINKERNEL | |||
| IDMINKERNEL = imin.S | |||
| endif | |||
| ifndef IQMINKERNEL | |||
| IQMINKERNEL = imin.S | |||
| endif | |||
| ifndef ISMAXKERNEL | |||
| ISMAXKERNEL = imax.S | |||
| endif | |||
| ifndef IDMAXKERNEL | |||
| IDMAXKERNEL = imax.S | |||
| endif | |||
| ifndef IQMAXKERNEL | |||
| IQMAXKERNEL = imax.S | |||
| endif | |||
| @@ -80,6 +80,8 @@ | |||
| IF( V1( 1 ).GE.V2( 1 ) ) THEN | |||
| IF( V1( 1 ).NE.ZERO ) THEN | |||
| V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) | |||
| ELSE | |||
| V1( 2 ) = V1( 2 ) + V2( 2 ) | |||
| END IF | |||
| ELSE | |||
| V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) | |||
| @@ -80,6 +80,8 @@ | |||
| IF( V1( 1 ).GE.V2( 1 ) ) THEN | |||
| IF( V1( 1 ).NE.ZERO ) THEN | |||
| V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) | |||
| ELSE | |||
| V1( 2 ) = V1( 2 ) + V2( 2 ) | |||
| END IF | |||
| ELSE | |||
| V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) | |||
| @@ -2603,7 +2603,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
| #if defined(CORTEXA53) || defined(CORTEXA57) || \ | |||
| defined(CORTEXA72) || defined(CORTEXA73) || \ | |||
| defined(FALKOR) || defined(TSV110) | |||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) | |||
| #define SGEMM_DEFAULT_UNROLL_M 16 | |||
| #define SGEMM_DEFAULT_UNROLL_N 4 | |||