| @@ -558,6 +558,7 @@ DYNAMIC_CORE += FALKOR | |||||
| DYNAMIC_CORE += THUNDERX | DYNAMIC_CORE += THUNDERX | ||||
| DYNAMIC_CORE += THUNDERX2T99 | DYNAMIC_CORE += THUNDERX2T99 | ||||
| DYNAMIC_CORE += TSV110 | DYNAMIC_CORE += TSV110 | ||||
| DYNAMIC_CORE += EMAG8180 | |||||
| endif | endif | ||||
| ifeq ($(ARCH), zarch) | ifeq ($(ARCH), zarch) | ||||
| @@ -129,7 +129,10 @@ int main(int argc, char *argv[]){ | |||||
| int step = 1; | int step = 1; | ||||
| struct timeval start, stop; | struct timeval start, stop; | ||||
| double time1,timeg; | |||||
| double time1 = 0.0, timeg = 0.0; | |||||
| long nanos = 0; | |||||
| time_t seconds = 0; | |||||
| struct timespec time_start = { 0, 0 }, time_end = { 0, 0 }; | |||||
| argc--;argv++; | argc--;argv++; | ||||
| @@ -163,35 +166,32 @@ int main(int argc, char *argv[]){ | |||||
| timeg=0; | timeg=0; | ||||
| fprintf(stderr, " %6d : ", (int)m); | fprintf(stderr, " %6d : ", (int)m); | ||||
| for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| for (l=0; l<loops; l++) | for (l=0; l<loops; l++) | ||||
| { | { | ||||
| clock_gettime(CLOCK_REALTIME, &time_start); | |||||
| COPY (&m, x, &inc_x, y, &inc_y ); | |||||
| clock_gettime(CLOCK_REALTIME, &time_end); | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_x); i++){ | |||||
| x[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| nanos = time_end.tv_nsec - time_start.tv_nsec; | |||||
| seconds = time_end.tv_sec - time_start.tv_sec; | |||||
| for(i = 0; i < m * COMPSIZE * abs(inc_y); i++){ | |||||
| y[i] = ((FLOAT) rand() / (FLOAT) RAND_MAX) - 0.5; | |||||
| } | |||||
| gettimeofday( &start, (struct timezone *)0); | |||||
| time1 = seconds + nanos / 1.e9; | |||||
| timeg += time1; | |||||
| } | |||||
| COPY (&m, x, &inc_x, y, &inc_y ); | |||||
| timeg /= loops; | |||||
| gettimeofday( &stop, (struct timezone *)0); | |||||
| time1 = (double)(stop.tv_sec - start.tv_sec) + (double)((stop.tv_usec - start.tv_usec)) * 1.e-6; | |||||
| timeg += time1; | |||||
| } | |||||
| timeg /= loops; | |||||
| fprintf(stderr, | |||||
| " %10.2f MBytes %10.6f sec\n", | |||||
| COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg * 1.e-6, timeg); | |||||
| fprintf(stderr, | |||||
| " %10.2f MBytes %12.9f sec\n", | |||||
| COMPSIZE * sizeof(FLOAT) * 1. * (double)m / timeg / 1.e6, timeg); | |||||
| } | } | ||||
| @@ -45,7 +45,7 @@ endif () | |||||
| if (DYNAMIC_ARCH) | if (DYNAMIC_ARCH) | ||||
| if (ARM64) | if (ARM64) | ||||
| set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110) | |||||
| set(DYNAMIC_CORE ARMV8 CORTEXA53 CORTEXA57 CORTEXA72 CORTEXA73 FALKOR THUNDERX THUNDERX2T99 TSV110 EMAG8180) | |||||
| endif () | endif () | ||||
| if (POWER) | if (POWER) | ||||
| @@ -332,6 +332,29 @@ if (DEFINED CORE AND CMAKE_CROSSCOMPILING AND NOT (${HOST_OS} STREQUAL "WINDOWSS | |||||
| set(ZGEMM_UNROLL_M 4) | set(ZGEMM_UNROLL_M 4) | ||||
| set(ZGEMM_UNROLL_N 4) | set(ZGEMM_UNROLL_N 4) | ||||
| set(SYMV_P 16) | set(SYMV_P 16) | ||||
| elseif ("${TCORE}" STREQUAL "EMAG8180") | |||||
| file(APPEND ${TARGET_CONF_TEMP} | |||||
| "#define ARMV8\n" | |||||
| "#define L1_CODE_SIZE\t32768\n" | |||||
| "#define L1_CODE_LINESIZE\t64\n" | |||||
| "#define L1_CODE_ASSOCIATIVE\t4\n" | |||||
| "#define L1_DATA_SIZE\t32768\n" | |||||
| "#define L1_DATA_LINESIZE\t64\n" | |||||
| "#define L1_DATA_ASSOCIATIVE\t4\n" | |||||
| "#define L2_SIZE\t5262144\n" | |||||
| "#define L2_LINESIZE\t64\n" | |||||
| "#define L2_ASSOCIATIVE\t8\n" | |||||
| "#define DTB_DEFAULT_ENTRIES\t64\n" | |||||
| "#define DTB_SIZE\t4096\n") | |||||
| set(SGEMM_UNROLL_M 16) | |||||
| set(SGEMM_UNROLL_N 4) | |||||
| set(DGEMM_UNROLL_M 8) | |||||
| set(DGEMM_UNROLL_N 4) | |||||
| set(CGEMM_UNROLL_M 8) | |||||
| set(CGEMM_UNROLL_N 4) | |||||
| set(ZGEMM_UNROLL_M 4) | |||||
| set(ZGEMM_UNROLL_N 4) | |||||
| set(SYMV_P 16) | |||||
| elseif ("${TCORE}" STREQUAL "POWER6") | elseif ("${TCORE}" STREQUAL "POWER6") | ||||
| file(APPEND ${TARGET_CONF_TEMP} | file(APPEND ${TARGET_CONF_TEMP} | ||||
| "#define L1_DATA_SIZE 32768\n" | "#define L1_DATA_SIZE 32768\n" | ||||
| @@ -41,6 +41,8 @@ | |||||
| #define CPU_THUNDERX2T99 8 | #define CPU_THUNDERX2T99 8 | ||||
| //Hisilicon | //Hisilicon | ||||
| #define CPU_TSV110 9 | #define CPU_TSV110 9 | ||||
| // Ampere | |||||
| #define CPU_EMAG8180 10 | |||||
| static char *cpuname[] = { | static char *cpuname[] = { | ||||
| "UNKNOWN", | "UNKNOWN", | ||||
| @@ -52,7 +54,8 @@ static char *cpuname[] = { | |||||
| "FALKOR", | "FALKOR", | ||||
| "THUNDERX", | "THUNDERX", | ||||
| "THUNDERX2T99", | "THUNDERX2T99", | ||||
| "TSV110" | |||||
| "TSV110", | |||||
| "EMAG8180" | |||||
| }; | }; | ||||
| static char *cpuname_lower[] = { | static char *cpuname_lower[] = { | ||||
| @@ -65,7 +68,8 @@ static char *cpuname_lower[] = { | |||||
| "falkor", | "falkor", | ||||
| "thunderx", | "thunderx", | ||||
| "thunderx2t99", | "thunderx2t99", | ||||
| "tsv110" | |||||
| "tsv110", | |||||
| "emag8180" | |||||
| }; | }; | ||||
| int get_feature(char *search) | int get_feature(char *search) | ||||
| @@ -152,6 +156,9 @@ int detect(void) | |||||
| // HiSilicon | // HiSilicon | ||||
| else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01")) | else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01")) | ||||
| return CPU_TSV110; | return CPU_TSV110; | ||||
| // Ampere | |||||
| else if (strstr(cpu_implementer, "0x50") && strstr(cpu_part, "0x000")) | |||||
| return CPU_EMAG8180; | |||||
| } | } | ||||
| p = (char *) NULL ; | p = (char *) NULL ; | ||||
| @@ -335,6 +342,18 @@ void get_cpuconfig(void) | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | printf("#define DTB_DEFAULT_ENTRIES 64 \n"); | ||||
| printf("#define DTB_SIZE 4096 \n"); | printf("#define DTB_SIZE 4096 \n"); | ||||
| break; | break; | ||||
| case CPU_EMAG8180: | |||||
| // Minimum parameters for ARMv8 (based on A53) | |||||
| printf("#define EMAG8180\n"); | |||||
| printf("#define L1_CODE_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_SIZE 32768\n"); | |||||
| printf("#define L1_DATA_LINESIZE 64\n"); | |||||
| printf("#define L2_SIZE 262144\n"); | |||||
| printf("#define L2_LINESIZE 64\n"); | |||||
| printf("#define DTB_DEFAULT_ENTRIES 64\n"); | |||||
| printf("#define DTB_SIZE 4096\n"); | |||||
| } | } | ||||
| get_cpucount(); | get_cpucount(); | ||||
| } | } | ||||
| @@ -51,10 +51,11 @@ extern gotoblas_t gotoblas_FALKOR; | |||||
| extern gotoblas_t gotoblas_THUNDERX; | extern gotoblas_t gotoblas_THUNDERX; | ||||
| extern gotoblas_t gotoblas_THUNDERX2T99; | extern gotoblas_t gotoblas_THUNDERX2T99; | ||||
| extern gotoblas_t gotoblas_TSV110; | extern gotoblas_t gotoblas_TSV110; | ||||
| extern gotoblas_t gotoblas_EMAG8180; | |||||
| extern void openblas_warning(int verbose, const char * msg); | extern void openblas_warning(int verbose, const char * msg); | ||||
| #define NUM_CORETYPES 9 | |||||
| #define NUM_CORETYPES 10 | |||||
| /* | /* | ||||
| * In case asm/hwcap.h is outdated on the build system, make sure | * In case asm/hwcap.h is outdated on the build system, make sure | ||||
| @@ -78,6 +79,7 @@ static char *corename[] = { | |||||
| "thunderx", | "thunderx", | ||||
| "thunderx2t99", | "thunderx2t99", | ||||
| "tsv110", | "tsv110", | ||||
| "emag8180", | |||||
| "unknown" | "unknown" | ||||
| }; | }; | ||||
| @@ -91,6 +93,7 @@ char *gotoblas_corename(void) { | |||||
| if (gotoblas == &gotoblas_THUNDERX) return corename[ 6]; | if (gotoblas == &gotoblas_THUNDERX) return corename[ 6]; | ||||
| if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7]; | if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7]; | ||||
| if (gotoblas == &gotoblas_TSV110) return corename[ 8]; | if (gotoblas == &gotoblas_TSV110) return corename[ 8]; | ||||
| if (gotoblas == &gotoblas_EMAG8180) return corename[ 9]; | |||||
| return corename[NUM_CORETYPES]; | return corename[NUM_CORETYPES]; | ||||
| } | } | ||||
| @@ -119,6 +122,7 @@ static gotoblas_t *force_coretype(char *coretype) { | |||||
| case 6: return (&gotoblas_THUNDERX); | case 6: return (&gotoblas_THUNDERX); | ||||
| case 7: return (&gotoblas_THUNDERX2T99); | case 7: return (&gotoblas_THUNDERX2T99); | ||||
| case 8: return (&gotoblas_TSV110); | case 8: return (&gotoblas_TSV110); | ||||
| case 9: return (&gotoblas_EMAG8180); | |||||
| } | } | ||||
| snprintf(message, 128, "Core not found: %s\n", coretype); | snprintf(message, 128, "Core not found: %s\n", coretype); | ||||
| openblas_warning(1, message); | openblas_warning(1, message); | ||||
| @@ -189,6 +193,13 @@ static gotoblas_t *get_coretype(void) { | |||||
| return &gotoblas_TSV110; | return &gotoblas_TSV110; | ||||
| } | } | ||||
| break; | break; | ||||
| case 0x50: // Ampere | |||||
| switch (part) | |||||
| { | |||||
| case 0x000: // Skylark/EMAG8180 | |||||
| return &gotoblas_EMAG8180; | |||||
| } | |||||
| break; | |||||
| case 0x51: // Qualcomm | case 0x51: // Qualcomm | ||||
| switch (part) | switch (part) | ||||
| { | { | ||||
| @@ -1093,6 +1093,22 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #else | #else | ||||
| #endif | #endif | ||||
| #ifdef FORCE_EMAG8180 | |||||
| #define ARMV8 | |||||
| #define FORCE | |||||
| #define ARCHITECTURE "ARM64" | |||||
| #define SUBARCHITECTURE "EMAG8180" | |||||
| #define SUBDIRNAME "arm64" | |||||
| #define ARCHCONFIG "-DEMAG8180 " \ | |||||
| "-DL1_CODE_SIZE=32768 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=8 " \ | |||||
| "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=8 " \ | |||||
| "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ | |||||
| "-DL3_SIZE=33554432 -DL3_LINESIZE=64 -DL3_ASSOCIATIVE=32 " \ | |||||
| "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ | |||||
| "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" | |||||
| #define LIBNAME "emag8180" | |||||
| #define CORENAME "EMAG8180" | |||||
| #endif | |||||
| #ifdef FORCE_ZARCH_GENERIC | #ifdef FORCE_ZARCH_GENERIC | ||||
| #define FORCE | #define FORCE | ||||
| @@ -0,0 +1,3 @@ | |||||
| include $(KERNELDIR)/KERNEL.CORTEXA57 | |||||
| @@ -167,3 +167,27 @@ endif | |||||
| CGEMM3MKERNEL = zgemm3m_kernel.S | CGEMM3MKERNEL = zgemm3m_kernel.S | ||||
| ZGEMM3MKERNEL = zgemm3m_kernel.S | ZGEMM3MKERNEL = zgemm3m_kernel.S | ||||
| ifndef ISMINKERNEL | |||||
| ISMINKERNEL = imin.S | |||||
| endif | |||||
| ifndef IDMINKERNEL | |||||
| IDMINKERNEL = imin.S | |||||
| endif | |||||
| ifndef IQMINKERNEL | |||||
| IQMINKERNEL = imin.S | |||||
| endif | |||||
| ifndef ISMAXKERNEL | |||||
| ISMAXKERNEL = imax.S | |||||
| endif | |||||
| ifndef IDMAXKERNEL | |||||
| IDMAXKERNEL = imax.S | |||||
| endif | |||||
| ifndef IQMAXKERNEL | |||||
| IQMAXKERNEL = imax.S | |||||
| endif | |||||
| @@ -50,3 +50,26 @@ ifndef DSDOTKERNEL | |||||
| DSDOTKERNEL = ../generic/dot.c | DSDOTKERNEL = ../generic/dot.c | ||||
| endif | endif | ||||
| ifndef ISMINKERNEL | |||||
| ISMINKERNEL = imin.S | |||||
| endif | |||||
| ifndef IDMINKERNEL | |||||
| IDMINKERNEL = imin.S | |||||
| endif | |||||
| ifndef IQMINKERNEL | |||||
| IQMINKERNEL = imin.S | |||||
| endif | |||||
| ifndef ISMAXKERNEL | |||||
| ISMAXKERNEL = imax.S | |||||
| endif | |||||
| ifndef IDMAXKERNEL | |||||
| IDMAXKERNEL = imax.S | |||||
| endif | |||||
| ifndef IQMAXKERNEL | |||||
| IQMAXKERNEL = imax.S | |||||
| endif | |||||
| @@ -80,6 +80,8 @@ | |||||
| IF( V1( 1 ).GE.V2( 1 ) ) THEN | IF( V1( 1 ).GE.V2( 1 ) ) THEN | ||||
| IF( V1( 1 ).NE.ZERO ) THEN | IF( V1( 1 ).NE.ZERO ) THEN | ||||
| V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) | V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) | ||||
| ELSE | |||||
| V1( 2 ) = V1( 2 ) + V2( 2 ) | |||||
| END IF | END IF | ||||
| ELSE | ELSE | ||||
| V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) | V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) | ||||
| @@ -80,6 +80,8 @@ | |||||
| IF( V1( 1 ).GE.V2( 1 ) ) THEN | IF( V1( 1 ).GE.V2( 1 ) ) THEN | ||||
| IF( V1( 1 ).NE.ZERO ) THEN | IF( V1( 1 ).NE.ZERO ) THEN | ||||
| V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) | V1( 2 ) = V1( 2 ) + ( V2( 1 ) / V1( 1 ) )**2 * V2( 2 ) | ||||
| ELSE | |||||
| V1( 2 ) = V1( 2 ) + V2( 2 ) | |||||
| END IF | END IF | ||||
| ELSE | ELSE | ||||
| V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) | V1( 2 ) = V2( 2 ) + ( V1( 1 ) / V2( 1 ) )**2 * V1( 2 ) | ||||
| @@ -2603,7 +2603,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||||
| #if defined(CORTEXA53) || defined(CORTEXA57) || \ | #if defined(CORTEXA53) || defined(CORTEXA57) || \ | ||||
| defined(CORTEXA72) || defined(CORTEXA73) || \ | defined(CORTEXA72) || defined(CORTEXA73) || \ | ||||
| defined(FALKOR) || defined(TSV110) | |||||
| defined(FALKOR) || defined(TSV110) || defined(EMAG8180) | |||||
| #define SGEMM_DEFAULT_UNROLL_M 16 | #define SGEMM_DEFAULT_UNROLL_M 16 | ||||
| #define SGEMM_DEFAULT_UNROLL_N 4 | #define SGEMM_DEFAULT_UNROLL_N 4 | ||||