- use on arm, arm64 and any new platform
- use faster integer math instead of double
- use similar scale as rdtsc so that timeouts work

tags/v0.2.15^2
| @@ -410,7 +410,35 @@ typedef char env_var_t[MAX_PATH]; | |||||
| typedef char* env_var_t; | typedef char* env_var_t; | ||||
| #define readenv(p, n) ((p)=getenv(n)) | #define readenv(p, n) ((p)=getenv(n)) | ||||
| #endif | #endif | ||||
/*
 * Generic rpcc() fallback: used when no earlier definition has set
 * RPCC_DEFINED and the target is not Windows.
 */
#if !defined(RPCC_DEFINED) && !defined(OS_WINDOWS)
#ifdef _POSIX_MONOTONIC_CLOCK
/*
 * Test for __GLIBC_PREREQ itself and expand it only inside a nested #if.
 * The preprocessor parses the whole controlling expression even when the
 * defined() operand is false, so writing
 * "defined(X) && __GLIBC_PREREQ(2, 17)" on one line is a hard error on C
 * libraries that do not provide the macro.
 */
#if defined(__GLIBC_PREREQ)
#if __GLIBC_PREREQ(2, 17) // don't require -lrt
#define USE_MONOTONIC
#endif
#endif
#if !defined(USE_MONOTONIC) && defined(OS_ANDROID)
#define USE_MONOTONIC
#endif
#endif
/*
 * Returns a timestamp expressed in nanoseconds.
 *
 * use similar scale as x86 rdtsc for timeouts to work correctly
 *
 * With USE_MONOTONIC the value is read from CLOCK_MONOTONIC; otherwise it is
 * the gettimeofday() time with the microsecond field scaled up to
 * nanoseconds.
 */
static inline unsigned long long rpcc(void){
#ifdef USE_MONOTONIC
  struct timespec ts;
  clock_gettime(CLOCK_MONOTONIC, &ts);
  return (unsigned long long)ts.tv_sec * 1000000000ull + (unsigned long long)ts.tv_nsec;
#else
  struct timeval tv;
  gettimeofday(&tv,NULL);
  /* widen before scaling so the microsecond-to-nanosecond multiply is done in 64 bits */
  return (unsigned long long)tv.tv_sec * 1000000000ull + (unsigned long long)tv.tv_usec * 1000ull;
#endif
}
#define RPCC_DEFINED
#define RPCC64BIT
#endif // !RPCC_DEFINED
| #ifndef RPCC_DEFINED | |||||
| #error "rpcc() implementation is missing for your platform" | |||||
| #endif | #endif | ||||
| #endif // !ASSEMBLER | |||||
| #ifdef OS_LINUX | #ifdef OS_LINUX | ||||
| #include "common_linux.h" | #include "common_linux.h" | ||||
| @@ -89,6 +89,7 @@ static __inline unsigned int rpcc(void){ | |||||
| return r0; | return r0; | ||||
| } | } | ||||
| #define RPCC_DEFINED | |||||
| #define HALT ldq $0, 0($0) | #define HALT ldq $0, 0($0) | ||||
| @@ -72,16 +72,6 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||||
| } | } | ||||
/*
 * Returns the current gettimeofday() time in milliseconds.
 *
 * Computed with integer arithmetic: whole seconds are scaled to
 * milliseconds and the microsecond field is truncated to milliseconds.
 * This keeps the millisecond scale of the previous double-based version
 * while avoiding its floating-point rounding and the non-standard
 * "1000.0d" literal suffix.
 */
static inline unsigned long long rpcc(void){
  struct timeval tv;
  gettimeofday(&tv, NULL);
  return (unsigned long long)tv.tv_sec * 1000ull
       + (unsigned long long)tv.tv_usec / 1000ull;
}
| static inline int blas_quickdivide(blasint x, blasint y){ | static inline int blas_quickdivide(blasint x, blasint y){ | ||||
| return x / y; | return x / y; | ||||
| } | } | ||||
| @@ -71,16 +71,6 @@ static void __inline blas_lock(volatile BLASULONG *address){ | |||||
| } | } | ||||
/*
 * Returns the current gettimeofday() time in milliseconds.
 *
 * Whole seconds are scaled to milliseconds and the microsecond field is
 * truncated to milliseconds, all in integer arithmetic. The millisecond
 * scale of the earlier double-based computation is kept; the floating-point
 * rounding and the non-standard "1000.0d" literal suffix are gone.
 */
static inline unsigned long long rpcc(void){
  struct timeval now;
  gettimeofday(&now, NULL);
  return (unsigned long long)now.tv_sec * 1000ull
       + (unsigned long long)now.tv_usec / 1000ull;
}
| static inline int blas_quickdivide(blasint x, blasint y){ | static inline int blas_quickdivide(blasint x, blasint y){ | ||||
| return x / y; | return x / y; | ||||
| } | } | ||||
| @@ -75,6 +75,7 @@ static __inline unsigned long rpcc(void) { | |||||
| __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks)); | __asm__ __volatile__ ("mov %0=ar.itc" : "=r"(clocks)); | ||||
| return clocks; | return clocks; | ||||
| } | } | ||||
| #define RPCC_DEFINED | |||||
| static __inline unsigned long stmxcsr(void){ | static __inline unsigned long stmxcsr(void){ | ||||
| @@ -103,6 +104,7 @@ static __inline void blas_lock(volatile unsigned long *address){ | |||||
| static __inline unsigned int rpcc(void) { | static __inline unsigned int rpcc(void) { | ||||
| return __getReg(_IA64_REG_AR_ITC); | return __getReg(_IA64_REG_AR_ITC); | ||||
| } | } | ||||
| #define RPCC_DEFINED | |||||
| static __inline unsigned int stmxcsr(void) { | static __inline unsigned int stmxcsr(void) { | ||||
| return __getReg(_IA64_REG_AR_FPSR); | return __getReg(_IA64_REG_AR_FPSR); | ||||
| @@ -118,6 +118,7 @@ static inline unsigned int rpcc(void){ | |||||
| #endif | #endif | ||||
| return ret; | return ret; | ||||
| } | } | ||||
| #define RPCC_DEFINED | |||||
| #if defined(LOONGSON3A) || defined(LOONGSON3B) | #if defined(LOONGSON3A) || defined(LOONGSON3B) | ||||
| #ifndef NO_AFFINITY | #ifndef NO_AFFINITY | ||||
| @@ -103,6 +103,7 @@ static inline unsigned long rpcc(void){ | |||||
| #endif | #endif | ||||
| } | } | ||||
| #define RPCC_DEFINED | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define RPCC64BIT | #define RPCC64BIT | ||||
| @@ -66,6 +66,7 @@ static __inline unsigned long rpcc(void){ | |||||
| return clocks; | return clocks; | ||||
| }; | }; | ||||
| #define RPCC_DEFINED | |||||
| #ifdef __64BIT__ | #ifdef __64BIT__ | ||||
| #define RPCC64BIT | #define RPCC64BIT | ||||
| @@ -73,6 +73,7 @@ static __inline unsigned long long rpcc(void){ | |||||
| return ((unsigned long long)a + ((unsigned long long)d << 32)); | return ((unsigned long long)a + ((unsigned long long)d << 32)); | ||||
| }; | }; | ||||
| #define RPCC_DEFINED | |||||
| static __inline unsigned long getstackaddr(void){ | static __inline unsigned long getstackaddr(void){ | ||||
| unsigned long addr; | unsigned long addr; | ||||
| @@ -82,6 +82,7 @@ static __inline BLASULONG rpcc(void){ | |||||
| return ((BLASULONG)a + ((BLASULONG)d << 32)); | return ((BLASULONG)a + ((BLASULONG)d << 32)); | ||||
| } | } | ||||
| #define RPCC_DEFINED | |||||
| #define RPCC64BIT | #define RPCC64BIT | ||||