Browse Source

Benchmarks: align malloc'ed buffers.

Benchmarks should allocate with cacheline (often 64 bytes) alignment
to avoid unreliable timings. This technique, storing the offset in the
byte before the pointer, doesn't require C11's aligned_alloc for
compatibility with older compilers.

For example, Glibc's x86_64 malloc returns 16-byte aligned buffers, which is
not sufficient for AVX/AVX2 (32-byte preferred) or AVX512 (64-byte).
tags/v0.3.22^2
Bart Oldeman 2 years ago
parent
commit
9959a60873
1 changed files with 18 additions and 0 deletions
  1. +18
    -0
      benchmark/bench.h

+ 18
- 0
benchmark/bench.h View File

@@ -74,6 +74,24 @@ static void *huge_malloc(BLASLONG size){

#endif

/* Benchmarks should allocate with cacheline (often 64 bytes) alignment
to avoid unreliable results. This technique, storing the offset in the
byte before the pointer, doesn't require C11's aligned_alloc for
compatibility with older compilers. */
static void *aligned_alloc_cacheline(size_t n)
{
void *p = malloc((size_t)(void *) + n + L1_DATA_LINESIZE - 1);
if (p) {
void **newp = (void **)
(((uintptr_t)p + L1_DATA_LINESIZE) & (uintptr_t)-L1_DATA_LINESIZE);
newp[-1] = p;
p = newp;
}
return p;
}
#define malloc aligned_alloc_cacheline
#define free(p) free((p) ? ((void **)(p))[-1] : (p))

#if defined(__WIN32__) || defined(__WIN64__) || !defined(_POSIX_TIMERS)
struct timeval start, stop;
#elif defined(__APPLE__)


Loading…
Cancel
Save