Browse Source

Add missing barriers in gemm scheduler

A few places in the GEMM scheduler code were missing memory barriers.
The code likely worked anyway because of its heavy use of volatile / _Atomic,
but there is no reason to leave the synchronization incorrect.
tags/v0.3.1
Arjan van de Ven 7 years ago
parent
commit
73de17664d
1 changed file with 4 additions and 3 deletions
  1. +4
    -3
      driver/level3/level3_thread.c

+ 4
- 3
driver/level3/level3_thread.c View File

@@ -347,7 +347,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
/* Make sure if no one is using workspace */ /* Make sure if no one is using workspace */
START_RPCC(); START_RPCC();
for (i = 0; i < args -> nthreads; i++) for (i = 0; i < args -> nthreads; i++)
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB;};
STOP_RPCC(waiting1); STOP_RPCC(waiting1);


#if defined(FUSED_GEMM) && !defined(TIMING) #if defined(FUSED_GEMM) && !defined(TIMING)
@@ -409,7 +409,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,


/* Wait until other region of B is initialized */ /* Wait until other region of B is initialized */
START_RPCC(); START_RPCC();
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;MB;};
STOP_RPCC(waiting2); STOP_RPCC(waiting2);


/* Apply kernel with local region of A and part of other region of B */ /* Apply kernel with local region of A and part of other region of B */
@@ -427,6 +427,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
/* Clear synchronization flag if this thread is done with other region of B */ /* Clear synchronization flag if this thread is done with other region of B */
if (m_to - m_from == min_i) { if (m_to - m_from == min_i) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0; job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
WMB;
} }
} }
} while (current != mypos); } while (current != mypos);
@@ -488,7 +489,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
START_RPCC(); START_RPCC();
for (i = 0; i < args -> nthreads; i++) { for (i = 0; i < args -> nthreads; i++) {
for (js = 0; js < DIVIDE_RATE; js++) { for (js = 0; js < DIVIDE_RATE; js++) {
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;};
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;MB;};
} }
} }
STOP_RPCC(waiting3); STOP_RPCC(waiting3);


Loading…
Cancel
Save