@@ -347,7 +347,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
/* Make sure if no one is using workspace */
/* Make sure if no one is using workspace */
START_RPCC();
START_RPCC();
for (i = 0; i < args -> nthreads; i++)
for (i = 0; i < args -> nthreads; i++)
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;};
while (job[mypos].working[i][CACHE_LINE_SIZE * bufferside]) {YIELDING;MB; };
STOP_RPCC(waiting1);
STOP_RPCC(waiting1);
#if defined(FUSED_GEMM) && !defined(TIMING)
#if defined(FUSED_GEMM) && !defined(TIMING)
@@ -409,7 +409,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
/* Wait until other region of B is initialized */
/* Wait until other region of B is initialized */
START_RPCC();
START_RPCC();
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;};
while(job[current].working[mypos][CACHE_LINE_SIZE * bufferside] == 0) {YIELDING;MB; };
STOP_RPCC(waiting2);
STOP_RPCC(waiting2);
/* Apply kernel with local region of A and part of other region of B */
/* Apply kernel with local region of A and part of other region of B */
@@ -427,6 +427,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
/* Clear synchronization flag if this thread is done with other region of B */
/* Clear synchronization flag if this thread is done with other region of B */
if (m_to - m_from == min_i) {
if (m_to - m_from == min_i) {
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
job[current].working[mypos][CACHE_LINE_SIZE * bufferside] &= 0;
WMB;
}
}
}
}
} while (current != mypos);
} while (current != mypos);
@@ -488,7 +489,7 @@ static int inner_thread(blas_arg_t *args, BLASLONG *range_m, BLASLONG *range_n,
START_RPCC();
START_RPCC();
for (i = 0; i < args -> nthreads; i++) {
for (i = 0; i < args -> nthreads; i++) {
for (js = 0; js < DIVIDE_RATE; js++) {
for (js = 0; js < DIVIDE_RATE; js++) {
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;};
while (job[mypos].working[i][CACHE_LINE_SIZE * js] ) {YIELDING;MB; };
}
}
}
}
STOP_RPCC(waiting3);
STOP_RPCC(waiting3);