|
|
@@ -284,8 +284,12 @@ static void exec_threads(blas_queue_t *queue, int buf_index){ |
|
|
|
sb = queue -> sb; |
|
|
|
|
/* With CONSISTENT_FPCSR defined, load the floating-point control state stored in this queue entry. */ |
/* AArch64 writes sse_mode to FPCR; every other target loads MXCSR from sse_mode and the x87 control word from x87_mode. */ |
/* NOTE(review): presumably these are the values captured by the CONSISTENT_FPCSR loop in exec_blas -- confirm. */ |
#ifdef CONSISTENT_FPCSR |
|
|
|
#ifdef __aarch64__ |
|
/* Move the saved value from a general-purpose register into FPCR. */ |
/* NOTE(review): %0 has no width modifier; confirm the type of sse_mode matches the register width msr expects. */ |
__asm__ __volatile__ ("msr fpcr, %0" : : "r" (queue -> sse_mode)); |
|
|
|
#else |
|
|
/* Load MXCSR from the memory operand queue->sse_mode. */ |
__asm__ __volatile__ ("ldmxcsr %0" : : "m" (queue -> sse_mode)); |
|
|
/* Load the x87 FPU control word from the memory operand queue->x87_mode. */ |
__asm__ __volatile__ ("fldcw %0" : : "m" (queue -> x87_mode)); |
|
|
|
#endif |
|
|
|
#endif |
|
|
|
|
|
|
|
if ((sa == NULL) && (sb == NULL) && ((queue -> mode & BLAS_PTHREAD) == 0)) { |
|
|
@@ -383,8 +387,12 @@ int exec_blas(BLASLONG num, blas_queue_t *queue){ |
|
|
|
|
/* With CONSISTENT_FPCSR defined, record the current floating-point control state in each of the num queue entries. */ |
/* AArch64 stores FPCR in sse_mode; every other target stores the x87 control word in x87_mode and MXCSR in sse_mode. */ |
/* NOTE(review): presumably exec_threads reloads these fields before running the queued work -- confirm. */ |
#ifdef CONSISTENT_FPCSR |
|
|
/* The control registers are re-read on every iteration, once per queue entry. */ |
for (i = 0; i < num; i ++) { |
|
|
|
#ifdef __aarch64__ |
|
|
/* Read FPCR into queue[i].sse_mode through a general-purpose register. */ |
__asm__ __volatile__ ("mrs %0, fpcr" : "=r" (queue[i].sse_mode)); |
|
|
|
#else |
|
|
/* Store the x87 FPU control word to queue[i].x87_mode (no pending-exception check: fnstcw). */ |
__asm__ __volatile__ ("fnstcw %0" : "=m" (queue[i].x87_mode)); |
|
|
/* Store MXCSR to queue[i].sse_mode. */ |
__asm__ __volatile__ ("stmxcsr %0" : "=m" (queue[i].sse_mode)); |
|
|
|
#endif |
|
|
|
} |
|
|
|
#endif |
|
|
|
|
|
|
|