Browse Source

undo slow dgemm/skylake microoptimization

The ALPHA != 1.0 comparison is more costly than the multiplication it was meant to skip, so always scale by ALPHA.
tags/v0.3.4
Arjan van de Ven 7 years ago
parent
commit
6d43c51ccf
1 changed file with 3 additions and 5 deletions
  1. +3
    -5
      kernel/x86_64/dgemm_kernel_4x8_skylakex.c

+ 3
- 5
kernel/x86_64/dgemm_kernel_4x8_skylakex.c View File

@@ -647,11 +647,9 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


#define SAVE2x2(ALPHA) \
if (ALPHA != 1.0) { \
xmm0 = _mm_set1_pd(ALPHA); \
xmm4 *= xmm0; \
xmm6 *= xmm0; \
} \
xmm0 = _mm_set1_pd(ALPHA); \
xmm4 *= xmm0; \
xmm6 *= xmm0; \
\
xmm4 += _mm_loadu_pd(CO1); \
xmm6 += _mm_loadu_pd(CO1 + ldc); \


Loading…
Cancel
Save