Browse Source

Refs #189. Fixed a bug in s/cdot on x86_64 where invalid reads could pick up NaN values.

tags/v0.2.6
Zhang Xianyi 12 years ago
parent
commit
d311236dfd
2 changed files with 8 additions and 7 deletions
  1. +4
    -3
      kernel/x86_64/dot_sse.S
  2. +4
    -4
      kernel/x86_64/zdot_sse.S

+ 4
- 3
kernel/x86_64/dot_sse.S View File

@@ -530,7 +530,7 @@
#endif
movsd -32 * SIZE(Y), %xmm8

- pshufd $0x39, %xmm4, %xmm5
+ pshufd $0x29, %xmm4, %xmm5

mulps %xmm8, %xmm5
addps %xmm5, %xmm3
@@ -750,7 +750,8 @@
xorps %xmm5, %xmm5
movhlps %xmm4, %xmm5

- mulps -32 * SIZE(Y), %xmm5
+ movlps -32 * SIZE(Y), %xmm4
+ mulps %xmm4, %xmm5
addps %xmm5, %xmm0

addq $2 * SIZE, X
@@ -992,7 +993,7 @@
movsd -32 * SIZE(Y), %xmm8

movss %xmm5, %xmm4
- shufps $0x93, %xmm5, %xmm4
+ shufps $0x93, %xmm4, %xmm4

mulps %xmm8, %xmm4
addps %xmm4, %xmm3


+ 4
- 4
kernel/x86_64/zdot_sse.S View File

@@ -699,7 +699,7 @@
movsd -32 * SIZE(X), %xmm4

pshufd $0xb1, %xmm4, %xmm12
- shufps $0x39, %xmm8, %xmm8
+ shufps $0x59, %xmm8, %xmm8
mulps %xmm8, %xmm4
addps %xmm4, %xmm0
mulps %xmm8, %xmm12
@@ -1336,7 +1336,7 @@

movss %xmm9, %xmm8
pshufd $0xb1, %xmm4, %xmm12
- shufps $0x93, %xmm8, %xmm8
+ shufps $0x03, %xmm8, %xmm8
mulps %xmm8, %xmm4
addps %xmm4, %xmm0
mulps %xmm8, %xmm12
@@ -1697,7 +1697,7 @@
movsd -32 * SIZE(Y), %xmm4

pshufd $0xb1, %xmm4, %xmm12
- shufps $0x39, %xmm8, %xmm8
+ shufps $0xa9, %xmm8, %xmm8
mulps %xmm8, %xmm4
addps %xmm4, %xmm0
mulps %xmm8, %xmm12
@@ -2024,7 +2024,7 @@

movss %xmm9, %xmm8
pshufd $0xb1, %xmm4, %xmm12
- shufps $0x93, %xmm8, %xmm8
+ shufps $0x03, %xmm8, %xmm8
mulps %xmm8, %xmm4
addps %xmm4, %xmm0
mulps %xmm8, %xmm12


Loading…
Cancel
Save