This website works better with JavaScript.
Home
Issues
Pull Requests
Milestones
AI流水线
Repositories
Datasets
Forum
实训
竞赛
大数据
Register
Sign In
OSchip
/
OpenBLAS
Not watched
Unwatch
Watch all
Watch but not notify
1
Star
0
Fork
0
Code
Releases
66
Wiki
evaluate
Activity
Issues
0
Pull Requests
0
Datasets
Model
Cloudbrain
HPC
Browse Source
Make NaN handling depend on the dummy2 parameter
tags/v0.3.28^2
Martin Kroeker
GitHub
1 year ago
parent
dd6c33d34d
commit
c2ffd90e8c
No known key found for this signature in database
GPG Key ID:
B5690EEEBB952194
2 changed files
with
11 additions
and
2 deletions
Split View
Diff Options
Show Stats
Download Patch File
Download Diff File
+5
-1
kernel/x86_64/scal_sse.S
+6
-1
kernel/x86_64/scal_sse2.S
+ 5
- 1
kernel/x86_64/scal_sse.S
View File
@@ -60,8 +60,10 @@
#ifdef WINDOWS_ABI
movq 40(%rsp), X
movq 48(%rsp), INCX
movq 64(%rsp), %r9
movaps %xmm3, %xmm0
#else
movq 24(%rsp), %r9
#endif
SAVEREGISTERS
@@ -76,6 +78,8 @@
shufps $0, %xmm0, %xmm0
jne .L100 # Alpha != ZERO
cmpq $1, %r9
je .L100
/* Alpha == ZERO */
cmpq $SIZE, INCX
+ 6
- 1
kernel/x86_64/scal_sse2.S
View File
@@ -48,6 +48,7 @@
#define X ARG2
#define INCX ARG3
#endif
#define FLAG %r9
#define XX %r10
#define I %rax
@@ -60,8 +61,10 @@
#ifdef WINDOWS_ABI
movq 40(%rsp), X
movq 48(%rsp), INCX
movq 64(%rsp), FLAG
movaps %xmm3, %xmm0
#else
movq 24(%rsp), FLAG
#endif
SAVEREGISTERS
@@ -75,6 +78,8 @@
comisd %xmm0, %xmm1
jne .L100 # Alpha != ZERO
jp .L100 # For Alpha = NaN
cmpq $1, FLAG
je .L100 # disable the Alpha=zero path as it does not handle x=inf or nan
/* Alpha == ZERO */
cmpq $SIZE, INCX
Write
Preview
Loading…
Cancel
Save