|
|
@@ -409,12 +409,20 @@ KERNEL_S1_END_\@: |
|
|
|
|
|
|
|
#if defined(DOUBLE) |
|
|
|
|
|
|
|
znrm2_zero: |
|
|
|
.word 0x00000000 |
|
|
|
.word 0x00000000 |
|
|
|
|
|
|
|
|
|
|
|
znrm2_one: |
|
|
|
.word 0x00000000 |
|
|
|
.word 0x3ff00000 |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
cnrm2_zero: |
|
|
|
.word 0x00000000 |
|
|
|
|
|
|
|
cnrm2_one: |
|
|
|
.word 0x3f800000 |
|
|
|
|
|
|
@@ -424,12 +432,20 @@ cnrm2_one: |
|
|
|
|
|
|
|
#if defined(DOUBLE) |
|
|
|
|
|
|
|
dnrm2_zero: |
|
|
|
.word 0x00000000 |
|
|
|
.word 0x00000000 |
|
|
|
|
|
|
|
|
|
|
|
dnrm2_one: |
|
|
|
.word 0x00000000 |
|
|
|
.word 0x3ff00000 |
|
|
|
|
|
|
|
#else |
|
|
|
|
|
|
|
snrm2_zero: |
|
|
|
.word 0x00000000 |
|
|
|
|
|
|
|
snrm2_one: |
|
|
|
.word 0x3f800000 |
|
|
|
|
|
|
@@ -446,12 +462,12 @@ nrm2_begin: |
|
|
|
#if defined(COMPLEX) |
|
|
|
|
|
|
|
#if defined(DOUBLE) |
|
|
|
vsub.f64 d0 , d0 , d0 // scale=0.0 |
|
|
|
vldr.64 d0 , znrm2_zero |
|
|
|
vldr.64 d1 , znrm2_one // ssq=1.0 |
|
|
|
vmov.f64 d7 , d1 // value 1.0 |
|
|
|
vmov.f64 d6 , d0 // value 0.0 |
|
|
|
#else |
|
|
|
vsub.f32 s0 , s0 , s0 // scale=0.0 |
|
|
|
vldr.32 s0 , cnrm2_zero |
|
|
|
vldr.32 s1 , cnrm2_one // ssq=1.0 |
|
|
|
vmov.f32 s7 , s1 // value 1.0 |
|
|
|
vmov.f32 s6 , s0 // value 0.0 |
|
|
@@ -460,12 +476,12 @@ nrm2_begin: |
|
|
|
#else |
|
|
|
|
|
|
|
#if defined(DOUBLE) |
|
|
|
vsub.f64 d0 , d0 , d0 // scale=0.0 |
|
|
|
vldr.64 d0 , dnrm2_zero |
|
|
|
vldr.64 d1 , dnrm2_one // ssq=1.0 |
|
|
|
vmov.f64 d7 , d1 // value 1.0 |
|
|
|
vmov.f64 d6 , d0 // value 0.0 |
|
|
|
#else |
|
|
|
vsub.f32 s0 , s0 , s0 // scale=0.0 |
|
|
|
vldr.32 s0 , snrm2_zero |
|
|
|
vldr.32 s1 , snrm2_one // ssq=1.0 |
|
|
|
vmov.f32 s7 , s1 // value 1.0 |
|
|
|
vmov.f32 s6 , s0 // value 0.0 |
|
|
|