|
|
@@ -18,20 +18,26 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { |
|
|
|
|
|
|
|
#ifdef DOUBLE |
|
|
|
long double safmin = DBL_MIN; |
|
|
|
long double rtmin = sqrt(DBL_MIN/DBL_EPSILON); |
|
|
|
#else |
|
|
|
long double safmin = FLT_MIN; |
|
|
|
long double rtmin = sqrt(FLT_MIN/FLT_EPSILON); |
|
|
|
#endif |
|
|
|
|
|
|
|
#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__) || defined(_M_X64) || defined(_M_IX86) |
|
|
|
|
|
|
|
long double da_r = *(DA + 0); |
|
|
|
long double da_i = *(DA + 1); |
|
|
|
long double db_r = *(DB + 0); |
|
|
|
long double db_i = *(DB + 1); |
|
|
|
long double r; |
|
|
|
FLOAT da_r = *(DA+0); |
|
|
|
FLOAT da_i = *(DA+1); |
|
|
|
FLOAT db_r = *(DB+0); |
|
|
|
FLOAT db_i = *(DB+1); |
|
|
|
//long double r; |
|
|
|
FLOAT *r, *S1=(FLOAT *)malloc(2*sizeof(FLOAT)); |
|
|
|
FLOAT *R=(FLOAT *)malloc(2*sizeof(FLOAT)); |
|
|
|
long double d; |
|
|
|
|
|
|
|
long double ada = fabsl(da_r) + fabsl(da_i); |
|
|
|
long double adb = sqrt(db_r * db_r + db_i * db_i); |
|
|
|
FLOAT ada = da_r * da_r + da_i * da_i; |
|
|
|
FLOAT adb = db_r * db_r + db_i * db_i; |
|
|
|
FLOAT adart = sqrt( da_r * da_r + da_i * da_i); |
|
|
|
FLOAT adbrt = sqrt( db_r * db_r + db_i * db_i); |
|
|
|
|
|
|
|
PRINT_DEBUG_NAME; |
|
|
|
|
|
|
@@ -39,128 +45,137 @@ void CNAME(void *VDA, void *VDB, FLOAT *C, void *VS) { |
|
|
|
|
|
|
|
FUNCTION_PROFILE_START(); |
|
|
|
|
|
|
|
if (ada == ZERO) { |
|
|
|
*C = ZERO; |
|
|
|
*(S + 0) = ONE; |
|
|
|
if (db_r == ZERO && db_i == ZERO) { |
|
|
|
*C = ONE; |
|
|
|
*(S + 0) = ZERO; |
|
|
|
*(S + 1) = ZERO; |
|
|
|
*(DA + 0) = db_r; |
|
|
|
*(DA + 1) = db_i; |
|
|
|
} else { |
|
|
|
long double alpha_r, alpha_i; |
|
|
|
long double safmax = 1./safmin; |
|
|
|
long double sigma; |
|
|
|
long double maxab = MAX(ada,adb); |
|
|
|
long double scale = MIN(MAX(safmin,maxab), safmax); |
|
|
|
|
|
|
|
|
|
|
|
long double aa_r = da_r / scale; |
|
|
|
long double aa_i = da_i / scale; |
|
|
|
long double bb_r = db_r / scale; |
|
|
|
long double bb_i = db_i / scale; |
|
|
|
|
|
|
|
if (ada > adb) |
|
|
|
sigma = copysign(1.,da_r); |
|
|
|
else |
|
|
|
sigma = copysign(1.,db_r); |
|
|
|
|
|
|
|
r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); |
|
|
|
|
|
|
|
|
|
|
|
alpha_r = da_r / ada; |
|
|
|
alpha_i = da_i / ada; |
|
|
|
|
|
|
|
*(C + 0) = ada / r; |
|
|
|
*(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r; |
|
|
|
*(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r; |
|
|
|
*(DA + 0) = alpha_r * r; |
|
|
|
*(DA + 1) = alpha_i * r; |
|
|
|
return; |
|
|
|
} |
|
|
|
#else |
|
|
|
FLOAT da_r = *(DA + 0); |
|
|
|
FLOAT da_i = *(DA + 1); |
|
|
|
FLOAT db_r = *(DB + 0); |
|
|
|
FLOAT db_i = *(DB + 1); |
|
|
|
FLOAT r; |
|
|
|
|
|
|
|
FLOAT ada = fabs(da_r) + fabs(da_i); |
|
|
|
FLOAT adb = fabs(db_r) + fabs(db_i); |
|
|
|
|
|
|
|
PRINT_DEBUG_NAME; |
|
|
|
|
|
|
|
IDEBUG_START; |
|
|
|
|
|
|
|
FUNCTION_PROFILE_START(); |
|
|
|
|
|
|
|
if (ada == ZERO) { |
|
|
|
*C = ZERO; |
|
|
|
*(S + 0) = ONE; |
|
|
|
*(S + 1) = ZERO; |
|
|
|
*(DA + 0) = db_r; |
|
|
|
*(DA + 1) = db_i; |
|
|
|
} else { |
|
|
|
long double safmax = 1./safmin; |
|
|
|
FLOAT scale, sigma; |
|
|
|
FLOAT aa_r, aa_i, bb_r, bb_i; |
|
|
|
FLOAT alpha_r, alpha_i; |
|
|
|
|
|
|
|
aa_r = fabs(da_r); |
|
|
|
aa_i = fabs(da_i); |
|
|
|
|
|
|
|
if (aa_i > aa_r) { |
|
|
|
aa_r = fabs(da_i); |
|
|
|
aa_i = fabs(da_r); |
|
|
|
} |
|
|
|
|
|
|
|
if (aa_r == ZERO) { |
|
|
|
ada = 0.; |
|
|
|
} else { |
|
|
|
scale = (aa_i / aa_r); |
|
|
|
ada = aa_r * sqrt(ONE + scale * scale); |
|
|
|
} |
|
|
|
|
|
|
|
bb_r = fabs(db_r); |
|
|
|
bb_i = fabs(db_i); |
|
|
|
|
|
|
|
if (bb_i > bb_r) { |
|
|
|
bb_r = fabs(bb_i); |
|
|
|
bb_i = fabs(bb_r); |
|
|
|
} |
|
|
|
|
|
|
|
if (bb_r == ZERO) { |
|
|
|
adb = 0.; |
|
|
|
} else { |
|
|
|
scale = (bb_i / bb_r); |
|
|
|
adb = bb_r * sqrt(ONE + scale * scale); |
|
|
|
} |
|
|
|
FLOAT maxab = MAX(ada,adb); |
|
|
|
scale = MIN(MAX(safmin,maxab), safmax); |
|
|
|
|
|
|
|
aa_r = da_r / scale; |
|
|
|
aa_i = da_i / scale; |
|
|
|
bb_r = db_r / scale; |
|
|
|
bb_i = db_i / scale; |
|
|
|
|
|
|
|
if (ada > adb) |
|
|
|
sigma = copysign(1.,da_r); |
|
|
|
else |
|
|
|
sigma = copysign(1.,db_r); |
|
|
|
|
|
|
|
r = sigma * scale * sqrt(aa_r * aa_r + aa_i * aa_i + bb_r * bb_r + bb_i * bb_i); |
|
|
|
|
|
|
|
alpha_r = da_r / ada; |
|
|
|
alpha_i = da_i / ada; |
|
|
|
|
|
|
|
*(C + 0) = ada / r; |
|
|
|
*(S + 0) = (alpha_r * db_r + alpha_i *db_i) / r; |
|
|
|
*(S + 1) = (alpha_i * db_r - alpha_r *db_i) / r; |
|
|
|
*(DA + 0) = alpha_r * r; |
|
|
|
*(DA + 1) = alpha_i * r; |
|
|
|
} |
|
|
|
long double safmax = 1./safmin; |
|
|
|
#if defined DOUBLE |
|
|
|
long double rtmax = safmax /DBL_EPSILON; |
|
|
|
#else |
|
|
|
long double rtmax = safmax /FLT_EPSILON; |
|
|
|
#endif |
|
|
|
|
|
|
|
FUNCTION_PROFILE_END(4, 4, 4); |
|
|
|
|
|
|
|
IDEBUG_END; |
|
|
|
|
|
|
|
return; |
|
|
|
*(S1 + 0) = *(DB + 0); |
|
|
|
*(S1 + 1) = *(DB + 1) *-1; |
|
|
|
if (da_r == ZERO && da_i == ZERO) { |
|
|
|
*C = ZERO; |
|
|
|
if (db_r == ZERO) { |
|
|
|
(*DA) = fabsl(db_i); |
|
|
|
*S = *S1 /da_r; |
|
|
|
*(S+1) = *(S1+1) /da_r; |
|
|
|
return; |
|
|
|
} else if ( db_i == ZERO) { |
|
|
|
*DA = fabsl(db_r); |
|
|
|
*S = *S1 /da_r; |
|
|
|
*(S+1) = *(S1+1) /da_r; |
|
|
|
return; |
|
|
|
} else { |
|
|
|
long double g1 = MAX( fabsl(db_r), fabsl(db_i)); |
|
|
|
rtmax =sqrt(safmax/2.); |
|
|
|
if (g1 > rtmin && g1 < rtmax) { // unscaled |
|
|
|
d = sqrt(adb); |
|
|
|
*S = *S1 /d; |
|
|
|
*(S+1) = *(S1+1) /d; |
|
|
|
*DA = d ; |
|
|
|
*(DA+1) = ZERO; |
|
|
|
return; |
|
|
|
} else { // scaled algorithm |
|
|
|
long double u = MIN ( safmax, MAX ( safmin, g1)); |
|
|
|
FLOAT gs_r = db_r/u; |
|
|
|
FLOAT gs_i = db_i/u; |
|
|
|
d = sqrt ( gs_r*gs_r + gs_i*gs_i); |
|
|
|
*S = gs_r / d; |
|
|
|
*(S + 1) = (gs_i * -1) / d; |
|
|
|
*DA = d * u; |
|
|
|
*(DA+1) = ZERO; |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
} else { |
|
|
|
FLOAT f1 = MAX ( fabsl(da_r), fabsl(da_i)); |
|
|
|
FLOAT g1 = MAX ( fabsl(db_r), fabsl(db_i)); |
|
|
|
rtmax = sqrt(safmax / 4.); |
|
|
|
if ( f1 > rtmin && f1 < rtmax && g1 > rtmin && g1 < rtmax) { //unscaled |
|
|
|
long double h = ada + adb; |
|
|
|
double adahsq = sqrt(ada * h); |
|
|
|
if (ada >= h *safmin) { |
|
|
|
*C = sqrt(ada/h); |
|
|
|
*R = *DA / *C; |
|
|
|
*(R+1) = *(DA+1) / *(C+1); |
|
|
|
rtmax *= 2.; |
|
|
|
if ( ada > rtmin && h < rtmax) { // no risk of intermediate overflow |
|
|
|
*S = *S1 * (*DA / adahsq) - *(S1+1)* (*(DA+1)/adahsq); |
|
|
|
*(S+1) = *S1 * (*(DA+1) / adahsq) + *(S1+1) * (*DA/adahsq); |
|
|
|
} else { |
|
|
|
*S = *S1 * (*R/h) - *(S1+1) * (*(R+1)/h); |
|
|
|
*(S+1) = *S1 * (*(R+1)/h) + *(S1+1) * (*(R)/h); |
|
|
|
} |
|
|
|
} else { |
|
|
|
*C = ada / adahsq; |
|
|
|
if (*C >= safmin) |
|
|
|
*R = *DA / *C; |
|
|
|
else |
|
|
|
*R = *DA * (h / adahsq); |
|
|
|
*S = *S1 * ada / adahsq; |
|
|
|
*(S+1) = *(S1+1) * ada / adahsq; |
|
|
|
} |
|
|
|
*DA=*R; |
|
|
|
*(DA+1)=*(R+1); |
|
|
|
return; |
|
|
|
} else { // scaled |
|
|
|
FLOAT fs_r, fs_i, gs_r, gs_i; |
|
|
|
long double v,w,f2,g2,h; |
|
|
|
long double u = MIN ( safmax, MAX ( safmin, MAX(f1,g1))); |
|
|
|
gs_r = db_r/u; |
|
|
|
gs_i = db_i/u; |
|
|
|
g2 = sqrt ( gs_r*gs_r + gs_i*gs_i); |
|
|
|
if (f1 /u < rtmin) { |
|
|
|
v = MIN (safmax, MAX (safmin, f1)); |
|
|
|
w = v / u; |
|
|
|
fs_r = *DA/ v; |
|
|
|
fs_i = *(DA+1) / v; |
|
|
|
f2 = sqrt ( fs_r*fs_r + fs_i*fs_i); |
|
|
|
h = f2 * w * w + g2; |
|
|
|
} else { // use same scaling for both |
|
|
|
w = 1.; |
|
|
|
fs_r = *DA/ u; |
|
|
|
fs_i = *(DA+1) / u; |
|
|
|
f2 = sqrt ( fs_r*fs_r + fs_i*fs_i); |
|
|
|
h = f2 + g2; |
|
|
|
} |
|
|
|
if ( f2 >= h * safmin) { |
|
|
|
*C = sqrt ( f2 / h ); |
|
|
|
*DA = fs_r / *C; |
|
|
|
*(DA+1) = fs_i / *C; |
|
|
|
rtmax *= 2; |
|
|
|
if ( f2 > rtmin && h < rtmax) { |
|
|
|
*S = gs_r * (fs_r /sqrt(f2*h)) - gs_i * (fs_i / sqrt(f2*h)); |
|
|
|
*(S+1) = gs_r * (fs_i /sqrt(f2*h)) + gs_i * -1. * (fs_r / sqrt(f2*h)); |
|
|
|
} else { |
|
|
|
*S = gs_r * (*DA/h) - gs_i * (*(DA+1) / h); |
|
|
|
*(S+1) = gs_r * (*(DA+1) /h) + gs_i * -1. * (*DA / h); |
|
|
|
} |
|
|
|
} else { // intermediates might overflow |
|
|
|
d = sqrt ( f2 * h); |
|
|
|
*C = f2 /d; |
|
|
|
if (*C >= safmin) { |
|
|
|
*DA = fs_r / *C; |
|
|
|
*(DA+1) = fs_i / *C; |
|
|
|
} else { |
|
|
|
*DA = fs_r * (h / d); |
|
|
|
*(DA+1) = fs_i / (h / d); |
|
|
|
} |
|
|
|
*S = gs_r * (fs_r /d) - gs_i * (fs_i / d); |
|
|
|
*(S+1) = gs_r * (fs_i /d) + gs_i * -1. * (fs_r / d); |
|
|
|
} |
|
|
|
*C *= w; |
|
|
|
*DA *= u; |
|
|
|
*(DA+1) *= u; |
|
|
|
return; |
|
|
|
} |
|
|
|
} |
|
|
|
} |
|
|
|
|