/*
 * vim: syntax=c
 *
 * Implement some C99-compatible complex math functions
 *
 * Most of the code is taken from the msun library in FreeBSD (HEAD @ 4th
 * October 2013), under the following license:
 *
 * Copyright (c) 2007, 2011 David Schultz
 * Copyright (c) 2012 Stephen Montgomery-Smith
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "npy_math_common.h"
#include "npy_math_private.h"
#include <numpy/utils.h>

/*
 * Hack inherited from BSD; the intent is to set the FPU inexact
 * flag in an efficient way. The flag is IEEE specific. See
 * https://github.com/freebsd/freebsd/blob/4c6378299/lib/msun/src/catrig.c#L42
 */
#if !defined(HAVE_CACOSF) || !defined(HAVE_CACOSL) || !defined(HAVE_CASINHF) || !defined(HAVE_CASINHL)
#define raise_inexact() do {                        \
    volatile npy_float NPY_UNUSED(junk) = 1 + tiny; \
} while (0)

static const volatile npy_float tiny = 3.9443045e-31f;
#endif

/**begin repeat
 * #type = npy_float, npy_double, npy_longdouble#
 * #ctype = npy_cfloat,npy_cdouble,npy_clongdouble#
 * #c = f, , l#
 * #C = F, , L#
 * #NAME = CFLOAT, CDOUBLE, CLONGDOUBLE#
 * #TMAX = FLT_MAX, DBL_MAX, LDBL_MAX#
 * #TMIN = FLT_MIN, DBL_MIN, LDBL_MIN#
 * #TMANT_DIG = FLT_MANT_DIG, DBL_MANT_DIG, LDBL_MANT_DIG#
 * #TEPS = FLT_EPSILON, DBL_EPSILON, LDBL_EPSILON#
 * #precision = 1, 2, 3#
 */

/*==========================================================
 * Constants
 *=========================================================*/
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
static const @ctype@ c_1@c@ = {1.0@C@, 0.0@C@};
#else
static const @ctype@ c_1@c@ = 1.0@C@;
#endif

/*==========================================================
 * Helper functions
 *
 * These are necessary because we do not count on using a
 * C99 compiler.
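 *
 * cmul below is the schoolbook complex product. cdiv first forms the ratio
 * of the smaller to the larger component of the divisor and a common scale
 * factor from it (the rescaling commonly attributed to Smith's 1962
 * algorithm), so that the intermediate products stay in range instead of
 * overflowing or underflowing unnecessarily.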
 *=========================================================*/
static inline
@ctype@
cmul@c@(@ctype@ a, @ctype@ b)
{
    @type@ ar, ai, br, bi;
    ar = npy_creal@c@(a);
    ai = npy_cimag@c@(a);
    br = npy_creal@c@(b);
    bi = npy_cimag@c@(b);
    return npy_cpack@c@(ar*br - ai*bi, ar*bi + ai*br);
}

static inline
@ctype@
cdiv@c@(@ctype@ a, @ctype@ b)
{
    @type@ ar, ai, br, bi, abs_br, abs_bi;
    ar = npy_creal@c@(a);
    ai = npy_cimag@c@(a);
    br = npy_creal@c@(b);
    bi = npy_cimag@c@(b);
    abs_br = npy_fabs@c@(br);
    abs_bi = npy_fabs@c@(bi);

    if (abs_br >= abs_bi) {
        if (abs_br == 0 && abs_bi == 0) {
            /* divide by zeros should yield a complex inf or nan */
            return npy_cpack@c@(ar/abs_br, ai/abs_bi);
        }
        else {
            @type@ rat = bi/br;
            @type@ scl = 1.0@C@/(br+bi*rat);
            return npy_cpack@c@((ar + ai*rat)*scl, (ai - ar*rat)*scl);
        }
    }
    else {
        @type@ rat = br/bi;
        @type@ scl = 1.0@C@/(bi + br*rat);
        return npy_cpack@c@((ar*rat + ai)*scl, (ai*rat - ar)*scl);
    }
}

/*==========================================================
 * Custom implementation of missing complex C99 functions
 *=========================================================*/

#ifndef HAVE_CABS@C@
@type@
npy_cabs@c@(@ctype@ z)
{
    return npy_hypot@c@(npy_creal@c@(z), npy_cimag@c@(z));
}
#endif

#ifndef HAVE_CARG@C@
@type@
npy_carg@c@(@ctype@ z)
{
    return npy_atan2@c@(npy_cimag@c@(z), npy_creal@c@(z));
}
#endif

/*
 * cexp and (ccos, csin)h functions need to calculate exp scaled by another
 * number. This can be difficult if exp(x) overflows. By doing it this way,
 * we don't risk overflowing exp. This likely raises floating-point
 * exceptions, if we decide that we care.
 *
 * This is only useful over a limited range (see below) and expects that the
 * input values are in this range.
 *
 * This is based on the technique used in FreeBSD's __frexp_exp and
 * __ldexp_(c)exp functions by David Schultz.
 *
 * SCALED_CEXP_LOWER = log(FLT_MAX)
 * SCALED_CEXP_UPPER = log(2) + log(FLT_MAX) - log(FLT_TRUE_MIN),
 * where FLT_TRUE_MIN is the smallest possible subnormal number.
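 *
 * As a concrete check, in IEEE single precision:
 *   SCALED_CEXP_LOWERF = log(FLT_MAX) = log(3.4028235e38) ~= 88.7228
 *   SCALED_CEXP_UPPERF = log(2) + log(FLT_MAX) - log(FLT_TRUE_MIN)
 *                      ~= 0.6931 + 88.7228 - (-103.2789) ~= 192.6949
 * (FLT_TRUE_MIN = 2^-149, so log(FLT_TRUE_MIN) ~= -103.2789), matching the
 * constants defined below.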
*/ #define SCALED_CEXP_LOWERF 88.722839f #define SCALED_CEXP_UPPERF 192.69492f #define SCALED_CEXP_LOWER 710.47586007394386 #define SCALED_CEXP_UPPER 1454.9159319953251 #define SCALED_CEXP_LOWERL 11357.216553474703895L #define SCALED_CEXP_UPPERL 22756.021937783004509L #if !defined(HAVE_CSINH@C@) || \ !defined(HAVE_CCOSH@C@) || \ !defined(HAVE_CEXP@C@) static @ctype@ _npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt) { #if @precision@ == 1 const npy_int k = 235; #endif #if @precision@ == 2 const npy_int k = 1799; #endif #if @precision@ == 3 const npy_int k = 19547; #endif const @type@ kln2 = k * NPY_LOGE2@c@; @type@ mant, mantcos, mantsin; npy_int ex, excos, exsin; mant = npy_frexp@c@(npy_exp@c@(x - kln2), &ex); mantcos = npy_frexp@c@(npy_cos@c@(y), &excos); mantsin = npy_frexp@c@(npy_sin@c@(y), &exsin); expt += ex + k; return npy_cpack@c@( npy_ldexp@c@(mant * mantcos, expt + excos), npy_ldexp@c@(mant * mantsin, expt + exsin)); } #endif #ifndef HAVE_CEXP@C@ @ctype@ npy_cexp@c@(@ctype@ z) { @type@ x, c, s; @type@ r, i; @ctype@ ret; r = npy_creal@c@(z); i = npy_cimag@c@(z); if (npy_isfinite(r)) { if (r >= SCALED_CEXP_LOWER@C@ && r <= SCALED_CEXP_UPPER@C@) { ret = _npy_scaled_cexp@c@(r, i, 0); } else { x = npy_exp@c@(r); c = npy_cos@c@(i); s = npy_sin@c@(i); if (npy_isfinite(i)) { ret = npy_cpack@c@(x * c, x * s); } else { ret = npy_cpack@c@(NPY_NAN@C@, npy_copysign@c@(NPY_NAN@C@, i)); } } } else if (npy_isnan(r)) { /* r is nan */ if (i == 0) { ret = z; } else { ret = npy_cpack@c@(r, npy_copysign@c@(NPY_NAN@C@, i)); } } else { /* r is +- inf */ if (r > 0) { if (i == 0) { ret = npy_cpack@c@(r, i); } else if (npy_isfinite(i)) { c = npy_cos@c@(i); s = npy_sin@c@(i); ret = npy_cpack@c@(r * c, r * s); } else { /* x = +inf, y = +-inf | nan */ npy_set_floatstatus_invalid(); ret = npy_cpack@c@(r, NPY_NAN@C@); } } else { if (npy_isfinite(i)) { x = npy_exp@c@(r); c = npy_cos@c@(i); s = npy_sin@c@(i); ret = npy_cpack@c@(x * c, x * s); } else { /* x = -inf, y = nan | +i inf */ ret = npy_cpack@c@(0, 0); } } } return ret; } #endif #ifndef HAVE_CLOG@C@ /* algorithm from cpython, rev. d86f5686cef9 * * The usual formula for the real part is log(hypot(z.real, z.imag)). * There are four situations where this formula is potentially * problematic: * * (1) the absolute value of z is subnormal. Then hypot is subnormal, * so has fewer than the usual number of bits of accuracy, hence may * have large relative error. This then gives a large absolute error * in the log. This can be solved by rescaling z by a suitable power * of 2. * * (2) the absolute value of z is greater than DBL_MAX (e.g. when both * z.real and z.imag are within a factor of 1/sqrt(2) of DBL_MAX) * Again, rescaling solves this. * * (3) the absolute value of z is close to 1. In this case it's * difficult to achieve good accuracy, at least in part because a * change of 1ulp in the real or imaginary part of z can result in a * change of billions of ulps in the correctly rounded answer. * * (4) z = 0. The simplest thing to do here is to call the * floating-point log with an argument of 0, and let its behaviour * (returning -infinity, signaling a floating-point exception, setting * errno, or whatever) determine that of c_log. So the usual formula * is fine here. 
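 *
 * For situation (3), the code below relies on the exact identity
 *    hypot(x, y)^2 - 1 = (m-1)*(m+1) + n*n,
 * where m = max(|x|, |y|) and n = min(|x|, |y|), so the real part can be
 * computed as log1p((m-1)*(m+1) + n*n) / 2 without the cancellation that
 * evaluating log(h) directly would suffer when h is close to 1.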
*/ @ctype@ npy_clog@c@(@ctype@ z) { @type@ ax = npy_fabs@c@(npy_creal@c@(z)); @type@ ay = npy_fabs@c@(npy_cimag@c@(z)); @type@ rr, ri; if (ax > @TMAX@/4 || ay > @TMAX@/4) { rr = npy_log@c@(npy_hypot@c@(ax/2, ay/2)) + NPY_LOGE2@c@; } else if (ax < @TMIN@ && ay < @TMIN@) { if (ax > 0 || ay > 0) { /* catch cases where hypot(ax, ay) is subnormal */ rr = npy_log@c@(npy_hypot@c@(npy_ldexp@c@(ax, @TMANT_DIG@), npy_ldexp@c@(ay, @TMANT_DIG@))) - @TMANT_DIG@*NPY_LOGE2@c@; } else { /* log(+/-0 +/- 0i) */ /* raise divide-by-zero floating point exception */ rr = -1.0@c@ / npy_creal@c@(z); rr = npy_copysign@c@(rr, -1); ri = npy_carg@c@(z); return npy_cpack@c@(rr, ri); } } else { @type@ h = npy_hypot@c@(ax, ay); if (0.71 <= h && h <= 1.73) { @type@ am = ax > ay ? ax : ay; /* max(ax, ay) */ @type@ an = ax > ay ? ay : ax; /* min(ax, ay) */ rr = npy_log1p@c@((am-1)*(am+1)+an*an)/2; } else { rr = npy_log@c@(h); } } ri = npy_carg@c@(z); return npy_cpack@c@(rr, ri); } #endif #ifndef HAVE_CSQRT@C@ /* We risk spurious overflow for components >= DBL_MAX / (1 + sqrt(2)). */ #define THRESH (@TMAX@ / (1 + NPY_SQRT2@c@)) @ctype@ npy_csqrt@c@(@ctype@ z) { @ctype@ result; @type@ a, b; @type@ t; int scale; a = npy_creal@c@(z); b = npy_cimag@c@(z); /* Handle special cases. */ if (a == 0 && b == 0) { return (npy_cpack@c@(0, b)); } if (npy_isinf(b)) { return (npy_cpack@c@(NPY_INFINITY@C@, b)); } if (npy_isnan(a)) { t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ return (npy_cpack@c@(a, t)); /* return NaN + NaN i */ } if (npy_isinf(a)) { /* * csqrt(inf + NaN i) = inf + NaN i * csqrt(inf + y i) = inf + 0 i * csqrt(-inf + NaN i) = NaN +- inf i * csqrt(-inf + y i) = 0 + inf i */ if (npy_signbit(a)) { return (npy_cpack@c@(npy_fabs@c@(b - b), npy_copysign@c@(a, b))); } else { return (npy_cpack@c@(a, npy_copysign@c@(b - b, b))); } } /* * The remaining special case (b is NaN) is handled just fine by * the normal code path below. */ /* Scale to avoid overflow. */ if (npy_fabs@c@(a) >= THRESH || npy_fabs@c@(b) >= THRESH) { a *= 0.25; b *= 0.25; scale = 1; } else { scale = 0; } /* Algorithm 312, CACM vol 10, Oct 1967. */ if (a >= 0) { t = npy_sqrt@c@((a + npy_hypot@c@(a, b)) * 0.5@c@); result = npy_cpack@c@(t, b / (2 * t)); } else { t = npy_sqrt@c@((-a + npy_hypot@c@(a, b)) * 0.5@c@); result = npy_cpack@c@(npy_fabs@c@(b) / (2 * t), npy_copysign@c@(t, b)); } /* Rescale. */ if (scale) { return (npy_cpack@c@(npy_creal@c@(result) * 2, npy_cimag@c@(result))); } else { return (result); } } #undef THRESH #endif /* * Always use this function because of the multiplication for small * integer powers, but in the body use cpow if it is available. */ /* private function for use in npy_pow{f, ,l} */ #ifdef HAVE_CPOW@C@ static @ctype@ sys_cpow@c@(@ctype@ x, @ctype@ y) { return cpow@c@(x, y); } #endif @ctype@ npy_cpow@c@ (@ctype@ a, @ctype@ b) { npy_intp n; @type@ ar = npy_creal@c@(a); @type@ br = npy_creal@c@(b); @type@ ai = npy_cimag@c@(a); @type@ bi = npy_cimag@c@(b); @ctype@ r; /* * Checking if in a^b, if b is zero. * If a is not zero then by definition of logarithm a^0 is 1. * If a is also zero then 0^0 is best defined as 1. */ if (br == 0. && bi == 0.) { return npy_cpack@c@(1., 0.); } /* case 0^b * If a is a complex zero (ai=ar=0), then the result depends * upon values of br and bi. The result is either: * 0 (in magnitude), undefined or 1. * The later case is for br=bi=0 and independent of ar and ai * but is handled above). */ else if (ar == 0. && ai == 0.) 
{ /* * If the real part of b is positive (br>0) then this is * the zero complex with positive sign on both the * real and imaginary part. */ if (br > 0) { return npy_cpack@c@(0., 0.); } /* else we are in the case where the * real part of b is negative (br<0). * Here we should return a complex nan * and raise FloatingPointError: invalid value... */ /* Raise invalid value by calling inf - inf*/ volatile @type@ tmp = NPY_INFINITY@C@; tmp -= NPY_INFINITY@C@; ar = tmp; r = npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); return r; } if (bi == 0 && br > -100 && br < 100 && (n=(npy_intp)br) == br) { if (n == 1) { /* unroll: handle inf better */ return npy_cpack@c@(ar, ai); } else if (n == 2) { /* unroll: handle inf better */ return cmul@c@(a, a); } else if (n == 3) { /* unroll: handle inf better */ return cmul@c@(a, cmul@c@(a, a)); } else if (n > -100 && n < 100) { @ctype@ p, aa; npy_intp mask = 1; if (n < 0) { n = -n; } aa = c_1@c@; p = npy_cpack@c@(ar, ai); while (1) { if (n & mask) { aa = cmul@c@(aa,p); } mask <<= 1; if (n < mask || mask <= 0) { break; } p = cmul@c@(p,p); } r = npy_cpack@c@(npy_creal@c@(aa), npy_cimag@c@(aa)); if (br < 0) { r = cdiv@c@(c_1@c@, r); } return r; } } #ifdef HAVE_CPOW@C@ return sys_cpow@c@(a, b); #else { @ctype@ loga = npy_clog@c@(a); ar = npy_creal@c@(loga); ai = npy_cimag@c@(loga); return npy_cexp@c@(npy_cpack@c@(ar*br - ai*bi, ar*bi + ai*br)); } #endif } #ifndef HAVE_CCOS@C@ @ctype@ npy_ccos@c@(@ctype@ z) { /* ccos(z) = ccosh(I * z) */ return npy_ccosh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); } #endif #ifndef HAVE_CSIN@C@ @ctype@ npy_csin@c@(@ctype@ z) { /* csin(z) = -I * csinh(I * z) */ z = npy_csinh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); return npy_cpack@c@(npy_cimag@c@(z), -npy_creal@c@(z)); } #endif #ifndef HAVE_CTAN@C@ @ctype@ npy_ctan@c@(@ctype@ z) { /* ctan(z) = -I * ctanh(I * z) */ z = npy_ctanh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); return (npy_cpack@c@(npy_cimag@c@(z), -npy_creal@c@(z))); } #endif #ifndef HAVE_CCOSH@C@ /* * Taken from the msun library in FreeBSD, rev 226599. * * Hyperbolic cosine of a complex argument z = x + i y. * * cosh(z) = cosh(x+iy) * = cosh(x) cos(y) + i sinh(x) sin(y). * * Exceptional values are noted in the comments within the source code. * These values and the return value were taken from n1124.pdf. * * CCOSH_BIG is chosen such that * spacing(0.5 * exp(CCOSH_BIG)) > 0.5*exp(-CCOSH_BIG) * although the exact value assigned to CCOSH_BIG is not so important */ @ctype@ npy_ccosh@c@(@ctype@ z) { #if @precision@ == 1 const npy_float CCOSH_BIG = 9.0f; const npy_float CCOSH_HUGE = 1.70141183e+38f; #endif #if @precision@ == 2 const npy_double CCOSH_BIG = 22.0; const npy_double CCOSH_HUGE = 8.9884656743115795e+307; #endif #if @precision@ >= 3 #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble CCOSH_BIG = 22.0L; const npy_longdouble CCOSH_HUGE = 8.9884656743115795e+307L; #else const npy_longdouble CCOSH_BIG = 24.0L; const npy_longdouble CCOSH_HUGE = 5.94865747678615882543e+4931L; #endif #endif @type@ x, y, h, absx; npy_int xfinite, yfinite; x = npy_creal@c@(z); y = npy_cimag@c@(z); xfinite = npy_isfinite(x); yfinite = npy_isfinite(y); /* Handle the nearly-non-exceptional cases where x and y are finite. 
*/ if (xfinite && yfinite) { if (y == 0) { return npy_cpack@c@(npy_cosh@c@(x), x * y); } absx = npy_fabs@c@(x); if (absx < CCOSH_BIG) { /* small x: normal case */ return npy_cpack@c@(npy_cosh@c@(x) * npy_cos@c@(y), npy_sinh@c@(x) * npy_sin@c@(y)); } /* |x| >= 22, so cosh(x) ~= exp(|x|) */ if (absx < SCALED_CEXP_LOWER@C@) { /* x < 710: exp(|x|) won't overflow */ h = npy_exp@c@(absx) * 0.5@c@; return npy_cpack@c@(h * npy_cos@c@(y), npy_copysign@c@(h, x) * npy_sin@c@(y)); } else if (absx < SCALED_CEXP_UPPER@C@) { /* x < 1455: scale to avoid overflow */ z = _npy_scaled_cexp@c@(absx, y, -1); return npy_cpack@c@(npy_creal@c@(z), npy_cimag@c@(z) * npy_copysign@c@(1, x)); } else { /* x >= 1455: the result always overflows */ h = CCOSH_HUGE * x; return npy_cpack@c@(h * h * npy_cos@c@(y), h * npy_sin@c@(y)); } } /* * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0. * The sign of 0 in the result is unspecified. Choice = normally * the same as dNaN. Raise the invalid floating-point exception. * * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0. * The sign of 0 in the result is unspecified. Choice = normally * the same as d(NaN). */ if (x == 0 && !yfinite) { return npy_cpack@c@(y - y, npy_copysign@c@(0, x * (y - y))); } /* * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0. * * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0. * The sign of 0 in the result is unspecified. */ if (y == 0 && !xfinite) { return npy_cpack@c@(x * x, npy_copysign@c@(0, x) * y); } /* * cosh(x +- I Inf) = dNaN + I dNaN. * Raise the invalid floating-point exception for finite nonzero x. * * cosh(x + I NaN) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception for finite * nonzero x. Choice = don't raise (except for signaling NaNs). */ if (xfinite && !yfinite) { return npy_cpack@c@(y - y, x * (y - y)); } /* * cosh(+-Inf + I NaN) = +Inf + I d(NaN). * * cosh(+-Inf +- I Inf) = +Inf + I dNaN. * The sign of Inf in the result is unspecified. Choice = always +. * Raise the invalid floating-point exception. * * cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y) */ if (npy_isinf(x)) { if (!yfinite) { return npy_cpack@c@(x * x, x * (y - y)); } return npy_cpack@c@((x * x) * npy_cos@c@(y), x * npy_sin@c@(y)); } /* * cosh(NaN + I NaN) = d(NaN) + I d(NaN). * * cosh(NaN +- I Inf) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception. * Choice = raise. * * cosh(NaN + I y) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception for finite * nonzero y. Choice = don't raise (except for signaling NaNs). */ return npy_cpack@c@((x * x) * (y - y), (x + x) * (y - y)); } #undef CCOSH_BIG #undef CCOSH_HUGE #endif #ifndef HAVE_CSINH@C@ /* * Taken from the msun library in FreeBSD, rev 226599. * * Hyperbolic sine of a complex argument z = x + i y. * * sinh(z) = sinh(x+iy) * = sinh(x) cos(y) + i cosh(x) sin(y). * * Exceptional values are noted in the comments within the source code. * These values and the return value were taken from n1124.pdf. 
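 *
 * For |x| >= CSINH_BIG (22 in double precision), cosh(x) and |sinh(x)| both
 * differ from exp(|x|)/2 by a relative error of at most exp(-2*|x|) <=
 * exp(-44) ~= 8e-20, far below DBL_EPSILON ~= 2.2e-16, which is why the
 * large-|x| branches below use exp(|x|)/2 for both factors.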
*/ @ctype@ npy_csinh@c@(@ctype@ z) { #if @precision@ == 1 const npy_float CSINH_BIG = 9.0f; const npy_float CSINH_HUGE = 1.70141183e+38f; #endif #if @precision@ == 2 const npy_double CSINH_BIG = 22.0; const npy_double CSINH_HUGE = 8.9884656743115795e+307; #endif #if @precision@ >= 3 #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble CSINH_BIG = 22.0L; const npy_longdouble CSINH_HUGE = 8.9884656743115795e+307L; #else const npy_longdouble CSINH_BIG = 24.0L; const npy_longdouble CSINH_HUGE = 5.94865747678615882543e+4931L; #endif #endif @type@ x, y, h, absx; npy_int xfinite, yfinite; x = npy_creal@c@(z); y = npy_cimag@c@(z); xfinite = npy_isfinite(x); yfinite = npy_isfinite(y); /* Handle the nearly-non-exceptional cases where x and y are finite. */ if (xfinite && yfinite) { if (y == 0) { return npy_cpack@c@(npy_sinh@c@(x), y); } absx = npy_fabs@c@(x); if (absx < CSINH_BIG) { /* small x: normal case */ return npy_cpack@c@(npy_sinh@c@(x) * npy_cos@c@(y), npy_cosh@c@(x) * npy_sin@c@(y)); } /* |x| >= 22, so cosh(x) ~= exp(|x|) */ if (absx < SCALED_CEXP_LOWER@C@) { /* x < 710: exp(|x|) won't overflow */ h = npy_exp@c@(npy_fabs@c@(x)) * 0.5@c@; return npy_cpack@c@(npy_copysign@c@(h, x) * npy_cos@c@(y), h * npy_sin@c@(y)); } else if (x < SCALED_CEXP_UPPER@C@) { /* x < 1455: scale to avoid overflow */ z = _npy_scaled_cexp@c@(absx, y, -1); return npy_cpack@c@(npy_creal@c@(z) * npy_copysign@c@(1, x), npy_cimag@c@(z)); } else { /* x >= 1455: the result always overflows */ h = CSINH_HUGE * x; return npy_cpack@c@(h * npy_cos@c@(y), h * h * npy_sin@c@(y)); } } /* * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. * The sign of 0 in the result is unspecified. Choice = normally * the same as dNaN. Raise the invalid floating-point exception. * * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN). * The sign of 0 in the result is unspecified. Choice = normally * the same as d(NaN). */ if (x == 0 && !yfinite) { return npy_cpack@c@(npy_copysign@c@(0, x * (y - y)), y - y); } /* * sinh(+-Inf +- I 0) = +-Inf + I +-0. * * sinh(NaN +- I 0) = d(NaN) + I +-0. */ if (y == 0 && !xfinite) { if (npy_isnan(x)) { return z; } return npy_cpack@c@(x, npy_copysign@c@(0, y)); } /* * sinh(x +- I Inf) = dNaN + I dNaN. * Raise the invalid floating-point exception for finite nonzero x. * * sinh(x + I NaN) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception for finite * nonzero x. Choice = don't raise (except for signaling NaNs). */ if (xfinite && !yfinite) { return npy_cpack@c@(y - y, x * (y - y)); } /* * sinh(+-Inf + I NaN) = +-Inf + I d(NaN). * The sign of Inf in the result is unspecified. Choice = normally * the same as d(NaN). * * sinh(+-Inf +- I Inf) = +Inf + I dNaN. * The sign of Inf in the result is unspecified. Choice = always +. * Raise the invalid floating-point exception. * * sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y) */ if (!xfinite && !npy_isnan(x)) { if (!yfinite) { return npy_cpack@c@(x * x, x * (y - y)); } return npy_cpack@c@(x * npy_cos@c@(y), NPY_INFINITY@C@ * npy_sin@c@(y)); } /* * sinh(NaN + I NaN) = d(NaN) + I d(NaN). * * sinh(NaN +- I Inf) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception. * Choice = raise. * * sinh(NaN + I y) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception for finite * nonzero y. Choice = don't raise (except for signaling NaNs). 
 */
    return npy_cpack@c@((x * x) * (y - y), (x + x) * (y - y));
}
#undef CSINH_BIG
#undef CSINH_HUGE
#endif

#ifndef HAVE_CTANH@C@
/*
 * Taken from the msun library in FreeBSD, rev 226600.
 *
 * Hyperbolic tangent of a complex argument z = x + i y.
 *
 * The algorithm is from:
 *
 *   W. Kahan. Branch Cuts for Complex Elementary Functions or Much
 *   Ado About Nothing's Sign Bit. In The State of the Art in
 *   Numerical Analysis, pp. 165 ff. Iserles and Powell, eds., 1987.
 *
 * Method:
 *
 *   Let t    = tan(y)
 *       beta = 1/cos^2(y)
 *       s    = sinh(x)
 *       rho  = cosh(x)
 *
 *   We have:
 *
 *   tanh(z) = sinh(z) / cosh(z)
 *
 *             sinh(x) cos(y) + i cosh(x) sin(y)
 *           = ---------------------------------
 *             cosh(x) cos(y) + i sinh(x) sin(y)
 *
 *             cosh(x) sinh(x) / cos^2(y) + i tan(y)
 *           = -------------------------------------
 *                   1 + sinh^2(x) / cos^2(y)
 *
 *             beta rho s + i t
 *           = ----------------
 *               1 + beta s^2
 *
 * Modifications:
 *
 *   I omitted the original algorithm's handling of overflow in tan(x) after
 *   verifying with nearpi.c that this can't happen in IEEE single or double
 *   precision. I also handle large x differently.
 */
#define TANH_HUGE 22.0
#define TANHF_HUGE 11.0F
#define TANHL_HUGE 42.0L

@ctype@
npy_ctanh@c@(@ctype@ z)
{
    @type@ x, y;
    @type@ t, beta, s, rho, denom;

    x = npy_creal@c@(z);
    y = npy_cimag@c@(z);

    /*
     * ctanh(NaN + i 0) = NaN + i 0
     *
     * ctanh(NaN + i y) = NaN + i NaN        for y != 0
     *
     * The imaginary part has the sign of x*sin(2*y), but there's no
     * special effort to get this right.
     *
     * ctanh(+-Inf +- i Inf) = +-1 +- 0
     *
     * ctanh(+-Inf + i y) = +-1 + 0 sin(2y)        for y finite
     *
     * The sign of the imaginary part is unspecified. This special
     * case is only needed to avoid a spurious invalid exception when
     * y is infinite.
     */
    if (!npy_isfinite(x)) {
        if (npy_isnan(x)) {
            return npy_cpack@c@(x, (y == 0 ? y : x * y));
        }
        return npy_cpack@c@(npy_copysign@c@(1,x),
                            npy_copysign@c@(0,
                                npy_isinf(y) ?
                                    y : npy_sin@c@(y) * npy_cos@c@(y)));
    }

    /*
     * ctanh(x + i NAN) = NaN + i NaN
     * ctanh(x +- i Inf) = NaN + i NaN
     */
    if (!npy_isfinite(y)) {
        return (npy_cpack@c@(y - y, y - y));
    }

    /*
     * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the
     * approximation sinh^2(huge) ~= exp(2*huge) / 4.
     * We use a modified formula to avoid spurious overflow.
     */
    if (npy_fabs@c@(x) >= TANH@C@_HUGE) {
        @type@ exp_mx = npy_exp@c@(-npy_fabs@c@(x));
        return npy_cpack@c@(npy_copysign@c@(1, x),
                            4 * npy_sin@c@(y) * npy_cos@c@(y) *
                                exp_mx * exp_mx);
    }

    /* Kahan's algorithm */
    t = npy_tan@c@(y);
    beta = 1 + t * t;    /* = 1 / cos^2(y) */
    s = npy_sinh@c@(x);
    rho = npy_sqrt@c@(1 + s * s);    /* = cosh(x) */
    denom = 1 + beta * s * s;
    return (npy_cpack@c@((beta * rho * s) / denom, t / denom));
}
#undef TANH_HUGE
#undef TANHF_HUGE
#undef TANHL_HUGE
#endif

#if !defined (HAVE_CACOS@C@) || !defined (HAVE_CASINH@C@)
/*
 * Complex inverse trig functions taken from the msun library in FreeBSD
 * revision 251404
 *
 * The algorithm is very close to that in "Implementing the complex arcsine
 * and arccosine functions using exception handling" by T. E. Hull, Thomas F.
 * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on
 * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335,
 * https://dl.acm.org/doi/10.1145/275323.275324.
 *
 * Throughout we use the convention z = x + I*y.
* * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B) * where * A = (|z+I| + |z-I|) / 2 * B = (|z+I| - |z-I|) / 2 = y/A * * These formulas become numerically unstable: * (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that * is, Re(casinh(z)) is close to 0); * (b) for Im(casinh(z)) when z is close to either of the intervals * [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is * close to PI/2). * * These numerical problems are overcome by defining * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2 * Then if A < A_crossover, we use * log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1))) * A-1 = f(x, 1+y) + f(x, 1-y) * and if B > B_crossover, we use * asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y))) * A-y = f(x, y+1) + f(x, y-1) * where without loss of generality we have assumed that x and y are * non-negative. * * Much of the difficulty comes because the intermediate computations may * produce overflows or underflows. This is dealt with in the paper by Hull * et al by using exception handling. We do this by detecting when * computations risk underflow or overflow. The hardest part is handling the * underflows when computing f(a, b). * * Note that the function f(a, b) does not appear explicitly in the paper by * Hull et al, but the idea may be found on pages 308 and 309. Introducing the * function f(a, b) allows us to concentrate many of the clever tricks in this * paper into one function. */ /* * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. * Pass hypot(a, b) as the third argument. */ static inline @type@ _f@c@(@type@ a, @type@ b, @type@ hypot_a_b) { if (b < 0) { return ((hypot_a_b - b) / 2); } if (b == 0) { return (a / 2); } return (a * a / (hypot_a_b + b) / 2); } /* * All the hard work is contained in this function. * x and y are assumed positive or zero, and less than RECIP_EPSILON. * Upon return: * rx = Re(casinh(z)) = -Im(cacos(y + I*x)). * B_is_usable is set to 1 if the value of B is usable. * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y. * If returning sqrt_A2my2 has potential to result in an underflow, it is * rescaled, and new_y is similarly rescaled. */ static inline void _do_hard_work@c@(@type@ x, @type@ y, @type@ *rx, npy_int *B_is_usable, @type@ *B, @type@ *sqrt_A2my2, @type@ *new_y) { #if @precision@ == 1 const npy_float A_crossover = 10.0f; const npy_float B_crossover = 0.6417f; const npy_float FOUR_SQRT_MIN = 4.3368086899420177e-19f; #endif #if @precision@ == 2 const npy_double A_crossover = 10.0; const npy_double B_crossover = 0.6417; const npy_double FOUR_SQRT_MIN = 5.9666725849601654e-154; #endif #if @precision@ == 3 const npy_longdouble A_crossover = 10.0l; const npy_longdouble B_crossover = 0.6417l; #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble FOUR_SQRT_MIN = 5.9666725849601654e-154; #else const npy_longdouble FOUR_SQRT_MIN = 7.3344154702193886625e-2466l; #endif #endif @type@ R, S, A; /* A, B, R, and S are as in Hull et al. */ @type@ Am1, Amy; /* A-1, A-y. */ R = npy_hypot@c@(x, y + 1); /* |z+I| */ S = npy_hypot@c@(x, y - 1); /* |z-I| */ /* A = (|z+I| + |z-I|) / 2 */ A = (R + S) / 2; /* * Mathematically A >= 1. There is a small chance that this will not * be so because of rounding errors. So we will make certain it is * so. */ if (A < 1) { A = 1; } if (A < A_crossover) { /* * Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y). 
* rx = log1p(Am1 + sqrt(Am1*(A+1))) */ if (y == 1 && x < @TEPS@ * @TEPS@ / 128) { /* * fp is of order x^2, and fm = x/2. * A = 1 (inexactly). */ *rx = npy_sqrt@c@(x); } else if (x >= @TEPS@ * npy_fabs@c@(y - 1)) { /* * Underflow will not occur because * x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN */ Am1 = _f@c@(x, 1 + y, R) + _f@c@(x, 1 - y, S); *rx = npy_log1p@c@(Am1 + npy_sqrt@c@(Am1 * (A + 1))); } else if (y < 1) { /* * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and * A = 1 (inexactly). */ *rx = x / npy_sqrt@c@((1 - y) * (1 + y)); } else { /* if (y > 1) */ /* * A-1 = y-1 (inexactly). */ *rx = npy_log1p@c@((y - 1) + npy_sqrt@c@((y - 1) * (y + 1))); } } else { *rx = npy_log@c@(A + npy_sqrt@c@(A * A - 1)); } *new_y = y; if (y < FOUR_SQRT_MIN) { /* * Avoid a possible underflow caused by y/A. For casinh this * would be legitimate, but will be picked up by invoking atan2 * later on. For cacos this would not be legitimate. */ *B_is_usable = 0; *sqrt_A2my2 = A * (2 / @TEPS@); *new_y = y * (2 / @TEPS@); return; } /* B = (|z+I| - |z-I|) / 2 = y/A */ *B = y / A; *B_is_usable = 1; if (*B > B_crossover) { *B_is_usable = 0; /* * Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1). * sqrt_A2my2 = sqrt(Amy*(A+y)) */ if (y == 1 && x < @TEPS@ / 128) { /* * fp is of order x^2, and fm = x/2. * A = 1 (inexactly). */ *sqrt_A2my2 = npy_sqrt@c@(x) * npy_sqrt@c@((A + y) / 2); } else if (x >= @TEPS@ * npy_fabs@c@(y - 1)) { /* * Underflow will not occur because * x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN * and * x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN */ Amy = _f@c@(x, y + 1, R) + _f@c@(x, y - 1, S); *sqrt_A2my2 = npy_sqrt@c@(Amy * (A + y)); } else if (y > 1) { /* * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and * A = y (inexactly). * * y < RECIP_EPSILON. So the following * scaling should avoid any underflow problems. */ *sqrt_A2my2 = x * (4 / @TEPS@ / @TEPS@) * y / npy_sqrt@c@((y + 1) * (y - 1)); *new_y = y * (4 / @TEPS@ / @TEPS@); } else { /* if (y < 1) */ /* * fm = 1-y >= DBL_EPSILON, fp is of order x^2, and * A = 1 (inexactly). */ *sqrt_A2my2 = npy_sqrt@c@((1 - y) * (1 + y)); } } } /* * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. */ static inline void _clog_for_large_values@c@(@type@ x, @type@ y, @type@ *rr, @type@ *ri) { #if @precision@ == 1 const npy_float QUARTER_SQRT_MAX = 4.611685743549481e+18f; const npy_float SQRT_MIN = 1.0842021724855044e-19f; #endif #if @precision@ == 2 const npy_double QUARTER_SQRT_MAX = 3.3519519824856489e+153; const npy_double SQRT_MIN = 1.4916681462400413e-154; #endif #if @precision@ == 3 #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble QUARTER_SQRT_MAX = 3.3519519824856489e+153; const npy_longdouble SQRT_MIN = 1.4916681462400413e-154; #else const npy_longdouble QUARTER_SQRT_MAX = 2.7268703390485398235e+2465l; const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l; #endif #endif @type@ ax, ay, t; ax = npy_fabs@c@(x); ay = npy_fabs@c@(y); if (ax < ay) { t = ax; ax = ay; ay = t; } /* * Avoid overflow in hypot() when x and y are both very large. * Divide x and y by E, and then add 1 to the logarithm. This depends * on E being larger than sqrt(2). * Dividing by E causes an insignificant loss of accuracy; however * this method is still poor since it is unnecessarily slow. */ if (ax > @TMAX@ / 2) { *rr = npy_log@c@(npy_hypot@c@(x / NPY_E@c@, y / NPY_E@c@)) + 1; } /* * Avoid overflow when x or y is large. Avoid underflow when x or * y is small. 
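 *
 * In the final branch below, ax <= QUARTER_SQRT_MAX (= sqrt(TMAX)/4) means
 * ax*ax cannot overflow, and ay >= SQRT_MIN (= sqrt(TMIN)) means ay*ay does
 * not underflow, so ax*ax + ay*ay can be formed directly and the slower
 * hypot() call is avoided.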
*/ else if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) { *rr = npy_log@c@(npy_hypot@c@(x, y)); } else { *rr = npy_log@c@(ax * ax + ay * ay) / 2; } *ri = npy_atan2@c@(y, x); } #endif #ifndef HAVE_CACOS@C@ @ctype@ npy_cacos@c@(@ctype@ z) { #if @precision@ == 1 /* this is sqrt(6*EPS) */ const npy_float SQRT_6_EPSILON = 8.4572793338e-4f; /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. */ const volatile npy_float pio2_lo = 7.5497899549e-9f; #endif #if @precision@ == 2 const npy_double SQRT_6_EPSILON = 3.65002414998885671e-08; const volatile npy_double pio2_lo = 6.1232339957367659e-17; #endif #if @precision@ == 3 const npy_longdouble SQRT_6_EPSILON = 8.0654900873493277169e-10l; const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; #endif const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; const @type@ pio2_hi = NPY_PI_2@c@; @type@ x, y, ax, ay, wx, wy, rx, ry, B, sqrt_A2mx2, new_x; npy_int sx, sy; npy_int B_is_usable; x = npy_creal@c@(z); y = npy_cimag@c@(z); sx = npy_signbit(x); sy = npy_signbit(y); ax = npy_fabs@c@(x); ay = npy_fabs@c@(y); if (npy_isnan(x) || npy_isnan(y)) { /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ if (npy_isinf(x)) { return npy_cpack@c@(y + y, -NPY_INFINITY@C@); } /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ if (npy_isinf(y)) { return npy_cpack@c@(x + x, -y); } /* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */ if (x == 0) { return npy_cpack@c@(pio2_hi + pio2_lo, y + y); } /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { /* clog...() will raise inexact unless x or y is infinite. */ _clog_for_large_values@c@(x, y, &wx, &wy); rx = npy_fabs@c@(wy); ry = wx + NPY_LOGE2@c@; if (sy == 0) { ry = -ry; } return npy_cpack@c@(rx, ry); } /* Avoid spuriously raising inexact for z = 1. */ if (x == 1 && y == 0) { return npy_cpack@c@(0, -y); } /* All remaining cases are inexact. */ raise_inexact(); if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) { return npy_cpack@c@(pio2_hi - (x - pio2_lo), -y); } _do_hard_work@c@(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); if (B_is_usable) { if (sx == 0) { rx = npy_acos@c@(B); } else { rx = npy_acos@c@(-B); } } else { if (sx == 0) { rx = npy_atan2@c@(sqrt_A2mx2, new_x); } else { rx = npy_atan2@c@(sqrt_A2mx2, -new_x); } } if (sy == 0) { ry = -ry; } return npy_cpack@c@(rx, ry); } #endif #ifndef HAVE_CASIN@C@ @ctype@ npy_casin@c@(@ctype@ z) { /* casin(z) = I * conj( casinh(I * conj(z)) ) */ z = npy_casinh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z))); return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)); } #endif #ifndef HAVE_CATAN@C@ @ctype@ npy_catan@c@(@ctype@ z) { /* catan(z) = I * conj( catanh(I * conj(z)) ) */ z = npy_catanh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z))); return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)); } #endif #ifndef HAVE_CACOSH@C@ @ctype@ npy_cacosh@c@(@ctype@ z) { /* * cacosh(z) = I*cacos(z) or -I*cacos(z) * where the sign is chosen so Re(cacosh(z)) >= 0. 
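 *
 * In terms of w = cacos(z), this amounts to returning
 *    |Im(w)| + I * copysign(Re(w), Im(z)),
 * which is what the code below does once the NaN special cases are handled.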
*/ @ctype@ w; @type@ rx, ry; w = npy_cacos@c@(z); rx = npy_creal@c@(w); ry = npy_cimag@c@(w); /* cacosh(NaN + I*NaN) = NaN + I*NaN */ if (npy_isnan(rx) && npy_isnan(ry)) { return npy_cpack@c@(ry, rx); } /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ if (npy_isnan(rx)) { return npy_cpack@c@(npy_fabs@c@(ry), rx); } /* cacosh(0 + I*NaN) = NaN + I*NaN */ if (npy_isnan(ry)) { return npy_cpack@c@(ry, ry); } return npy_cpack@c@(npy_fabs@c@(ry), npy_copysign@c@(rx, npy_cimag@c@(z))); } #endif #ifndef HAVE_CASINH@C@ /* * casinh(z) = z + O(z^3) as z -> 0 * * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity * The above formula works for the imaginary part as well, because * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3) * as z -> infinity, uniformly in y */ @ctype@ npy_casinh@c@(@ctype@ z) { #if @precision@ == 1 /* this is sqrt(6*EPS) */ const npy_float SQRT_6_EPSILON = 8.4572793338e-4f; #endif #if @precision@ == 2 const npy_double SQRT_6_EPSILON = 3.65002414998885671e-08; #endif #if @precision@ == 3 const npy_longdouble SQRT_6_EPSILON = 8.0654900873493277169e-10l; #endif const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; @type@ x, y, ax, ay, wx, wy, rx, ry, B, sqrt_A2my2, new_y; npy_int B_is_usable; x = npy_creal@c@(z); y = npy_cimag@c@(z); ax = npy_fabs@c@(x); ay = npy_fabs@c@(y); if (npy_isnan(x) || npy_isnan(y)) { /* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */ if (npy_isinf(x)) { return npy_cpack@c@(x, y + y); } /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ if (npy_isinf(y)) { return npy_cpack@c@(y, x + x); } /* casinh(NaN + I*0) = NaN + I*0 */ if (y == 0) { return npy_cpack@c@(x + x, y); } /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { /* clog...() will raise inexact unless x or y is infinite. */ if (npy_signbit(x) == 0) { _clog_for_large_values@c@(x, y, &wx, &wy); wx += NPY_LOGE2@c@; } else { _clog_for_large_values@c@(-x, -y, &wx, &wy); wx += NPY_LOGE2@c@; } return npy_cpack@c@(npy_copysign@c@(wx, x), npy_copysign@c@(wy, y)); } /* Avoid spuriously raising inexact for z = 0. */ if (x == 0 && y == 0) { return (z); } /* All remaining cases are inexact. */ raise_inexact(); if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) { return (z); } _do_hard_work@c@(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); if (B_is_usable) { ry = npy_asin@c@(B); } else { ry = npy_atan2@c@(new_y, sqrt_A2my2); } return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y)); } #endif #ifndef HAVE_CATANH@C@ /* * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). * Assumes x*x and y*y will not overflow. * Assumes x and y are finite. * Assumes y is non-negative. * Assumes fabs(x) >= DBL_EPSILON. */ static inline @type@ _sum_squares@c@(@type@ x, @type@ y) { #if @precision@ == 1 const npy_float SQRT_MIN = 1.0842022e-19f; #endif #if @precision@ == 2 const npy_double SQRT_MIN = 1.4916681462400413e-154; /* sqrt(DBL_MIN) */ #endif #if @precision@ == 3 #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble SQRT_MIN = 1.4916681462400413e-154; /* sqrt(DBL_MIN) */ #else /* this is correct for 80 bit long doubles */ const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l; #endif #endif /* Avoid underflow when y is small. 
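 *
 * Since fabs(x) >= DBL_EPSILON is assumed, x*x >= DBL_EPSILON^2, while
 * y < SQRT_MIN implies y*y < DBL_MIN; the term y*y is then far below half
 * an ulp of x*x, so x*x + y*y would round to x*x anyway and computing y*y
 * would only risk an underflow exception.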
*/ if (y < SQRT_MIN) { return (x * x); } return (x * x + y * y); } /* * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y). * Assumes x and y are not NaN, and one of x and y is larger than * RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use * the code creal(1/z), because the imaginary part may produce an unwanted * underflow. * This is only called in a context where inexact is always raised before * the call, so no effort is made to avoid or force inexact. */ #if @precision@ == 1 #define BIAS (FLT_MAX_EXP - 1) #define CUTOFF (FLT_MANT_DIG / 2 + 1) static inline npy_float _real_part_reciprocalf(npy_float x, npy_float y) { npy_float scale; npy_uint32 hx, hy; npy_int32 ix, iy; GET_FLOAT_WORD(hx, x); ix = hx & 0x7f800000; GET_FLOAT_WORD(hy, y); iy = hy & 0x7f800000; if (ix - iy >= CUTOFF << 23 || npy_isinf(x)) { return (1 / x); } if (iy - ix >= CUTOFF << 23) { return (x / y / y); } if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23) { return (x / (x * x + y * y)); } SET_FLOAT_WORD(scale, 0x7f800000 - ix); x *= scale; y *= scale; return (x / (x * x + y * y) * scale); } #undef BIAS #undef CUTOFF #endif #if @precision@ == 2 #define BIAS (DBL_MAX_EXP - 1) /* more guard digits are useful iff there is extra precision. */ #define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */ static inline npy_double _real_part_reciprocal(npy_double x, npy_double y) { npy_double scale; npy_uint32 hx, hy; npy_int32 ix, iy; /* * This code is inspired by the C99 document n1124.pdf, Section G.5.1, * example 2. */ GET_HIGH_WORD(hx, x); ix = hx & 0x7ff00000; GET_HIGH_WORD(hy, y); iy = hy & 0x7ff00000; if (ix - iy >= CUTOFF << 20 || npy_isinf(x)) { /* +-Inf -> +-0 is special */ return (1 / x); } if (iy - ix >= CUTOFF << 20) { /* should avoid double div, but hard */ return (x / y / y); } if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20) { return (x / (x * x + y * y)); } scale = 1; SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */ x *= scale; y *= scale; return (x / (x * x + y * y) * scale); } #undef BIAS #undef CUTOFF #endif #if @precision@ == 3 #if !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) && \ !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE) #define BIAS (LDBL_MAX_EXP - 1) #define CUTOFF (LDBL_MANT_DIG / 2 + 1) static inline npy_longdouble _real_part_reciprocall(npy_longdouble x, npy_longdouble y) { npy_longdouble scale; union IEEEl2bitsrep ux, uy, us; npy_int32 ix, iy; ux.e = x; ix = GET_LDOUBLE_EXP(ux); uy.e = y; iy = GET_LDOUBLE_EXP(uy); if (ix - iy >= CUTOFF || npy_isinf(x)) { return (1/x); } if (iy - ix >= CUTOFF) { return (x/y/y); } if (ix <= BIAS + LDBL_MAX_EXP / 2 - CUTOFF) { return (x/(x*x + y*y)); } us.e = 1; SET_LDOUBLE_EXP(us, 0x7fff - ix); scale = us.e; x *= scale; y *= scale; return (x/(x*x + y*y) * scale); } #undef BIAS #undef CUTOFF #else static inline npy_longdouble _real_part_reciprocall(npy_longdouble x, npy_longdouble y) { return x/(x*x + y*y); } #endif #endif @ctype@ npy_catanh@c@(@ctype@ z) { #if @precision@ == 1 /* this is sqrt(3*EPS) */ const npy_float SQRT_3_EPSILON = 5.9801995673e-4f; /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. 
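 *
 * (For single precision, pio2_lo ~= 7.55e-9 is well below half an ulp of
 * pio2_hi ~= 1.5707964, whose ulp is about 1.19e-7, so the sum rounds back
 * to pio2_hi while still raising the inexact flag.)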
*/ const volatile npy_float pio2_lo = 7.5497899549e-9f; #endif #if @precision@ == 2 const npy_double SQRT_3_EPSILON = 2.5809568279517849e-8; const volatile npy_double pio2_lo = 6.1232339957367659e-17; #endif #if @precision@ == 3 const npy_longdouble SQRT_3_EPSILON = 5.70316273435758915310e-10l; const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; #endif const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; const @type@ pio2_hi = NPY_PI_2@c@; @type@ x, y, ax, ay, rx, ry; x = npy_creal@c@(z); y = npy_cimag@c@(z); ax = npy_fabs@c@(x); ay = npy_fabs@c@(y); /* This helps handle many cases. */ if (y == 0 && ax <= 1) { return npy_cpack@c@(npy_atanh@c@(x), y); } /* To ensure the same accuracy as atan(), and to filter out z = 0. */ if (x == 0) { return npy_cpack@c@(x, npy_atan@c@(y)); } if (npy_isnan(x) || npy_isnan(y)) { /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */ if (npy_isinf(x)) { return npy_cpack@c@(npy_copysign@c@(0, x), y + y); } /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ if (npy_isinf(y)) { return npy_cpack@c@(npy_copysign@c@(0, x), npy_copysign@c@(pio2_hi + pio2_lo, y)); } /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { return npy_cpack@c@(_real_part_reciprocal@c@(x, y), npy_copysign@c@(pio2_hi + pio2_lo, y)); } if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { /* * z = 0 was filtered out above. All other cases must raise * inexact, but this is the only one that needs to do it * explicitly. */ raise_inexact(); return (z); } if (ax == 1 && ay < @TEPS@) { rx = (NPY_LOGE2@c@ - npy_log@c@(ay)) / 2; } else { rx = npy_log1p@c@(4 * ax / _sum_squares@c@(ax - 1, ay)) / 4; } if (ax == 1) { ry = npy_atan2@c@(2, -ay) / 2; } else if (ay < @TEPS@) { ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax)) / 2; } else { ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; } return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y)); } #endif /**end repeat**/ /*========================================================== * Decorate all the functions which are available natively *=========================================================*/ /**begin repeat * #type = npy_float, npy_double, npy_longdouble# * #ctype = npy_cfloat, npy_cdouble, npy_clongdouble# * #c = f, , l# * #C = F, , L# */ /**begin repeat1 * #kind = cabs,carg# * #KIND = CABS,CARG# */ #ifdef HAVE_@KIND@@C@ @type@ npy_@kind@@c@(@ctype@ z) { return @kind@@c@(z); } #endif /**end repeat1**/ /**begin repeat1 * #kind = cexp,clog,csqrt,ccos,csin,ctan,ccosh,csinh,ctanh, * cacos,casin,catan,cacosh,casinh,catanh# * #KIND = CEXP,CLOG,CSQRT,CCOS,CSIN,CTAN,CCOSH,CSINH,CTANH, * CACOS,CASIN,CATAN,CACOSH,CASINH,CATANH# */ #ifdef HAVE_@KIND@@C@ @ctype@ npy_@kind@@c@(@ctype@ z) { return @kind@@c@(z); } #endif /**end repeat1**/ /**end repeat**/