/*
 * vim: syntax=c
 *
 * Implement some C99-compatible complex math functions
 *
 * Most of the code is taken from the msun library in FreeBSD (HEAD @ 4th
 * October 2013), under the following license:
 *
 * Copyright (c) 2007, 2011 David Schultz
 * Copyright (c) 2012 Stephen Montgomery-Smith
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include "npy_math_common.h"
#include "npy_math_private.h"
#include <numpy/utils.h>

/*
 * Hack inherited from BSD; the intent is to set the FPU inexact
 * flag in an efficient way. The flag is IEEE specific. See
 * https://github.com/freebsd/freebsd/blob/4c6378299/lib/msun/src/catrig.c#L42
 */
#if !defined(HAVE_CACOSF) || !defined(HAVE_CACOSL) || !defined(HAVE_CASINHF) || !defined(HAVE_CASINHL)
#define raise_inexact() do {                        \
    volatile npy_float NPY_UNUSED(junk) = 1 + tiny; \
} while (0)

static const volatile npy_float tiny = 3.9443045e-31f;
#endif

/**begin repeat
 * #type = npy_float, npy_double, npy_longdouble#
 * #ctype = npy_cfloat,npy_cdouble,npy_clongdouble#
 * #c = f, , l#
 * #C = F, , L#
 * #NAME = CFLOAT, CDOUBLE, CLONGDOUBLE#
 * #TMAX = FLT_MAX, DBL_MAX, LDBL_MAX#
 * #TMIN = FLT_MIN, DBL_MIN, LDBL_MIN#
 * #TMANT_DIG = FLT_MANT_DIG, DBL_MANT_DIG, LDBL_MANT_DIG#
 * #TEPS = FLT_EPSILON, DBL_EPSILON, LDBL_EPSILON#
 * #precision = 1, 2, 3#
 */

/*==========================================================
 * Constants
 *=========================================================*/
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
static const @ctype@ c_1@c@ = {1.0@C@, 0.0@C@};
#else
static const @ctype@ c_1@c@ = 1.0@C@;
#endif

/*==========================================================
 * Helper functions
 *
 * These are necessary because we do not count on using a
 * C99 compiler.
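 *
 * cmul below is the schoolbook complex product. cdiv first forms the ratio
 * of the smaller to the larger component of the divisor and a common scale
 * factor from it (the rescaling commonly attributed to Smith's 1962
 * algorithm), so that the intermediate products stay in range instead of
 * overflowing or underflowing unnecessarily.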
 *=========================================================*/
static inline
@ctype@
cmul@c@(@ctype@ a, @ctype@ b)
{
    @type@ ar, ai, br, bi;
    ar = npy_creal@c@(a);
    ai = npy_cimag@c@(a);
    br = npy_creal@c@(b);
    bi = npy_cimag@c@(b);
    return npy_cpack@c@(ar*br - ai*bi, ar*bi + ai*br);
}

static inline
@ctype@
cdiv@c@(@ctype@ a, @ctype@ b)
{
    @type@ ar, ai, br, bi, abs_br, abs_bi;
    ar = npy_creal@c@(a);
    ai = npy_cimag@c@(a);
    br = npy_creal@c@(b);
    bi = npy_cimag@c@(b);
    abs_br = npy_fabs@c@(br);
    abs_bi = npy_fabs@c@(bi);

    if (abs_br >= abs_bi) {
        if (abs_br == 0 && abs_bi == 0) {
            /* divide by zeros should yield a complex inf or nan */
            return npy_cpack@c@(ar/abs_br, ai/abs_bi);
        }
        else {
            @type@ rat = bi/br;
            @type@ scl = 1.0@C@/(br+bi*rat);
            return npy_cpack@c@((ar + ai*rat)*scl, (ai - ar*rat)*scl);
        }
    }
    else {
        @type@ rat = br/bi;
        @type@ scl = 1.0@C@/(bi + br*rat);
        return npy_cpack@c@((ar*rat + ai)*scl, (ai*rat - ar)*scl);
    }
}

/*==========================================================
 * Custom implementation of missing complex C99 functions
 *=========================================================*/

#ifndef HAVE_CABS@C@
@type@
npy_cabs@c@(@ctype@ z)
{
    return npy_hypot@c@(npy_creal@c@(z), npy_cimag@c@(z));
}
#endif

#ifndef HAVE_CARG@C@
@type@
npy_carg@c@(@ctype@ z)
{
    return npy_atan2@c@(npy_cimag@c@(z), npy_creal@c@(z));
}
#endif

/*
 * cexp and (ccos, csin)h functions need to calculate exp scaled by another
 * number. This can be difficult if exp(x) overflows. By doing it this way,
 * we don't risk overflowing exp. This likely raises floating-point
 * exceptions, if we decide that we care.
 *
 * This is only useful over a limited range (see below) and expects that the
 * input values are in this range.
 *
 * This is based on the technique used in FreeBSD's __frexp_exp and
 * __ldexp_(c)exp functions by David Schultz.
 *
 * SCALED_CEXP_LOWER = log(FLT_MAX)
 * SCALED_CEXP_UPPER = log(2) + log(FLT_MAX) - log(FLT_TRUE_MIN),
 * where FLT_TRUE_MIN is the smallest possible subnormal number.
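 *
 * As a concrete check, in IEEE single precision:
 *   SCALED_CEXP_LOWERF = log(FLT_MAX) = log(3.4028235e38) ~= 88.7228
 *   SCALED_CEXP_UPPERF = log(2) + log(FLT_MAX) - log(FLT_TRUE_MIN)
 *                      ~= 0.6931 + 88.7228 - (-103.2789) ~= 192.6949
 * (FLT_TRUE_MIN = 2^-149, so log(FLT_TRUE_MIN) ~= -103.2789), matching the
 * constants defined below.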
*/ #define SCALED_CEXP_LOWERF 88.722839f #define SCALED_CEXP_UPPERF 192.69492f #define SCALED_CEXP_LOWER 710.47586007394386 #define SCALED_CEXP_UPPER 1454.9159319953251 #define SCALED_CEXP_LOWERL 11357.216553474703895L #define SCALED_CEXP_UPPERL 22756.021937783004509L #if !defined(HAVE_CSINH@C@) || \ !defined(HAVE_CCOSH@C@) || \ !defined(HAVE_CEXP@C@) static @ctype@ _npy_scaled_cexp@c@(@type@ x, @type@ y, npy_int expt) { #if @precision@ == 1 const npy_int k = 235; #endif #if @precision@ == 2 const npy_int k = 1799; #endif #if @precision@ == 3 const npy_int k = 19547; #endif const @type@ kln2 = k * NPY_LOGE2@c@; @type@ mant, mantcos, mantsin; npy_int ex, excos, exsin; mant = npy_frexp@c@(npy_exp@c@(x - kln2), &ex); mantcos = npy_frexp@c@(npy_cos@c@(y), &excos); mantsin = npy_frexp@c@(npy_sin@c@(y), &exsin); expt += ex + k; return npy_cpack@c@( npy_ldexp@c@(mant * mantcos, expt + excos), npy_ldexp@c@(mant * mantsin, expt + exsin)); } #endif #ifndef HAVE_CEXP@C@ @ctype@ npy_cexp@c@(@ctype@ z) { @type@ x, c, s; @type@ r, i; @ctype@ ret; r = npy_creal@c@(z); i = npy_cimag@c@(z); if (npy_isfinite(r)) { if (r >= SCALED_CEXP_LOWER@C@ && r <= SCALED_CEXP_UPPER@C@) { ret = _npy_scaled_cexp@c@(r, i, 0); } else { x = npy_exp@c@(r); c = npy_cos@c@(i); s = npy_sin@c@(i); if (npy_isfinite(i)) { ret = npy_cpack@c@(x * c, x * s); } else { ret = npy_cpack@c@(NPY_NAN@C@, npy_copysign@c@(NPY_NAN@C@, i)); } } } else if (npy_isnan(r)) { /* r is nan */ if (i == 0) { ret = z; } else { ret = npy_cpack@c@(r, npy_copysign@c@(NPY_NAN@C@, i)); } } else { /* r is +- inf */ if (r > 0) { if (i == 0) { ret = npy_cpack@c@(r, i); } else if (npy_isfinite(i)) { c = npy_cos@c@(i); s = npy_sin@c@(i); ret = npy_cpack@c@(r * c, r * s); } else { /* x = +inf, y = +-inf | nan */ npy_set_floatstatus_invalid(); ret = npy_cpack@c@(r, NPY_NAN@C@); } } else { if (npy_isfinite(i)) { x = npy_exp@c@(r); c = npy_cos@c@(i); s = npy_sin@c@(i); ret = npy_cpack@c@(x * c, x * s); } else { /* x = -inf, y = nan | +i inf */ ret = npy_cpack@c@(0, 0); } } } return ret; } #endif #ifndef HAVE_CLOG@C@ /* algorithm from cpython, rev. d86f5686cef9 * * The usual formula for the real part is log(hypot(z.real, z.imag)). * There are four situations where this formula is potentially * problematic: * * (1) the absolute value of z is subnormal. Then hypot is subnormal, * so has fewer than the usual number of bits of accuracy, hence may * have large relative error. This then gives a large absolute error * in the log. This can be solved by rescaling z by a suitable power * of 2. * * (2) the absolute value of z is greater than DBL_MAX (e.g. when both * z.real and z.imag are within a factor of 1/sqrt(2) of DBL_MAX) * Again, rescaling solves this. * * (3) the absolute value of z is close to 1. In this case it's * difficult to achieve good accuracy, at least in part because a * change of 1ulp in the real or imaginary part of z can result in a * change of billions of ulps in the correctly rounded answer. * * (4) z = 0. The simplest thing to do here is to call the * floating-point log with an argument of 0, and let its behaviour * (returning -infinity, signaling a floating-point exception, setting * errno, or whatever) determine that of c_log. So the usual formula * is fine here. 
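 *
 * For situation (3), the code below relies on the exact identity
 *    hypot(x, y)^2 - 1 = (m-1)*(m+1) + n*n,
 * where m = max(|x|, |y|) and n = min(|x|, |y|), so the real part can be
 * computed as log1p((m-1)*(m+1) + n*n) / 2 without the cancellation that
 * evaluating log(h) directly would suffer when h is close to 1.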
*/ @ctype@ npy_clog@c@(@ctype@ z) { @type@ ax = npy_fabs@c@(npy_creal@c@(z)); @type@ ay = npy_fabs@c@(npy_cimag@c@(z)); @type@ rr, ri; if (ax > @TMAX@/4 || ay > @TMAX@/4) { rr = npy_log@c@(npy_hypot@c@(ax/2, ay/2)) + NPY_LOGE2@c@; } else if (ax < @TMIN@ && ay < @TMIN@) { if (ax > 0 || ay > 0) { /* catch cases where hypot(ax, ay) is subnormal */ rr = npy_log@c@(npy_hypot@c@(npy_ldexp@c@(ax, @TMANT_DIG@), npy_ldexp@c@(ay, @TMANT_DIG@))) - @TMANT_DIG@*NPY_LOGE2@c@; } else { /* log(+/-0 +/- 0i) */ /* raise divide-by-zero floating point exception */ rr = -1.0@c@ / npy_creal@c@(z); rr = npy_copysign@c@(rr, -1); ri = npy_carg@c@(z); return npy_cpack@c@(rr, ri); } } else { @type@ h = npy_hypot@c@(ax, ay); if (0.71 <= h && h <= 1.73) { @type@ am = ax > ay ? ax : ay; /* max(ax, ay) */ @type@ an = ax > ay ? ay : ax; /* min(ax, ay) */ rr = npy_log1p@c@((am-1)*(am+1)+an*an)/2; } else { rr = npy_log@c@(h); } } ri = npy_carg@c@(z); return npy_cpack@c@(rr, ri); } #endif #ifndef HAVE_CSQRT@C@ /* We risk spurious overflow for components >= DBL_MAX / (1 + sqrt(2)). */ #define THRESH (@TMAX@ / (1 + NPY_SQRT2@c@)) @ctype@ npy_csqrt@c@(@ctype@ z) { @ctype@ result; @type@ a, b; @type@ t; int scale; a = npy_creal@c@(z); b = npy_cimag@c@(z); /* Handle special cases. */ if (a == 0 && b == 0) { return (npy_cpack@c@(0, b)); } if (npy_isinf(b)) { return (npy_cpack@c@(NPY_INFINITY@C@, b)); } if (npy_isnan(a)) { t = (b - b) / (b - b); /* raise invalid if b is not a NaN */ return (npy_cpack@c@(a, t)); /* return NaN + NaN i */ } if (npy_isinf(a)) { /* * csqrt(inf + NaN i) = inf + NaN i * csqrt(inf + y i) = inf + 0 i * csqrt(-inf + NaN i) = NaN +- inf i * csqrt(-inf + y i) = 0 + inf i */ if (npy_signbit(a)) { return (npy_cpack@c@(npy_fabs@c@(b - b), npy_copysign@c@(a, b))); } else { return (npy_cpack@c@(a, npy_copysign@c@(b - b, b))); } } /* * The remaining special case (b is NaN) is handled just fine by * the normal code path below. */ /* Scale to avoid overflow. */ if (npy_fabs@c@(a) >= THRESH || npy_fabs@c@(b) >= THRESH) { a *= 0.25; b *= 0.25; scale = 1; } else { scale = 0; } /* Algorithm 312, CACM vol 10, Oct 1967. */ if (a >= 0) { t = npy_sqrt@c@((a + npy_hypot@c@(a, b)) * 0.5@c@); result = npy_cpack@c@(t, b / (2 * t)); } else { t = npy_sqrt@c@((-a + npy_hypot@c@(a, b)) * 0.5@c@); result = npy_cpack@c@(npy_fabs@c@(b) / (2 * t), npy_copysign@c@(t, b)); } /* Rescale. */ if (scale) { return (npy_cpack@c@(npy_creal@c@(result) * 2, npy_cimag@c@(result))); } else { return (result); } } #undef THRESH #endif /* * Always use this function because of the multiplication for small * integer powers, but in the body use cpow if it is available. */ /* private function for use in npy_pow{f, ,l} */ #ifdef HAVE_CPOW@C@ static @ctype@ sys_cpow@c@(@ctype@ x, @ctype@ y) { return cpow@c@(x, y); } #endif @ctype@ npy_cpow@c@ (@ctype@ a, @ctype@ b) { npy_intp n; @type@ ar = npy_creal@c@(a); @type@ br = npy_creal@c@(b); @type@ ai = npy_cimag@c@(a); @type@ bi = npy_cimag@c@(b); @ctype@ r; /* * Checking if in a^b, if b is zero. * If a is not zero then by definition of logarithm a^0 is 1. * If a is also zero then 0^0 is best defined as 1. */ if (br == 0. && bi == 0.) { return npy_cpack@c@(1., 0.); } /* case 0^b * If a is a complex zero (ai=ar=0), then the result depends * upon values of br and bi. The result is either: * 0 (in magnitude), undefined or 1. * The later case is for br=bi=0 and independent of ar and ai * but is handled above). */ else if (ar == 0. && ai == 0.) 
{ /* * If the real part of b is positive (br>0) then this is * the zero complex with positive sign on both the * real and imaginary part. */ if (br > 0) { return npy_cpack@c@(0., 0.); } /* else we are in the case where the * real part of b is negative (br<0). * Here we should return a complex nan * and raise FloatingPointError: invalid value... */ /* Raise invalid value by calling inf - inf*/ volatile @type@ tmp = NPY_INFINITY@C@; tmp -= NPY_INFINITY@C@; ar = tmp; r = npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); return r; } if (bi == 0 && br > -100 && br < 100 && (n=(npy_intp)br) == br) { if (n == 1) { /* unroll: handle inf better */ return npy_cpack@c@(ar, ai); } else if (n == 2) { /* unroll: handle inf better */ return cmul@c@(a, a); } else if (n == 3) { /* unroll: handle inf better */ return cmul@c@(a, cmul@c@(a, a)); } else if (n > -100 && n < 100) { @ctype@ p, aa; npy_intp mask = 1; if (n < 0) { n = -n; } aa = c_1@c@; p = npy_cpack@c@(ar, ai); while (1) { if (n & mask) { aa = cmul@c@(aa,p); } mask <<= 1; if (n < mask || mask <= 0) { break; } p = cmul@c@(p,p); } r = npy_cpack@c@(npy_creal@c@(aa), npy_cimag@c@(aa)); if (br < 0) { r = cdiv@c@(c_1@c@, r); } return r; } } #ifdef HAVE_CPOW@C@ return sys_cpow@c@(a, b); #else { @ctype@ loga = npy_clog@c@(a); ar = npy_creal@c@(loga); ai = npy_cimag@c@(loga); return npy_cexp@c@(npy_cpack@c@(ar*br - ai*bi, ar*bi + ai*br)); } #endif } #ifndef HAVE_CCOS@C@ @ctype@ npy_ccos@c@(@ctype@ z) { /* ccos(z) = ccosh(I * z) */ return npy_ccosh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); } #endif #ifndef HAVE_CSIN@C@ @ctype@ npy_csin@c@(@ctype@ z) { /* csin(z) = -I * csinh(I * z) */ z = npy_csinh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); return npy_cpack@c@(npy_cimag@c@(z), -npy_creal@c@(z)); } #endif #ifndef HAVE_CTAN@C@ @ctype@ npy_ctan@c@(@ctype@ z) { /* ctan(z) = -I * ctanh(I * z) */ z = npy_ctanh@c@(npy_cpack@c@(-npy_cimag@c@(z), npy_creal@c@(z))); return (npy_cpack@c@(npy_cimag@c@(z), -npy_creal@c@(z))); } #endif #ifndef HAVE_CCOSH@C@ /* * Taken from the msun library in FreeBSD, rev 226599. * * Hyperbolic cosine of a complex argument z = x + i y. * * cosh(z) = cosh(x+iy) * = cosh(x) cos(y) + i sinh(x) sin(y). * * Exceptional values are noted in the comments within the source code. * These values and the return value were taken from n1124.pdf. * * CCOSH_BIG is chosen such that * spacing(0.5 * exp(CCOSH_BIG)) > 0.5*exp(-CCOSH_BIG) * although the exact value assigned to CCOSH_BIG is not so important */ @ctype@ npy_ccosh@c@(@ctype@ z) { #if @precision@ == 1 const npy_float CCOSH_BIG = 9.0f; const npy_float CCOSH_HUGE = 1.70141183e+38f; #endif #if @precision@ == 2 const npy_double CCOSH_BIG = 22.0; const npy_double CCOSH_HUGE = 8.9884656743115795e+307; #endif #if @precision@ >= 3 #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble CCOSH_BIG = 22.0L; const npy_longdouble CCOSH_HUGE = 8.9884656743115795e+307L; #else const npy_longdouble CCOSH_BIG = 24.0L; const npy_longdouble CCOSH_HUGE = 5.94865747678615882543e+4931L; #endif #endif @type@ x, y, h, absx; npy_int xfinite, yfinite; x = npy_creal@c@(z); y = npy_cimag@c@(z); xfinite = npy_isfinite(x); yfinite = npy_isfinite(y); /* Handle the nearly-non-exceptional cases where x and y are finite. 
*/ if (xfinite && yfinite) { if (y == 0) { return npy_cpack@c@(npy_cosh@c@(x), x * y); } absx = npy_fabs@c@(x); if (absx < CCOSH_BIG) { /* small x: normal case */ return npy_cpack@c@(npy_cosh@c@(x) * npy_cos@c@(y), npy_sinh@c@(x) * npy_sin@c@(y)); } /* |x| >= 22, so cosh(x) ~= exp(|x|) */ if (absx < SCALED_CEXP_LOWER@C@) { /* x < 710: exp(|x|) won't overflow */ h = npy_exp@c@(absx) * 0.5@c@; return npy_cpack@c@(h * npy_cos@c@(y), npy_copysign@c@(h, x) * npy_sin@c@(y)); } else if (absx < SCALED_CEXP_UPPER@C@) { /* x < 1455: scale to avoid overflow */ z = _npy_scaled_cexp@c@(absx, y, -1); return npy_cpack@c@(npy_creal@c@(z), npy_cimag@c@(z) * npy_copysign@c@(1, x)); } else { /* x >= 1455: the result always overflows */ h = CCOSH_HUGE * x; return npy_cpack@c@(h * h * npy_cos@c@(y), h * npy_sin@c@(y)); } } /* * cosh(+-0 +- I Inf) = dNaN + I sign(d(+-0, dNaN))0. * The sign of 0 in the result is unspecified. Choice = normally * the same as dNaN. Raise the invalid floating-point exception. * * cosh(+-0 +- I NaN) = d(NaN) + I sign(d(+-0, NaN))0. * The sign of 0 in the result is unspecified. Choice = normally * the same as d(NaN). */ if (x == 0 && !yfinite) { return npy_cpack@c@(y - y, npy_copysign@c@(0, x * (y - y))); } /* * cosh(+-Inf +- I 0) = +Inf + I (+-)(+-)0. * * cosh(NaN +- I 0) = d(NaN) + I sign(d(NaN, +-0))0. * The sign of 0 in the result is unspecified. */ if (y == 0 && !xfinite) { return npy_cpack@c@(x * x, npy_copysign@c@(0, x) * y); } /* * cosh(x +- I Inf) = dNaN + I dNaN. * Raise the invalid floating-point exception for finite nonzero x. * * cosh(x + I NaN) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception for finite * nonzero x. Choice = don't raise (except for signaling NaNs). */ if (xfinite && !yfinite) { return npy_cpack@c@(y - y, x * (y - y)); } /* * cosh(+-Inf + I NaN) = +Inf + I d(NaN). * * cosh(+-Inf +- I Inf) = +Inf + I dNaN. * The sign of Inf in the result is unspecified. Choice = always +. * Raise the invalid floating-point exception. * * cosh(+-Inf + I y) = +Inf cos(y) +- I Inf sin(y) */ if (npy_isinf(x)) { if (!yfinite) { return npy_cpack@c@(x * x, x * (y - y)); } return npy_cpack@c@((x * x) * npy_cos@c@(y), x * npy_sin@c@(y)); } /* * cosh(NaN + I NaN) = d(NaN) + I d(NaN). * * cosh(NaN +- I Inf) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception. * Choice = raise. * * cosh(NaN + I y) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception for finite * nonzero y. Choice = don't raise (except for signaling NaNs). */ return npy_cpack@c@((x * x) * (y - y), (x + x) * (y - y)); } #undef CCOSH_BIG #undef CCOSH_HUGE #endif #ifndef HAVE_CSINH@C@ /* * Taken from the msun library in FreeBSD, rev 226599. * * Hyperbolic sine of a complex argument z = x + i y. * * sinh(z) = sinh(x+iy) * = sinh(x) cos(y) + i cosh(x) sin(y). * * Exceptional values are noted in the comments within the source code. * These values and the return value were taken from n1124.pdf. 
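 *
 * For |x| >= CSINH_BIG (22 in double precision), cosh(x) and |sinh(x)| both
 * differ from exp(|x|)/2 by a relative error of at most exp(-2*|x|) <=
 * exp(-44) ~= 8e-20, far below DBL_EPSILON ~= 2.2e-16, which is why the
 * large-|x| branches below use exp(|x|)/2 for both factors.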
*/ @ctype@ npy_csinh@c@(@ctype@ z) { #if @precision@ == 1 const npy_float CSINH_BIG = 9.0f; const npy_float CSINH_HUGE = 1.70141183e+38f; #endif #if @precision@ == 2 const npy_double CSINH_BIG = 22.0; const npy_double CSINH_HUGE = 8.9884656743115795e+307; #endif #if @precision@ >= 3 #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble CSINH_BIG = 22.0L; const npy_longdouble CSINH_HUGE = 8.9884656743115795e+307L; #else const npy_longdouble CSINH_BIG = 24.0L; const npy_longdouble CSINH_HUGE = 5.94865747678615882543e+4931L; #endif #endif @type@ x, y, h, absx; npy_int xfinite, yfinite; x = npy_creal@c@(z); y = npy_cimag@c@(z); xfinite = npy_isfinite(x); yfinite = npy_isfinite(y); /* Handle the nearly-non-exceptional cases where x and y are finite. */ if (xfinite && yfinite) { if (y == 0) { return npy_cpack@c@(npy_sinh@c@(x), y); } absx = npy_fabs@c@(x); if (absx < CSINH_BIG) { /* small x: normal case */ return npy_cpack@c@(npy_sinh@c@(x) * npy_cos@c@(y), npy_cosh@c@(x) * npy_sin@c@(y)); } /* |x| >= 22, so cosh(x) ~= exp(|x|) */ if (absx < SCALED_CEXP_LOWER@C@) { /* x < 710: exp(|x|) won't overflow */ h = npy_exp@c@(npy_fabs@c@(x)) * 0.5@c@; return npy_cpack@c@(npy_copysign@c@(h, x) * npy_cos@c@(y), h * npy_sin@c@(y)); } else if (x < SCALED_CEXP_UPPER@C@) { /* x < 1455: scale to avoid overflow */ z = _npy_scaled_cexp@c@(absx, y, -1); return npy_cpack@c@(npy_creal@c@(z) * npy_copysign@c@(1, x), npy_cimag@c@(z)); } else { /* x >= 1455: the result always overflows */ h = CSINH_HUGE * x; return npy_cpack@c@(h * npy_cos@c@(y), h * h * npy_sin@c@(y)); } } /* * sinh(+-0 +- I Inf) = sign(d(+-0, dNaN))0 + I dNaN. * The sign of 0 in the result is unspecified. Choice = normally * the same as dNaN. Raise the invalid floating-point exception. * * sinh(+-0 +- I NaN) = sign(d(+-0, NaN))0 + I d(NaN). * The sign of 0 in the result is unspecified. Choice = normally * the same as d(NaN). */ if (x == 0 && !yfinite) { return npy_cpack@c@(npy_copysign@c@(0, x * (y - y)), y - y); } /* * sinh(+-Inf +- I 0) = +-Inf + I +-0. * * sinh(NaN +- I 0) = d(NaN) + I +-0. */ if (y == 0 && !xfinite) { if (npy_isnan(x)) { return z; } return npy_cpack@c@(x, npy_copysign@c@(0, y)); } /* * sinh(x +- I Inf) = dNaN + I dNaN. * Raise the invalid floating-point exception for finite nonzero x. * * sinh(x + I NaN) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception for finite * nonzero x. Choice = don't raise (except for signaling NaNs). */ if (xfinite && !yfinite) { return npy_cpack@c@(y - y, x * (y - y)); } /* * sinh(+-Inf + I NaN) = +-Inf + I d(NaN). * The sign of Inf in the result is unspecified. Choice = normally * the same as d(NaN). * * sinh(+-Inf +- I Inf) = +Inf + I dNaN. * The sign of Inf in the result is unspecified. Choice = always +. * Raise the invalid floating-point exception. * * sinh(+-Inf + I y) = +-Inf cos(y) + I Inf sin(y) */ if (!xfinite && !npy_isnan(x)) { if (!yfinite) { return npy_cpack@c@(x * x, x * (y - y)); } return npy_cpack@c@(x * npy_cos@c@(y), NPY_INFINITY@C@ * npy_sin@c@(y)); } /* * sinh(NaN + I NaN) = d(NaN) + I d(NaN). * * sinh(NaN +- I Inf) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception. * Choice = raise. * * sinh(NaN + I y) = d(NaN) + I d(NaN). * Optionally raises the invalid floating-point exception for finite * nonzero y. Choice = don't raise (except for signaling NaNs). 
 */
    return npy_cpack@c@((x * x) * (y - y), (x + x) * (y - y));
}
#undef CSINH_BIG
#undef CSINH_HUGE
#endif

#ifndef HAVE_CTANH@C@
/*
 * Taken from the msun library in FreeBSD, rev 226600.
 *
 * Hyperbolic tangent of a complex argument z = x + i y.
 *
 * The algorithm is from:
 *
 *   W. Kahan. Branch Cuts for Complex Elementary Functions or Much
 *   Ado About Nothing's Sign Bit. In The State of the Art in
 *   Numerical Analysis, pp. 165 ff. Iserles and Powell, eds., 1987.
 *
 * Method:
 *
 *   Let t    = tan(y)
 *       beta = 1/cos^2(y)
 *       s    = sinh(x)
 *       rho  = cosh(x)
 *
 *   We have:
 *
 *   tanh(z) = sinh(z) / cosh(z)
 *
 *             sinh(x) cos(y) + i cosh(x) sin(y)
 *           = ---------------------------------
 *             cosh(x) cos(y) + i sinh(x) sin(y)
 *
 *             cosh(x) sinh(x) / cos^2(y) + i tan(y)
 *           = -------------------------------------
 *                   1 + sinh^2(x) / cos^2(y)
 *
 *             beta rho s + i t
 *           = ----------------
 *               1 + beta s^2
 *
 * Modifications:
 *
 *   I omitted the original algorithm's handling of overflow in tan(x) after
 *   verifying with nearpi.c that this can't happen in IEEE single or double
 *   precision. I also handle large x differently.
 */
#define TANH_HUGE 22.0
#define TANHF_HUGE 11.0F
#define TANHL_HUGE 42.0L

@ctype@
npy_ctanh@c@(@ctype@ z)
{
    @type@ x, y;
    @type@ t, beta, s, rho, denom;

    x = npy_creal@c@(z);
    y = npy_cimag@c@(z);

    /*
     * ctanh(NaN + i 0) = NaN + i 0
     *
     * ctanh(NaN + i y) = NaN + i NaN        for y != 0
     *
     * The imaginary part has the sign of x*sin(2*y), but there's no
     * special effort to get this right.
     *
     * ctanh(+-Inf +- i Inf) = +-1 +- 0
     *
     * ctanh(+-Inf + i y) = +-1 + 0 sin(2y)        for y finite
     *
     * The sign of the imaginary part is unspecified. This special
     * case is only needed to avoid a spurious invalid exception when
     * y is infinite.
     */
    if (!npy_isfinite(x)) {
        if (npy_isnan(x)) {
            return npy_cpack@c@(x, (y == 0 ? y : x * y));
        }
        return npy_cpack@c@(npy_copysign@c@(1,x),
                            npy_copysign@c@(0,
                                npy_isinf(y) ?
                                    y : npy_sin@c@(y) * npy_cos@c@(y)));
    }

    /*
     * ctanh(x + i NAN) = NaN + i NaN
     * ctanh(x +- i Inf) = NaN + i NaN
     */
    if (!npy_isfinite(y)) {
        return (npy_cpack@c@(y - y, y - y));
    }

    /*
     * ctanh(+-huge + i +-y) ~= +-1 +- i 2sin(2y)/exp(2x), using the
     * approximation sinh^2(huge) ~= exp(2*huge) / 4.
     * We use a modified formula to avoid spurious overflow.
     */
    if (npy_fabs@c@(x) >= TANH@C@_HUGE) {
        @type@ exp_mx = npy_exp@c@(-npy_fabs@c@(x));
        return npy_cpack@c@(npy_copysign@c@(1, x),
                            4 * npy_sin@c@(y) * npy_cos@c@(y) *
                                exp_mx * exp_mx);
    }

    /* Kahan's algorithm */
    t = npy_tan@c@(y);
    beta = 1 + t * t;    /* = 1 / cos^2(y) */
    s = npy_sinh@c@(x);
    rho = npy_sqrt@c@(1 + s * s);    /* = cosh(x) */
    denom = 1 + beta * s * s;
    return (npy_cpack@c@((beta * rho * s) / denom, t / denom));
}
#undef TANH_HUGE
#undef TANHF_HUGE
#undef TANHL_HUGE
#endif

#if !defined (HAVE_CACOS@C@) || !defined (HAVE_CASINH@C@)
/*
 * Complex inverse trig functions taken from the msun library in FreeBSD
 * revision 251404
 *
 * The algorithm is very close to that in "Implementing the complex arcsine
 * and arccosine functions using exception handling" by T. E. Hull, Thomas F.
 * Fairgrieve, and Ping Tak Peter Tang, published in ACM Transactions on
 * Mathematical Software, Volume 23 Issue 3, 1997, Pages 299-335,
 * https://dl.acm.org/doi/10.1145/275323.275324.
 *
 * Throughout we use the convention z = x + I*y.
* * casinh(z) = sign(x)*log(A+sqrt(A*A-1)) + I*asin(B) * where * A = (|z+I| + |z-I|) / 2 * B = (|z+I| - |z-I|) / 2 = y/A * * These formulas become numerically unstable: * (a) for Re(casinh(z)) when z is close to the line segment [-I, I] (that * is, Re(casinh(z)) is close to 0); * (b) for Im(casinh(z)) when z is close to either of the intervals * [I, I*infinity) or (-I*infinity, -I] (that is, |Im(casinh(z))| is * close to PI/2). * * These numerical problems are overcome by defining * f(a, b) = (hypot(a, b) - b) / 2 = a*a / (hypot(a, b) + b) / 2 * Then if A < A_crossover, we use * log(A + sqrt(A*A-1)) = log1p((A-1) + sqrt((A-1)*(A+1))) * A-1 = f(x, 1+y) + f(x, 1-y) * and if B > B_crossover, we use * asin(B) = atan2(y, sqrt(A*A - y*y)) = atan2(y, sqrt((A+y)*(A-y))) * A-y = f(x, y+1) + f(x, y-1) * where without loss of generality we have assumed that x and y are * non-negative. * * Much of the difficulty comes because the intermediate computations may * produce overflows or underflows. This is dealt with in the paper by Hull * et al by using exception handling. We do this by detecting when * computations risk underflow or overflow. The hardest part is handling the * underflows when computing f(a, b). * * Note that the function f(a, b) does not appear explicitly in the paper by * Hull et al, but the idea may be found on pages 308 and 309. Introducing the * function f(a, b) allows us to concentrate many of the clever tricks in this * paper into one function. */ /* * Function f(a, b, hypot_a_b) = (hypot(a, b) - b) / 2. * Pass hypot(a, b) as the third argument. */ static inline @type@ _f@c@(@type@ a, @type@ b, @type@ hypot_a_b) { if (b < 0) { return ((hypot_a_b - b) / 2); } if (b == 0) { return (a / 2); } return (a * a / (hypot_a_b + b) / 2); } /* * All the hard work is contained in this function. * x and y are assumed positive or zero, and less than RECIP_EPSILON. * Upon return: * rx = Re(casinh(z)) = -Im(cacos(y + I*x)). * B_is_usable is set to 1 if the value of B is usable. * If B_is_usable is set to 0, sqrt_A2my2 = sqrt(A*A - y*y), and new_y = y. * If returning sqrt_A2my2 has potential to result in an underflow, it is * rescaled, and new_y is similarly rescaled. */ static inline void _do_hard_work@c@(@type@ x, @type@ y, @type@ *rx, npy_int *B_is_usable, @type@ *B, @type@ *sqrt_A2my2, @type@ *new_y) { #if @precision@ == 1 const npy_float A_crossover = 10.0f; const npy_float B_crossover = 0.6417f; const npy_float FOUR_SQRT_MIN = 4.3368086899420177e-19f; #endif #if @precision@ == 2 const npy_double A_crossover = 10.0; const npy_double B_crossover = 0.6417; const npy_double FOUR_SQRT_MIN = 5.9666725849601654e-154; #endif #if @precision@ == 3 const npy_longdouble A_crossover = 10.0l; const npy_longdouble B_crossover = 0.6417l; #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble FOUR_SQRT_MIN = 5.9666725849601654e-154; #else const npy_longdouble FOUR_SQRT_MIN = 7.3344154702193886625e-2466l; #endif #endif @type@ R, S, A; /* A, B, R, and S are as in Hull et al. */ @type@ Am1, Amy; /* A-1, A-y. */ R = npy_hypot@c@(x, y + 1); /* |z+I| */ S = npy_hypot@c@(x, y - 1); /* |z-I| */ /* A = (|z+I| + |z-I|) / 2 */ A = (R + S) / 2; /* * Mathematically A >= 1. There is a small chance that this will not * be so because of rounding errors. So we will make certain it is * so. */ if (A < 1) { A = 1; } if (A < A_crossover) { /* * Am1 = fp + fm, where fp = f(x, 1+y), and fm = f(x, 1-y). 
* rx = log1p(Am1 + sqrt(Am1*(A+1))) */ if (y == 1 && x < @TEPS@ * @TEPS@ / 128) { /* * fp is of order x^2, and fm = x/2. * A = 1 (inexactly). */ *rx = npy_sqrt@c@(x); } else if (x >= @TEPS@ * npy_fabs@c@(y - 1)) { /* * Underflow will not occur because * x >= DBL_EPSILON^2/128 >= FOUR_SQRT_MIN */ Am1 = _f@c@(x, 1 + y, R) + _f@c@(x, 1 - y, S); *rx = npy_log1p@c@(Am1 + npy_sqrt@c@(Am1 * (A + 1))); } else if (y < 1) { /* * fp = x*x/(1+y)/4, fm = x*x/(1-y)/4, and * A = 1 (inexactly). */ *rx = x / npy_sqrt@c@((1 - y) * (1 + y)); } else { /* if (y > 1) */ /* * A-1 = y-1 (inexactly). */ *rx = npy_log1p@c@((y - 1) + npy_sqrt@c@((y - 1) * (y + 1))); } } else { *rx = npy_log@c@(A + npy_sqrt@c@(A * A - 1)); } *new_y = y; if (y < FOUR_SQRT_MIN) { /* * Avoid a possible underflow caused by y/A. For casinh this * would be legitimate, but will be picked up by invoking atan2 * later on. For cacos this would not be legitimate. */ *B_is_usable = 0; *sqrt_A2my2 = A * (2 / @TEPS@); *new_y = y * (2 / @TEPS@); return; } /* B = (|z+I| - |z-I|) / 2 = y/A */ *B = y / A; *B_is_usable = 1; if (*B > B_crossover) { *B_is_usable = 0; /* * Amy = fp + fm, where fp = f(x, y+1), and fm = f(x, y-1). * sqrt_A2my2 = sqrt(Amy*(A+y)) */ if (y == 1 && x < @TEPS@ / 128) { /* * fp is of order x^2, and fm = x/2. * A = 1 (inexactly). */ *sqrt_A2my2 = npy_sqrt@c@(x) * npy_sqrt@c@((A + y) / 2); } else if (x >= @TEPS@ * npy_fabs@c@(y - 1)) { /* * Underflow will not occur because * x >= DBL_EPSILON/128 >= FOUR_SQRT_MIN * and * x >= DBL_EPSILON^2 >= FOUR_SQRT_MIN */ Amy = _f@c@(x, y + 1, R) + _f@c@(x, y - 1, S); *sqrt_A2my2 = npy_sqrt@c@(Amy * (A + y)); } else if (y > 1) { /* * fp = x*x/(y+1)/4, fm = x*x/(y-1)/4, and * A = y (inexactly). * * y < RECIP_EPSILON. So the following * scaling should avoid any underflow problems. */ *sqrt_A2my2 = x * (4 / @TEPS@ / @TEPS@) * y / npy_sqrt@c@((y + 1) * (y - 1)); *new_y = y * (4 / @TEPS@ / @TEPS@); } else { /* if (y < 1) */ /* * fm = 1-y >= DBL_EPSILON, fp is of order x^2, and * A = 1 (inexactly). */ *sqrt_A2my2 = npy_sqrt@c@((1 - y) * (1 + y)); } } } /* * Optimized version of clog() for |z| finite and larger than ~RECIP_EPSILON. */ static inline void _clog_for_large_values@c@(@type@ x, @type@ y, @type@ *rr, @type@ *ri) { #if @precision@ == 1 const npy_float QUARTER_SQRT_MAX = 4.611685743549481e+18f; const npy_float SQRT_MIN = 1.0842021724855044e-19f; #endif #if @precision@ == 2 const npy_double QUARTER_SQRT_MAX = 3.3519519824856489e+153; const npy_double SQRT_MIN = 1.4916681462400413e-154; #endif #if @precision@ == 3 #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble QUARTER_SQRT_MAX = 3.3519519824856489e+153; const npy_longdouble SQRT_MIN = 1.4916681462400413e-154; #else const npy_longdouble QUARTER_SQRT_MAX = 2.7268703390485398235e+2465l; const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l; #endif #endif @type@ ax, ay, t; ax = npy_fabs@c@(x); ay = npy_fabs@c@(y); if (ax < ay) { t = ax; ax = ay; ay = t; } /* * Avoid overflow in hypot() when x and y are both very large. * Divide x and y by E, and then add 1 to the logarithm. This depends * on E being larger than sqrt(2). * Dividing by E causes an insignificant loss of accuracy; however * this method is still poor since it is unnecessarily slow. */ if (ax > @TMAX@ / 2) { *rr = npy_log@c@(npy_hypot@c@(x / NPY_E@c@, y / NPY_E@c@)) + 1; } /* * Avoid overflow when x or y is large. Avoid underflow when x or * y is small. 
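 *
 * In the final branch below, ax <= QUARTER_SQRT_MAX (= sqrt(TMAX)/4) means
 * ax*ax cannot overflow, and ay >= SQRT_MIN (= sqrt(TMIN)) means ay*ay does
 * not underflow, so ax*ax + ay*ay can be formed directly and the slower
 * hypot() call is avoided.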
*/ else if (ax > QUARTER_SQRT_MAX || ay < SQRT_MIN) { *rr = npy_log@c@(npy_hypot@c@(x, y)); } else { *rr = npy_log@c@(ax * ax + ay * ay) / 2; } *ri = npy_atan2@c@(y, x); } #endif #ifndef HAVE_CACOS@C@ @ctype@ npy_cacos@c@(@ctype@ z) { #if @precision@ == 1 /* this is sqrt(6*EPS) */ const npy_float SQRT_6_EPSILON = 8.4572793338e-4f; /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. */ const volatile npy_float pio2_lo = 7.5497899549e-9f; #endif #if @precision@ == 2 const npy_double SQRT_6_EPSILON = 3.65002414998885671e-08; const volatile npy_double pio2_lo = 6.1232339957367659e-17; #endif #if @precision@ == 3 const npy_longdouble SQRT_6_EPSILON = 8.0654900873493277169e-10l; const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; #endif const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; const @type@ pio2_hi = NPY_PI_2@c@; @type@ x, y, ax, ay, wx, wy, rx, ry, B, sqrt_A2mx2, new_x; npy_int sx, sy; npy_int B_is_usable; x = npy_creal@c@(z); y = npy_cimag@c@(z); sx = npy_signbit(x); sy = npy_signbit(y); ax = npy_fabs@c@(x); ay = npy_fabs@c@(y); if (npy_isnan(x) || npy_isnan(y)) { /* cacos(+-Inf + I*NaN) = NaN + I*opt(-)Inf */ if (npy_isinf(x)) { return npy_cpack@c@(y + y, -NPY_INFINITY@C@); } /* cacos(NaN + I*+-Inf) = NaN + I*-+Inf */ if (npy_isinf(y)) { return npy_cpack@c@(x + x, -y); } /* cacos(0 + I*NaN) = PI/2 + I*NaN with inexact */ if (x == 0) { return npy_cpack@c@(pio2_hi + pio2_lo, y + y); } /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { /* clog...() will raise inexact unless x or y is infinite. */ _clog_for_large_values@c@(x, y, &wx, &wy); rx = npy_fabs@c@(wy); ry = wx + NPY_LOGE2@c@; if (sy == 0) { ry = -ry; } return npy_cpack@c@(rx, ry); } /* Avoid spuriously raising inexact for z = 1. */ if (x == 1 && y == 0) { return npy_cpack@c@(0, -y); } /* All remaining cases are inexact. */ raise_inexact(); if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) { return npy_cpack@c@(pio2_hi - (x - pio2_lo), -y); } _do_hard_work@c@(ay, ax, &ry, &B_is_usable, &B, &sqrt_A2mx2, &new_x); if (B_is_usable) { if (sx == 0) { rx = npy_acos@c@(B); } else { rx = npy_acos@c@(-B); } } else { if (sx == 0) { rx = npy_atan2@c@(sqrt_A2mx2, new_x); } else { rx = npy_atan2@c@(sqrt_A2mx2, -new_x); } } if (sy == 0) { ry = -ry; } return npy_cpack@c@(rx, ry); } #endif #ifndef HAVE_CASIN@C@ @ctype@ npy_casin@c@(@ctype@ z) { /* casin(z) = I * conj( casinh(I * conj(z)) ) */ z = npy_casinh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z))); return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)); } #endif #ifndef HAVE_CATAN@C@ @ctype@ npy_catan@c@(@ctype@ z) { /* catan(z) = I * conj( catanh(I * conj(z)) ) */ z = npy_catanh@c@(npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z))); return npy_cpack@c@(npy_cimag@c@(z), npy_creal@c@(z)); } #endif #ifndef HAVE_CACOSH@C@ @ctype@ npy_cacosh@c@(@ctype@ z) { /* * cacosh(z) = I*cacos(z) or -I*cacos(z) * where the sign is chosen so Re(cacosh(z)) >= 0. 
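 *
 * In terms of w = cacos(z), this amounts to returning
 *    |Im(w)| + I * copysign(Re(w), Im(z)),
 * which is what the code below does once the NaN special cases are handled.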
*/ @ctype@ w; @type@ rx, ry; w = npy_cacos@c@(z); rx = npy_creal@c@(w); ry = npy_cimag@c@(w); /* cacosh(NaN + I*NaN) = NaN + I*NaN */ if (npy_isnan(rx) && npy_isnan(ry)) { return npy_cpack@c@(ry, rx); } /* cacosh(NaN + I*+-Inf) = +Inf + I*NaN */ /* cacosh(+-Inf + I*NaN) = +Inf + I*NaN */ if (npy_isnan(rx)) { return npy_cpack@c@(npy_fabs@c@(ry), rx); } /* cacosh(0 + I*NaN) = NaN + I*NaN */ if (npy_isnan(ry)) { return npy_cpack@c@(ry, ry); } return npy_cpack@c@(npy_fabs@c@(ry), npy_copysign@c@(rx, npy_cimag@c@(z))); } #endif #ifndef HAVE_CASINH@C@ /* * casinh(z) = z + O(z^3) as z -> 0 * * casinh(z) = sign(x)*clog(sign(x)*z) + O(1/z^2) as z -> infinity * The above formula works for the imaginary part as well, because * Im(casinh(z)) = sign(x)*atan2(sign(x)*y, fabs(x)) + O(y/z^3) * as z -> infinity, uniformly in y */ @ctype@ npy_casinh@c@(@ctype@ z) { #if @precision@ == 1 /* this is sqrt(6*EPS) */ const npy_float SQRT_6_EPSILON = 8.4572793338e-4f; #endif #if @precision@ == 2 const npy_double SQRT_6_EPSILON = 3.65002414998885671e-08; #endif #if @precision@ == 3 const npy_longdouble SQRT_6_EPSILON = 8.0654900873493277169e-10l; #endif const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; @type@ x, y, ax, ay, wx, wy, rx, ry, B, sqrt_A2my2, new_y; npy_int B_is_usable; x = npy_creal@c@(z); y = npy_cimag@c@(z); ax = npy_fabs@c@(x); ay = npy_fabs@c@(y); if (npy_isnan(x) || npy_isnan(y)) { /* casinh(+-Inf + I*NaN) = +-Inf + I*NaN */ if (npy_isinf(x)) { return npy_cpack@c@(x, y + y); } /* casinh(NaN + I*+-Inf) = opt(+-)Inf + I*NaN */ if (npy_isinf(y)) { return npy_cpack@c@(y, x + x); } /* casinh(NaN + I*0) = NaN + I*0 */ if (y == 0) { return npy_cpack@c@(x + x, y); } /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { /* clog...() will raise inexact unless x or y is infinite. */ if (npy_signbit(x) == 0) { _clog_for_large_values@c@(x, y, &wx, &wy); wx += NPY_LOGE2@c@; } else { _clog_for_large_values@c@(-x, -y, &wx, &wy); wx += NPY_LOGE2@c@; } return npy_cpack@c@(npy_copysign@c@(wx, x), npy_copysign@c@(wy, y)); } /* Avoid spuriously raising inexact for z = 0. */ if (x == 0 && y == 0) { return (z); } /* All remaining cases are inexact. */ raise_inexact(); if (ax < SQRT_6_EPSILON / 4 && ay < SQRT_6_EPSILON / 4) { return (z); } _do_hard_work@c@(ax, ay, &rx, &B_is_usable, &B, &sqrt_A2my2, &new_y); if (B_is_usable) { ry = npy_asin@c@(B); } else { ry = npy_atan2@c@(new_y, sqrt_A2my2); } return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y)); } #endif #ifndef HAVE_CATANH@C@ /* * sum_squares(x,y) = x*x + y*y (or just x*x if y*y would underflow). * Assumes x*x and y*y will not overflow. * Assumes x and y are finite. * Assumes y is non-negative. * Assumes fabs(x) >= DBL_EPSILON. */ static inline @type@ _sum_squares@c@(@type@ x, @type@ y) { #if @precision@ == 1 const npy_float SQRT_MIN = 1.0842022e-19f; #endif #if @precision@ == 2 const npy_double SQRT_MIN = 1.4916681462400413e-154; /* sqrt(DBL_MIN) */ #endif #if @precision@ == 3 #if NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE const npy_longdouble SQRT_MIN = 1.4916681462400413e-154; /* sqrt(DBL_MIN) */ #else /* this is correct for 80 bit long doubles */ const npy_longdouble SQRT_MIN = 1.8336038675548471656e-2466l; #endif #endif /* Avoid underflow when y is small. 
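 *
 * Since fabs(x) >= DBL_EPSILON is assumed, x*x >= DBL_EPSILON^2, while
 * y < SQRT_MIN implies y*y < DBL_MIN; the term y*y is then far below half
 * an ulp of x*x, so x*x + y*y would round to x*x anyway and computing y*y
 * would only risk an underflow exception.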
*/ if (y < SQRT_MIN) { return (x * x); } return (x * x + y * y); } /* * real_part_reciprocal(x, y) = Re(1/(x+I*y)) = x/(x*x + y*y). * Assumes x and y are not NaN, and one of x and y is larger than * RECIP_EPSILON. We avoid unwarranted underflow. It is important to not use * the code creal(1/z), because the imaginary part may produce an unwanted * underflow. * This is only called in a context where inexact is always raised before * the call, so no effort is made to avoid or force inexact. */ #if @precision@ == 1 #define BIAS (FLT_MAX_EXP - 1) #define CUTOFF (FLT_MANT_DIG / 2 + 1) static inline npy_float _real_part_reciprocalf(npy_float x, npy_float y) { npy_float scale; npy_uint32 hx, hy; npy_int32 ix, iy; GET_FLOAT_WORD(hx, x); ix = hx & 0x7f800000; GET_FLOAT_WORD(hy, y); iy = hy & 0x7f800000; if (ix - iy >= CUTOFF << 23 || npy_isinf(x)) { return (1 / x); } if (iy - ix >= CUTOFF << 23) { return (x / y / y); } if (ix <= (BIAS + FLT_MAX_EXP / 2 - CUTOFF) << 23) { return (x / (x * x + y * y)); } SET_FLOAT_WORD(scale, 0x7f800000 - ix); x *= scale; y *= scale; return (x / (x * x + y * y) * scale); } #undef BIAS #undef CUTOFF #endif #if @precision@ == 2 #define BIAS (DBL_MAX_EXP - 1) /* more guard digits are useful iff there is extra precision. */ #define CUTOFF (DBL_MANT_DIG / 2 + 1) /* just half or 1 guard digit */ static inline npy_double _real_part_reciprocal(npy_double x, npy_double y) { npy_double scale; npy_uint32 hx, hy; npy_int32 ix, iy; /* * This code is inspired by the C99 document n1124.pdf, Section G.5.1, * example 2. */ GET_HIGH_WORD(hx, x); ix = hx & 0x7ff00000; GET_HIGH_WORD(hy, y); iy = hy & 0x7ff00000; if (ix - iy >= CUTOFF << 20 || npy_isinf(x)) { /* +-Inf -> +-0 is special */ return (1 / x); } if (iy - ix >= CUTOFF << 20) { /* should avoid double div, but hard */ return (x / y / y); } if (ix <= (BIAS + DBL_MAX_EXP / 2 - CUTOFF) << 20) { return (x / (x * x + y * y)); } scale = 1; SET_HIGH_WORD(scale, 0x7ff00000 - ix); /* 2**(1-ilogb(x)) */ x *= scale; y *= scale; return (x / (x * x + y * y) * scale); } #undef BIAS #undef CUTOFF #endif #if @precision@ == 3 #if !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_BE) && \ !defined(HAVE_LDOUBLE_DOUBLE_DOUBLE_LE) #define BIAS (LDBL_MAX_EXP - 1) #define CUTOFF (LDBL_MANT_DIG / 2 + 1) static inline npy_longdouble _real_part_reciprocall(npy_longdouble x, npy_longdouble y) { npy_longdouble scale; union IEEEl2bitsrep ux, uy, us; npy_int32 ix, iy; ux.e = x; ix = GET_LDOUBLE_EXP(ux); uy.e = y; iy = GET_LDOUBLE_EXP(uy); if (ix - iy >= CUTOFF || npy_isinf(x)) { return (1/x); } if (iy - ix >= CUTOFF) { return (x/y/y); } if (ix <= BIAS + LDBL_MAX_EXP / 2 - CUTOFF) { return (x/(x*x + y*y)); } us.e = 1; SET_LDOUBLE_EXP(us, 0x7fff - ix); scale = us.e; x *= scale; y *= scale; return (x/(x*x + y*y) * scale); } #undef BIAS #undef CUTOFF #else static inline npy_longdouble _real_part_reciprocall(npy_longdouble x, npy_longdouble y) { return x/(x*x + y*y); } #endif #endif @ctype@ npy_catanh@c@(@ctype@ z) { #if @precision@ == 1 /* this is sqrt(3*EPS) */ const npy_float SQRT_3_EPSILON = 5.9801995673e-4f; /* chosen such that pio2_hi + pio2_lo == pio2_hi but causes FE_INEXACT. 
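 *
 * (For single precision, pio2_lo ~= 7.55e-9 is well below half an ulp of
 * pio2_hi ~= 1.5707964, whose ulp is about 1.19e-7, so the sum rounds back
 * to pio2_hi while still raising the inexact flag.)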
*/ const volatile npy_float pio2_lo = 7.5497899549e-9f; #endif #if @precision@ == 2 const npy_double SQRT_3_EPSILON = 2.5809568279517849e-8; const volatile npy_double pio2_lo = 6.1232339957367659e-17; #endif #if @precision@ == 3 const npy_longdouble SQRT_3_EPSILON = 5.70316273435758915310e-10l; const volatile npy_longdouble pio2_lo = 2.710505431213761085e-20l; #endif const @type@ RECIP_EPSILON = 1.0@c@ / @TEPS@; const @type@ pio2_hi = NPY_PI_2@c@; @type@ x, y, ax, ay, rx, ry; x = npy_creal@c@(z); y = npy_cimag@c@(z); ax = npy_fabs@c@(x); ay = npy_fabs@c@(y); /* This helps handle many cases. */ if (y == 0 && ax <= 1) { return npy_cpack@c@(npy_atanh@c@(x), y); } /* To ensure the same accuracy as atan(), and to filter out z = 0. */ if (x == 0) { return npy_cpack@c@(x, npy_atan@c@(y)); } if (npy_isnan(x) || npy_isnan(y)) { /* catanh(+-Inf + I*NaN) = +-0 + I*NaN */ if (npy_isinf(x)) { return npy_cpack@c@(npy_copysign@c@(0, x), y + y); } /* catanh(NaN + I*+-Inf) = sign(NaN)0 + I*+-PI/2 */ if (npy_isinf(y)) { return npy_cpack@c@(npy_copysign@c@(0, x), npy_copysign@c@(pio2_hi + pio2_lo, y)); } /* * All other cases involving NaN return NaN + I*NaN. * C99 leaves it optional whether to raise invalid if one of * the arguments is not NaN, so we opt not to raise it. */ return npy_cpack@c@(NPY_NAN@C@, NPY_NAN@C@); } if (ax > RECIP_EPSILON || ay > RECIP_EPSILON) { return npy_cpack@c@(_real_part_reciprocal@c@(x, y), npy_copysign@c@(pio2_hi + pio2_lo, y)); } if (ax < SQRT_3_EPSILON / 2 && ay < SQRT_3_EPSILON / 2) { /* * z = 0 was filtered out above. All other cases must raise * inexact, but this is the only one that needs to do it * explicitly. */ raise_inexact(); return (z); } if (ax == 1 && ay < @TEPS@) { rx = (NPY_LOGE2@c@ - npy_log@c@(ay)) / 2; } else { rx = npy_log1p@c@(4 * ax / _sum_squares@c@(ax - 1, ay)) / 4; } if (ax == 1) { ry = npy_atan2@c@(2, -ay) / 2; } else if (ay < @TEPS@) { ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax)) / 2; } else { ry = npy_atan2@c@(2 * ay, (1 - ax) * (1 + ax) - ay * ay) / 2; } return npy_cpack@c@(npy_copysign@c@(rx, x), npy_copysign@c@(ry, y)); } #endif /**end repeat**/ /*========================================================== * Decorate all the functions which are available natively *=========================================================*/ /**begin repeat * #type = npy_float, npy_double, npy_longdouble# * #ctype = npy_cfloat, npy_cdouble, npy_clongdouble# * #c = f, , l# * #C = F, , L# */ /**begin repeat1 * #kind = cabs,carg# * #KIND = CABS,CARG# */ #ifdef HAVE_@KIND@@C@ @type@ npy_@kind@@c@(@ctype@ z) { return @kind@@c@(z); } #endif /**end repeat1**/ /**begin repeat1 * #kind = cexp,clog,csqrt,ccos,csin,ctan,ccosh,csinh,ctanh, * cacos,casin,catan,cacosh,casinh,catanh# * #KIND = CEXP,CLOG,CSQRT,CCOS,CSIN,CTAN,CCOSH,CSINH,CTANH, * CACOS,CASIN,CATAN,CACOSH,CASINH,CATANH# */ #ifdef HAVE_@KIND@@C@ @ctype@ npy_@kind@@c@(@ctype@ z) { return @kind@@c@(z); } #endif /**end repeat1**/ /**end repeat**/