Source code for tkp.sourcefinder.stats

"""
Generic utility routines for number handling and calculating (specific)
variances used by the TKP sourcefinder.
"""

import numpy
from numpy.ma import MaskedArray
from scipy.special import erf
from scipy.special import erfcinv
from .utils import calculate_correlation_lengths


# CODE & NUMBER HANDLING ROUTINES
#
def var_helper(N):
    """Correct for the fact that the rms noise is computed from a
    clipped distribution.

    That noise will always be lower than the noise from the complete
    distribution. The correction factor is a function of the clipping
    limit N only, expressed in units of the computed rms noise.
    """
    term1 = numpy.sqrt(2. * numpy.pi) * erf(N / numpy.sqrt(2.))
    term2 = 2. * N * numpy.exp(-N**2 / 2.)
    return term1 / (term1 - term2)
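
A quick numerical check of the correction factor (a usage sketch, not part of the module; it assumes the module is importable as tkp.sourcefinder.stats, per the title above):

from tkp.sourcefinder.stats import var_helper

# The argument is the clipping limit in units of the rms noise. Tight
# clipping discards much of the Gaussian tails, so the clipped variance
# needs a large correction; by 4 sigma the correction is negligible.
for clip in (1., 2., 3., 4.):
    print("clip at %g sigma: correction %.4f" % (clip, var_helper(clip)))
# Approximate output: 3.4349, 1.2924, 1.0274, 1.0011
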
def indep_pixels(N, beam):
    """Calculate the number of independent pixels among N pixels.

    The beam sets the correlation lengths of the noise; N is divided
    by the correlated area derived from those lengths.
    """
    corlengthlong, corlengthshort = calculate_correlation_lengths(
        beam[0], beam[1])
    correlated_area = 0.25 * numpy.pi * corlengthlong * corlengthshort
    return N / correlated_area
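
For example, the following sketch counts the independent pixels in a 100x100 chunk. The beam tuple here is hypothetical; its first two entries are taken to be the beam semi-axes in pixels, and any further entries (such as a position angle) are unused by this function:

from tkp.sourcefinder.stats import indep_pixels

beam = (3.0, 2.0, 0.0)  # assumed (semimajor, semiminor, theta), in pixels
n_indep = indep_pixels(100 * 100, beam)
print(n_indep)  # substantially fewer than 10000, since neighbouring
                # pixels are correlated over the beam
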
def unbiased_sigma(N_indep):
    """Calculate an unbiased sigma for use in sigma clipping.

    The formula below for the clipping limit is pretty subtle. Kappa,
    sigma clipping should be such that the noise is not biased by it.
    Consequently, the clipping boundaries should be such that exactly
    half an independent pixel should exceed them if the map were source
    free. A rigid boundary of 3 sigma is appropriate only if the number
    of independent pixels is about 185 (the number of independent
    pixels equals the number of pixels divided by the beamsize in
    pixels). The condition that kappa, sigma clipping may not bias the
    noise is translated into the formula below, using Gaussian
    statistics. A disadvantage of this is that more iterations of
    kappa, sigma clipping are needed than with 3 sigma clipping.
    However, the noise values derived are generally significantly
    lower than with 3 sigma clipping.
    """
    return 1.4142135623730951 * erfcinv(0.5 / N_indep)
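
The docstring's 185-pixel claim is easy to verify numerically (again a sketch, assuming the module import path from the title):

from tkp.sourcefinder.stats import unbiased_sigma

print(unbiased_sigma(185))   # ~3.0: the conventional 3 sigma limit
print(unbiased_sigma(1e4))   # ~4.06: larger maps need a wider clip so
                             # that still only half an independent pixel
                             # exceeds it by chance
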
def sigma_clip(data, beam, sigma=unbiased_sigma, max_iter=100,
               centref=numpy.median, distf=numpy.var, my_iterations=0,
               corr_clip=1.):
    """Iterative clipping.

    By default, this performs clipping of the standard deviation about
    the median of the data. But by tweaking centref/distf, it could be
    much more general.

    max_iter sets the maximum number of iterations used.

    my_iterations is a counter for recursive operation of the code;
    leave it alone unless you really want to pretend to jump into the
    middle of a loop.

    sigma is subtle: if a callable is given, it is passed the number of
    independent pixels and can calculate a clipping limit from that.
    See, e.g., unbiased_sigma() defined above. However, if it isn't
    callable, sigma is assumed to just set a hard limit.

    The beam is used to estimate the noise correlation between pixels,
    and hence the number of independent pixels.

    Returns a 4-tuple of (clipped data, unbiased standard deviation,
    centre, number of iterations performed).
    """
    if my_iterations >= max_iter:
        # Exceeded the maximum number of iterations: return the current
        # estimates, keeping the return shape consistent with the
        # converged case below.
        return data, numpy.sqrt(distf(data)), centref(data), my_iterations

    # Numpy 1.1 breaks std() for MaskedArray: see
    # <http://www.scipy.org/scipy/numpy/wiki/MaskedArray>.
    # MaskedArray.compressed() returns a 1-D array of non-masked data.
    if isinstance(data, MaskedArray):
        data = data.compressed()

    centre = centref(data)
    N = numpy.size(data)
    N_indep = indep_pixels(N, beam)
    if N_indep < 1:
        # This chunk is too small for processing; return an empty array.
        return numpy.array([]), 0, 0, 0

    # If sigma is callable, use it to dynamically calculate the clipping
    # limit.
    if callable(sigma):
        my_sigma = sigma(N_indep)
    else:
        my_sigma = sigma

    # distf=numpy.var is a sample variance with the factor N/(N-1)
    # already built in, N being the number of pixels. So we are going to
    # remove that and replace it by N_indep/(N_indep-1).
    clipped_var = distf(data) * (N - 1.) * N_indep / (N * (N_indep - 1.))
    unbiased_var = corr_clip * clipped_var

    # There is an extra factor c4 needed to get an unbiased standard
    # deviation, unbiased if we disregard clipping bias; see
    # http://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation#Results_for_the_normal_distribution
    c4 = 1. - 0.25 / N_indep - 0.21875 / N_indep**2
    unbiased_std = numpy.sqrt(unbiased_var) / c4

    limit = my_sigma * unbiased_std
    newdata = data.compress(abs(data - centre) <= limit)

    if len(newdata) != len(data) and len(newdata) > 0:
        # Pixels were clipped: correct the next variance estimate for
        # the clipping bias and recurse on the clipped data.
        corr_clip = var_helper(my_sigma)
        my_iterations += 1
        return sigma_clip(newdata, beam, sigma, max_iter, centref,
                          distf, my_iterations, corr_clip)
    else:
        return newdata, unbiased_std, centre, my_iterations
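
Finally, a usage sketch for the clipper on synthetic data. The beam tuple and its pixel units are assumptions, as above, and the injected outliers stand in for source pixels:

import numpy
from tkp.sourcefinder.stats import sigma_clip

rng = numpy.random.RandomState(42)
data = rng.normal(0., 1., size=10000)  # pure Gaussian background noise
data[:10] += 50.                       # a few strong "source" pixels
beam = (3.0, 2.0, 0.0)                 # assumed (semimajor, semiminor, theta)

clipped, unbiased_std, centre, niter = sigma_clip(data, beam)
print(unbiased_std, centre, niter)     # std near 1 and centre near 0 after
                                       # a few iterations: the outliers have
                                       # been clipped away
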