Source code for tkp.sourcefinder.stats

"""
Generic utility routines for number handling and calculating (specific)
variances used by the TKP sourcefinder.
"""

import numpy
from numpy.ma import MaskedArray
from scipy.special import erf
from scipy.special import erfcinv
from .utils import calculate_correlation_lengths


# CODE & NUMBER HANDLING ROUTINES
#
def var_helper(N):
    """Correct for the fact that the rms noise is computed from a
    clipped distribution.

    That noise will always be lower than the noise from the complete
    distribution. The correction factor is a function of the clipping
    limit N only, expressed in units of the computed rms noise.
    """
    term1 = numpy.sqrt(2. * numpy.pi) * erf(N / numpy.sqrt(2.))
    term2 = 2. * N * numpy.exp(-N**2 / 2.)
    return term1 / (term1 - term2)
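
A quick numerical check of the correction factor (a usage sketch, not part of the module; it assumes the module is importable as tkp.sourcefinder.stats, per the title above):

from tkp.sourcefinder.stats import var_helper

# The argument is the clipping limit in units of the rms noise. Tight
# clipping discards much of the Gaussian tails, so the clipped variance
# needs a large correction; by 4 sigma the correction is negligible.
for clip in (1., 2., 3., 4.):
    print("clip at %g sigma: correction %.4f" % (clip, var_helper(clip)))
# Approximate output: 3.4349, 1.2924, 1.0274, 1.0011
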
def indep_pixels(N, beam):
    """Calculate the number of independent pixels among N pixels.

    The beam sets the correlation lengths of the noise; N is divided
    by the correlated area derived from those lengths.
    """
    corlengthlong, corlengthshort = calculate_correlation_lengths(
        beam[0], beam[1])
    correlated_area = 0.25 * numpy.pi * corlengthlong * corlengthshort
    return N / correlated_area
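
For example, the following sketch counts the independent pixels in a 100x100 chunk. The beam tuple here is hypothetical; its first two entries are taken to be the beam semi-axes in pixels, and any further entries (such as a position angle) are unused by this function:

from tkp.sourcefinder.stats import indep_pixels

beam = (3.0, 2.0, 0.0)  # assumed (semimajor, semiminor, theta), in pixels
n_indep = indep_pixels(100 * 100, beam)
print(n_indep)  # substantially fewer than 10000, since neighbouring
                # pixels are correlated over the beam
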
def unbiased_sigma(N_indep):
    """Calculate an unbiased sigma for use in sigma clipping.

    The formula below for the clipping limit is pretty subtle. Kappa,
    sigma clipping should be such that the noise is not biased by it.
    Consequently, the clipping boundaries should be such that exactly
    half an independent pixel should exceed them if the map were source
    free. A rigid boundary of 3 sigma is appropriate only if the number
    of independent pixels is about 185 (the number of independent
    pixels equals the number of pixels divided by the beamsize in
    pixels). The condition that kappa, sigma clipping may not bias the
    noise is translated into the formula below, using Gaussian
    statistics. A disadvantage of this is that more iterations of
    kappa, sigma clipping are needed than with 3 sigma clipping.
    However, the noise values derived are generally significantly
    lower than with 3 sigma clipping.
    """
    return 1.4142135623730951 * erfcinv(0.5 / N_indep)
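
The docstring's 185-pixel claim is easy to verify numerically (again a sketch, assuming the module import path from the title):

from tkp.sourcefinder.stats import unbiased_sigma

print(unbiased_sigma(185))   # ~3.0: the conventional 3 sigma limit
print(unbiased_sigma(1e4))   # ~4.06: larger maps need a wider clip so
                             # that still only half an independent pixel
                             # exceeds it by chance
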
def sigma_clip(data, beam, sigma=unbiased_sigma, max_iter=100,
               centref=numpy.median, distf=numpy.var, my_iterations=0,
               corr_clip=1.):
    """Iterative clipping.

    By default, this performs clipping of the standard deviation about
    the median of the data. But by tweaking centref/distf, it could be
    much more general.

    max_iter sets the maximum number of iterations used.

    my_iterations is a counter for recursive operation of the code;
    leave it alone unless you really want to pretend to jump into the
    middle of a loop.

    sigma is subtle: if a callable is given, it is passed the number of
    independent pixels and can calculate a clipping limit from that.
    See, e.g., unbiased_sigma() defined above. However, if it isn't
    callable, sigma is assumed to just set a hard limit.

    The beam is used to estimate the noise correlation between pixels,
    and hence the number of independent pixels.

    Returns a 4-tuple of (clipped data, unbiased standard deviation,
    centre, number of iterations performed).
    """
    if my_iterations >= max_iter:
        # Exceeded the maximum number of iterations: return the current
        # estimates, keeping the return shape consistent with the
        # converged case below.
        return data, numpy.sqrt(distf(data)), centref(data), my_iterations

    # Numpy 1.1 breaks std() for MaskedArray: see
    # <http://www.scipy.org/scipy/numpy/wiki/MaskedArray>.
    # MaskedArray.compressed() returns a 1-D array of non-masked data.
    if isinstance(data, MaskedArray):
        data = data.compressed()

    centre = centref(data)
    N = numpy.size(data)
    N_indep = indep_pixels(N, beam)
    if N_indep < 1:
        # This chunk is too small for processing; return an empty array.
        return numpy.array([]), 0, 0, 0

    # If sigma is callable, use it to dynamically calculate the clipping
    # limit.
    if callable(sigma):
        my_sigma = sigma(N_indep)
    else:
        my_sigma = sigma

    # distf=numpy.var is a sample variance with the factor N/(N-1)
    # already built in, N being the number of pixels. So we are going to
    # remove that and replace it by N_indep/(N_indep-1).
    clipped_var = distf(data) * (N - 1.) * N_indep / (N * (N_indep - 1.))
    unbiased_var = corr_clip * clipped_var

    # There is an extra factor c4 needed to get an unbiased standard
    # deviation, unbiased if we disregard clipping bias; see
    # http://en.wikipedia.org/wiki/Unbiased_estimation_of_standard_deviation#Results_for_the_normal_distribution
    c4 = 1. - 0.25 / N_indep - 0.21875 / N_indep**2
    unbiased_std = numpy.sqrt(unbiased_var) / c4

    limit = my_sigma * unbiased_std
    newdata = data.compress(abs(data - centre) <= limit)

    if len(newdata) != len(data) and len(newdata) > 0:
        # Pixels were clipped: correct the next variance estimate for
        # the clipping bias and recurse on the clipped data.
        corr_clip = var_helper(my_sigma)
        my_iterations += 1
        return sigma_clip(newdata, beam, sigma, max_iter, centref,
                          distf, my_iterations, corr_clip)
    else:
        return newdata, unbiased_std, centre, my_iterations
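
Finally, a usage sketch for the clipper on synthetic data. The beam tuple and its pixel units are assumptions, as above, and the injected outliers stand in for source pixels:

import numpy
from tkp.sourcefinder.stats import sigma_clip

rng = numpy.random.RandomState(42)
data = rng.normal(0., 1., size=10000)  # pure Gaussian background noise
data[:10] += 50.                       # a few strong "source" pixels
beam = (3.0, 2.0, 0.0)                 # assumed (semimajor, semiminor, theta)

clipped, unbiased_std, centre, niter = sigma_clip(data, beam)
print(unbiased_std, centre, niter)     # std near 1 and centre near 0 after
                                       # a few iterations: the outliers have
                                       # been clipped away
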