Source code for pyrolite.util.distributions

from functools import partial

import numpy as np
import scipy.stats

from ..comp.codata import ILR, close
from ..util.math import flattengrid
from .log import Handle

logger = Handle(__name__)


[docs]def get_scaler(*fs): """ Generate a function which will transform columns of an array based on input functions (e.g. :code:`np.log` will log-transform the x values, :code:`None, np.log` will log-transform the y values but not the x). Parameters ------------ fs A series of functions to apply to subsequent axes of an array. """ def scaler(arr, fs=fs): A = arr.copy() for ix, f in enumerate(fs): if f is not None: A[:, ix] = f(A[:, ix]) return A return partial(scaler, fs=fs)
[docs]def sample_kde(data, samples, renorm=False, transform=lambda x: x, bw_method=None): """ Sample a Kernel Density Estimate at points or a grid defined. Parameters ------------ data : :class:`numpy.ndarray` Source data to estimate the kernel density estimate; observations should be in rows (:code:`npoints, ndim`). samples : :class:`numpy.ndarray` Coordinates to sample the KDE estimate at (:code:`npoints, ndim`). transform Transformation used prior to kernel density estimate. bw_method : :class:`str`, :class:`float`, callable Method used to calculate the estimator bandwidth. See :func:`scipy.stats.gaussian_kde`. Returns ---------- :class:`numpy.ndarray` """ # check shape info first data = np.atleast_2d(data) if data.shape[0] == 1: # single row which should be a column logger.debug("Transposing data row to column format for KDE.") data = data.T tdata = transform(data) tdata = tdata[np.isfinite(tdata).all(axis=1), :] # filter rows with nans if isinstance(samples, list) and isinstance(samples[0], np.ndarray): # meshgrid logger.debug("Sampling with meshgrid.") zshape = samples[0].shape ksamples = transform(flattengrid(samples)) else: zshape = samples.shape[0] ksamples = transform(samples) if not len(tdata): return np.zeros(zshape) K = scipy.stats.gaussian_kde(tdata.T, bw_method=bw_method) # ensures shape is fine even if row is passed ksamples = ksamples.reshape(-1, tdata.shape[1]) if not tdata.shape[1] == ksamples.shape[1]: logger.warn("Dimensions of data and samples do not match.") kfltr = np.isfinite(ksamples).all(axis=1) zi = np.ones(zshape, dtype=float) * np.nan zi.flat[kfltr] = K(ksamples[kfltr, :].T) if renorm: logger.debug("Normalising KDE sample.") zi = zi / np.nanmax(zi) return zi
[docs]def sample_ternary_kde(data, samples, transform=ILR): """ Sample a Kernel Density Estimate in ternary space points or a grid defined by samples. Parameters ------------ data : :class:`numpy.ndarray` Source data to estimate the kernel density estimate (:code:`npoints, ndim`). samples : :class:`numpy.ndarray` Coordinates to sample the KDE estimate at (:code:`npoints, ndim`).. transform Log-transformation used prior to kernel density estimate. Returns ---------- :class:`numpy.ndarray` """ return sample_kde(data, samples, transform=lambda x: transform(close(x)))
[docs]def lognorm_to_norm(mu, s): """ Calculate mean and variance for a normal random variable from the lognormal parameters :code:`mu` and :code:`s`. Parameters ----------- mu : :class:`float` Parameter :code:`mu` for the lognormal distribution. s : :class:`float` :code:`sigma` for the lognormal distribution. Returns -------- mean : :class:`float` Mean of the normal distribution. sigma : :class:`float` Variance of the normal distribution. """ mean = np.exp(mu + 0.5 * s**2) variance = (np.exp(s**2) - 1) * np.exp(2 * mu + s**2) return mean, np.sqrt(variance)
[docs]def norm_to_lognorm(mean, sigma, exp=True): """ Calculate :code:`mu` and :code:`sigma` parameters for a lognormal random variable with a given mean and variance. Lognormal with parameters :code:`mean` and :code:`sigma`. Parameters ----------- mean : :class:`float` Mean of the normal distribution. sigma : :class:`float` :code:`sigma` of the normal distribution. exp : :class:`bool` If using the :mod:`scipy.stats` parameterisation; this uses :code:`scale = np.exp(mu)`. Returns -------- mu : :class:`float` Parameter :code:`mu` for the lognormal distribution. s : :class:`float` :code:`sigma` of the lognormal distribution. """ mu = np.log(mean / np.sqrt(1 + sigma**2 / (mean**2))) v = np.log(1 + sigma**2 / (mean**2)) if exp: # scipy parameterisation of lognormal uses scale = np.exp(mu) ! mu = np.exp(mu) return mu, np.sqrt(v)