Source code for pyrolite.util.math

from copy import copy

import numpy as np
import scipy
import sympy

from .log import Handle

logger = Handle(__name__)


[docs]def eigsorted(cov): """ Returns arrays of eigenvalues and eigenvectors sorted by magnitude. Parameters ----------- cov : :class:`numpy.ndarray` Covariance matrix to extract eigenvalues and eigenvectors from. Returns -------- vals : :class:`numpy.ndarray` Sorted eigenvalues. vecs : :class:`numpy.ndarray` Sorted eigenvectors. """ vals, vecs = np.linalg.eigh(cov) order = vals.argsort()[::-1] return vals[order], vecs[:, order]
[docs]def augmented_covariance_matrix(M, C): r""" Constructs an augmented covariance matrix from means M and covariance matrix C. Parameters ---------- M : :class:`numpy.ndarray` Array of means. C : :class:`numpy.ndarray` Covariance matrix. Returns --------- :class:`numpy.ndarray` Augmented covariance matrix A. Notes ------ Augmented covariance matrix constructed from mean of shape (D, ) and covariance matrix of shape (D, D) as follows: .. math:: \begin{array}{c|c} -1 & M.T \\ \hline M & C \end{array} """ d = np.squeeze(M).shape[0] A = np.zeros((d + 1, d + 1)) A[0, 0] = -1 A[0, 1 : d + 1] = M A[1 : d + 1, 0] = M.T A[1 : d + 1, 1 : d + 1] = C return A
[docs]def interpolate_line(x, y, n=0, logy=False): """ Add intermediate evenly spaced points interpolated between given x-y coordinates, assuming the x points are the same. Parameters ----------- x : :class:`numpy.ndarray` 1D array of x values. y : :class:`numpy.ndarray` ND array of y values. """ if logy: # perform interpolation against logy, then revert with exp y = np.log(y) current = x[:-1].copy() # the first part of the x array intervals = x[1:] - x[:-1] # right-wise intervals (could be negative for REE) _x = current.copy().astype(float) if n: # should be able to tile this instead dx = intervals / (n + 1.0) for ix in range(n): current = current + dx _x = np.hstack([_x, current]) _x = np.append(_x, x[-1]) # add one final value to x series _x = np.sort(_x, axis=-1) f = scipy.interpolate.interp1d(x, y, axis=-1) _y = f(_x) # assert all([i in _x for i in x]) if logy: _y = np.exp(_y) return _x, _y
[docs]def grid_from_ranges(X, bins=100, **kwargs): """ Create a meshgrid based on the ranges along columns of array X. Parameters ----------- X : :class:`numpy.ndarray` Array of shape :code:`(samples, dimensions)` to create a meshgrid from. bins : :class:`int` | :class:`tuple` Shape of the meshgrid. If an integer, provides a square mesh. If a tuple, values for each column are required. Returns -------- :class:`numpy.ndarray` Notes ------- Can pass keyword arg indexing = {'xy', 'ij'} """ dim = X.shape[1] if isinstance(bins, int): # expand to list of len == dimensions bins = [bins for ix in range(dim)] mmb = [(np.nanmin(X[:, ix]), np.nanmax(X[:, ix]), bins[ix]) for ix in range(dim)] grid = np.meshgrid(*[np.linspace(*i) for i in mmb], **kwargs) return grid
[docs]def flattengrid(grid): """ Convert a collection of arrays to a concatenated array of flattened components. Useful for passing meshgrid values to a function which accepts argumnets of shape :code:`(samples, dimensions)`. Parameters ----------- grid : :class:`list` Collection of arrays (e.g. a meshgrid) to flatten and concatenate. Returns -------- :class:`numpy.ndarray` """ return np.vstack([g.flatten() for g in grid]).T
[docs]def linspc_(_min, _max, step=0.0, bins=20): """ Linear spaced array, with optional step for grid margins. Parameters ----------- _min : :class:`float` Minimum value for spaced range. _max : :class:`float` Maximum value for spaced range. step : :class:`float`, 0.0 Step for expanding at grid edges. Default of 0.0 results in no expansion. bins : int Number of bins to divide the range (adds one by default). Returns ------- :class:`numpy.ndarray` Linearly-spaced array. """ if step < 0: step = -step return np.linspace(_min - step, _max + step, bins + 1)
[docs]def logspc_(_min, _max, step=1.0, bins=20): """ Log spaced array, with optional step for grid margins. Parameters ----------- _min : :class:`float` Minimum value for spaced range. _max : :class:`float` Maximum value for spaced range. step : :class:`float`, 1.0 Step for expanding at grid edges. Default of 1.0 results in no expansion. bins : int Number of bins to divide the range (adds one by default). Returns ------- :class:`numpy.ndarray` Log-spaced array. """ if step < 1.0: step = 1.0 / step return np.logspace(np.log(_min / step), np.log(_max * step), bins, base=np.e)
[docs]def logrng_(v, exp=0.0): """ Range of a sample, where values <0 are excluded. Parameters ----------- v : :class:`list`; list-like Array of values to obtain a range from. exp : :class:`float`, (0, 1) Fractional expansion of the range. Returns ------- :class:`tuple` Min, max tuple. """ u = v[(v > 0)] # make sure the range_values are >0 return linrng_(u, exp=exp)
[docs]def linrng_(v, exp=0.0): """ Range of a sample, where values <0 are included. Parameters ----------- v : :class:`list`; list-like Array of values to obtain a range from. exp : :class:`float`, (0, 1) Fractional expansion of the range. Returns ------- :class:`tuple` Min, max tuple. """ u = v[np.isfinite(v)] return (np.nanmin(u) * (1.0 - exp), np.nanmax(u) * (1.0 + exp))
[docs]def isclose(a, b): """ Implementation of np.isclose with equal nan. Parameters ------------ a,b : :class:`float` | :class:`numpy.ndarray` Numbers or arrays to compare. Returns ------- :class:`bool` """ hasnan = np.isnan(a) | np.isnan(b) if np.array(a).ndim > 1: if hasnan.any(): # if they're both all nan in the same places if not np.isnan(a[hasnan]).all() & np.isnan(b[hasnan]).all(): return False else: return np.isclose(a[~hasnan], b[~hasnan]) else: return np.isclose(a, b) else: if hasnan: return np.isnan(a) & np.isnan(b) else: return np.isclose(a, b)
[docs]def is_numeric(obj): """ Check for numerical behaviour. Parameters ---------- obj Object to check. Returns -------- :class:`bool` """ attrs = ["__add__", "__sub__", "__mul__", "__truediv__", "__pow__"] return all(hasattr(obj, attr) for attr in attrs)
@np.vectorize def round_sig(x, sig=2): """ Round a number to a certain number of significant figures. Parameters ---------- x : :class:`float` Number to round. sig : :class:`int` Number of significant digits to round to. Returns ------- :class:`float` """ where_nan = ~np.isfinite(x) x = copy(x) if hasattr(x, "__len__"): x[where_nan] = np.finfo(np.float64).eps vals = np.round(x, sig - int(np.floor(np.log10(np.abs(x)))) - 1) vals[where_nan] = np.nan return vals else: try: return np.round(x, sig - int(np.floor(np.log10(np.abs(x)))) - 1) except (ValueError, OverflowError): # nan or inf is passed return x
[docs]def significant_figures(n, unc=None, max_sf=20, rtol=1e-20): """ Iterative method to determine the number of significant digits for a given float, optionally providing an uncertainty. Parameters ---------- n : :class:`float` Number from which to ascertain the significance level. unc : :class:`float`, :code:`None` Uncertainty, which if provided is used to derive the number of significant digits. max_sf : :class:`int` An upper limit to the number of significant digits suggested. rtol : :class:`float` Relative tolerance to determine similarity of numbers, used in calculations. Returns ------- :class:`int` Number of significant digits. """ if not hasattr(n, "__len__"): if np.isfinite(n): if unc is not None: mag_n = np.floor(np.log10(np.abs(n))) mag_u = np.floor(np.log10(unc)) if not np.isfinite(mag_u) or not np.isfinite(mag_n): return np.nan sf = int(max(0, int(1.0 + mag_n - mag_u))) else: sf = min( [ ix for ix in range(max_sf) if np.isclose(round_sig(n, ix), n, rtol=rtol) ] ) return sf else: return 0 else: # this isn't working n = np.array(n) _n = n.copy() mask = np.isclose(n, 0.0) # can't process zeros _n[mask] = np.nan if unc is not None: mag_n = np.floor(np.log10(np.abs(_n))) mag_u = np.floor(np.log10(unc)) sfs = np.nanmax( np.vstack([np.zeros(mag_n.shape), (1.0 + mag_n - mag_u).astype(int)]), axis=0, ).astype(int) else: rounded = np.vstack([_n] * max_sf).reshape(max_sf, *_n.shape) indx = np.indices(rounded.shape)[0] # get the row indexes for no. sig figs rounded = round_sig(rounded, indx) sfs = np.nanargmax(np.isclose(rounded, _n, rtol=rtol), axis=0) sfs[np.isnan(sfs)] = 0 return sfs
[docs]def signify_digit(n, unc=None, leeway=0, low_filter=True): """ Reformats numbers to contain only significant_digits. Uncertainty can be provided to digits with relevant precision. Parameters ---------- n : :class:`float` Number to reformat unc : :class:`float`, :code:`None` Absolute uncertainty on the number, optional. leeway : :class:`int`, 0 Manual override for significant figures. Positive values will force extra significant figures; negative values will remove significant figures. low_filter : :class:`bool`, :code:`True` Whether to return :class:`np.nan` in place of values which are within precision equal to zero. Returns ------- :class:`float` Reformatted number. Notes ----- * Will not pad 0s at the end or before floats. """ if np.isfinite(n): if np.isclose(n, 0.0): return n else: mag_n = np.floor(np.log10(np.abs(n))) sf = significant_figures(n, unc=unc) + int(leeway) if unc is not None: mag_u = np.floor(np.log10(unc)) if np.isnan(mag_u): mag_u = 0 else: mag_u = 0 round_to = sf - int(mag_n) - 1 + leeway if round_to <= 0: fmt = int else: fmt = lambda x: x sig_n = round(n, round_to) if low_filter and sig_n == 0.0: return np.nan else: return fmt(sig_n) else: return np.nan
[docs]def most_precise(arr): """ Get the most precise element from an array. Parameters ----------- arr : :class:`numpy.ndarray` Array to obtain the most precise element/subarray from. Returns ----------- :class:`float` | :class:`numpy.ndarray` Returns the most precise array element (for ndim=1), or most precise subarray (for ndim > 1). """ arr = np.array(arr) if np.isfinite(arr).any().any(): precision = significant_figures(arr) if arr.ndim > 1: return arr[range(arr.shape[0]), np.nanargmax(precision, axis=-1)] else: return arr[np.nanargmax(precision, axis=-1)] else: return np.nan
[docs]def equal_within_significance(arr, equal_nan=False, rtol=1e-15): """ Test whether elements of an array are equal within the precision of the least precise. Parameters ------------ arr : :class:`numpy.ndarray` Array to test. equal_nan : :class:`bool`, :code:`False` Whether to consider :class:`np.nan` elements equal to one another. rtol : :class:`float` Relative tolerance for comparison. Returns --------- :class:`bool` | :class:`numpy.ndarray`(:class:`bool`) """ arr = np.array(arr) if arr.ndim == 1: if not np.isfinite(arr).all(): return equal_nan else: precision = significant_figures(arr) min_precision = np.nanmin(precision) rounded = round_sig(arr, min_precision * np.ones(arr.shape, dtype=int)) return np.isclose(rounded[0], rounded, rtol=rtol).all() else: # ndmim =2 equal = equal_nan * np.ones( arr.shape[0], dtype=bool ) # mean for rows containing nan if np.isfinite(arr).all(axis=1).any(): non_nan_rows = np.isfinite(arr).all(axis=1) precision = significant_figures(arr[non_nan_rows, :]) min_precision = np.nanmin(precision, axis=1) precs = np.repeat(min_precision, arr.shape[1]).reshape( arr[non_nan_rows, :].shape ) rounded = round_sig(arr[non_nan_rows, :], precs) equal[non_nan_rows] = np.apply_along_axis( lambda x: (x == x[0]).all(), 1, rounded ) return equal
[docs]def helmert_basis(D: int, full=False, **kwargs): """ Generate a set of orthogonal basis vectors in the form of a helmert matrix. Parameters --------------- D : :class:`int` Dimension of compositional vectors. Returns -------- :class:`numpy.ndarray` (D-1, D) helmert matrix corresponding to default orthogonal basis. """ H = scipy.linalg.helmert(D, full=full, **kwargs) return H
[docs]def symbolic_helmert_basis(D, full=False): """ Get a symbolic representation of a Helmert Matrix. Parameters ---------- D : :class:`int` Order of the matrix. Equivalent to dimensionality for compositional data analysis. full : :class:`bool` Whether to return the full matrix, or alternatively exclude the first row. Analogous to the option for :func:`scipy.linalg.helmert`. Returns -------- :class:`sympy.matrices.dense.DenseMatrix` """ rows = [] if full: rows += [[1 / sympy.sqrt(D)] * D] for r in np.arange(1, D): rows += [ [1 / sympy.sqrt((r + 1) * r)] * r # 1/sqrt(n(*n+1)) + [-r / sympy.sqrt((r + 1) * r)] # -n/sqrt(n(*n+1)) + [0] * (D - r - 1) ] # could check summations here return sympy.Matrix(rows)
[docs]def on_finite(X, f): """ Calls a function on an array ignoring np.nan and +/- np.inf. Note that the shape of the output may be different to that of the input. Parameters --------------- X : :class:`numpy.ndarray` Array on which to perform the function. f : :class:`Callable` Function to call on the array. Returns ------- :class:`numpy.ndarray` """ ma = np.isfinite(X) return f(X[ma])
[docs]def nancov(X): """ Generates a covariance matrix excluding nan-components. Parameters --------------- X : :class:`numpy.ndarray` Input array for which to derive a covariance matrix. Returns ------- :class:`numpy.ndarray` """ # tried and true - simply excludes samples Xnanfree = X[np.all(np.isfinite(X), axis=1), :].T # assert Xnanfree.shape[1] > Xnanfree.shape[0] # (1/m)X^T*X return np.cov(Xnanfree)
[docs]def solve_ratios(*eqs, evaluate=True): """ Solve a ternary system (top-left-right) given two constraints on two ratios, which together describe intersecting lines/a point. """ t, l, r = sympy.symbols("t l r") def to_sympy(t): # rearrange to have =0 equvalent expressions return sympy.sympify("-".join(t.split("="))) result = sympy.solve( [to_sympy(e) for e in eqs] + [to_sympy("t + l + r = 1")], (t, l, r) ) return list(result.values())