from copy import copy
import numpy as np
import scipy
import sympy
from .log import Handle
logger = Handle(__name__)
[docs]def eigsorted(cov):
"""
Returns arrays of eigenvalues and eigenvectors sorted by magnitude.
Parameters
-----------
cov : :class:`numpy.ndarray`
Covariance matrix to extract eigenvalues and eigenvectors from.
Returns
--------
vals : :class:`numpy.ndarray`
Sorted eigenvalues.
vecs : :class:`numpy.ndarray`
Sorted eigenvectors.
"""
vals, vecs = np.linalg.eigh(cov)
order = vals.argsort()[::-1]
return vals[order], vecs[:, order]
[docs]def augmented_covariance_matrix(M, C):
r"""
Constructs an augmented covariance matrix from means M and covariance matrix C.
Parameters
----------
M : :class:`numpy.ndarray`
Array of means.
C : :class:`numpy.ndarray`
Covariance matrix.
Returns
---------
:class:`numpy.ndarray`
Augmented covariance matrix A.
Notes
------
Augmented covariance matrix constructed from mean of shape (D, ) and covariance
matrix of shape (D, D) as follows:
.. math::
\begin{array}{c|c}
-1 & M.T \\
\hline
M & C
\end{array}
"""
d = np.squeeze(M).shape[0]
A = np.zeros((d + 1, d + 1))
A[0, 0] = -1
A[0, 1 : d + 1] = M
A[1 : d + 1, 0] = M.T
A[1 : d + 1, 1 : d + 1] = C
return A
[docs]def interpolate_line(x, y, n=0, logy=False):
"""
Add intermediate evenly spaced points interpolated between given x-y coordinates,
assuming the x points are the same.
Parameters
-----------
x : :class:`numpy.ndarray`
1D array of x values.
y : :class:`numpy.ndarray`
ND array of y values.
"""
if logy: # perform interpolation against logy, then revert with exp
y = np.log(y)
current = x[:-1].copy() # the first part of the x array
intervals = x[1:] - x[:-1] # right-wise intervals (could be negative for REE)
_x = current.copy().astype(float)
if n: # should be able to tile this instead
dx = intervals / (n + 1.0)
for ix in range(n):
current = current + dx
_x = np.hstack([_x, current])
_x = np.append(_x, x[-1]) # add one final value to x series
_x = np.sort(_x, axis=-1)
f = scipy.interpolate.interp1d(x, y, axis=-1)
_y = f(_x)
# assert all([i in _x for i in x])
if logy:
_y = np.exp(_y)
return _x, _y
[docs]def grid_from_ranges(X, bins=100, **kwargs):
"""
Create a meshgrid based on the ranges along columns of array X.
Parameters
-----------
X : :class:`numpy.ndarray`
Array of shape :code:`(samples, dimensions)` to create a meshgrid from.
bins : :class:`int` | :class:`tuple`
Shape of the meshgrid. If an integer, provides a square mesh. If a tuple,
values for each column are required.
Returns
--------
:class:`numpy.ndarray`
Notes
-------
Can pass keyword arg indexing = {'xy', 'ij'}
"""
dim = X.shape[1]
if isinstance(bins, int): # expand to list of len == dimensions
bins = [bins for ix in range(dim)]
mmb = [(np.nanmin(X[:, ix]), np.nanmax(X[:, ix]), bins[ix]) for ix in range(dim)]
grid = np.meshgrid(*[np.linspace(*i) for i in mmb], **kwargs)
return grid
[docs]def flattengrid(grid):
"""
Convert a collection of arrays to a concatenated array of flattened components.
Useful for passing meshgrid values to a function which accepts argumnets of shape
:code:`(samples, dimensions)`.
Parameters
-----------
grid : :class:`list`
Collection of arrays (e.g. a meshgrid) to flatten and concatenate.
Returns
--------
:class:`numpy.ndarray`
"""
return np.vstack([g.flatten() for g in grid]).T
[docs]def linspc_(_min, _max, step=0.0, bins=20):
"""
Linear spaced array, with optional step for grid margins.
Parameters
-----------
_min : :class:`float`
Minimum value for spaced range.
_max : :class:`float`
Maximum value for spaced range.
step : :class:`float`, 0.0
Step for expanding at grid edges. Default of 0.0 results in no expansion.
bins : int
Number of bins to divide the range (adds one by default).
Returns
-------
:class:`numpy.ndarray`
Linearly-spaced array.
"""
if step < 0:
step = -step
return np.linspace(_min - step, _max + step, bins + 1)
[docs]def logspc_(_min, _max, step=1.0, bins=20):
"""
Log spaced array, with optional step for grid margins.
Parameters
-----------
_min : :class:`float`
Minimum value for spaced range.
_max : :class:`float`
Maximum value for spaced range.
step : :class:`float`, 1.0
Step for expanding at grid edges. Default of 1.0 results in no expansion.
bins : int
Number of bins to divide the range (adds one by default).
Returns
-------
:class:`numpy.ndarray`
Log-spaced array.
"""
if step < 1.0:
step = 1.0 / step
return np.logspace(np.log(_min / step), np.log(_max * step), bins, base=np.e)
[docs]def logrng_(v, exp=0.0):
"""
Range of a sample, where values <0 are excluded.
Parameters
-----------
v : :class:`list`; list-like
Array of values to obtain a range from.
exp : :class:`float`, (0, 1)
Fractional expansion of the range.
Returns
-------
:class:`tuple`
Min, max tuple.
"""
u = v[(v > 0)] # make sure the range_values are >0
return linrng_(u, exp=exp)
[docs]def linrng_(v, exp=0.0):
"""
Range of a sample, where values <0 are included.
Parameters
-----------
v : :class:`list`; list-like
Array of values to obtain a range from.
exp : :class:`float`, (0, 1)
Fractional expansion of the range.
Returns
-------
:class:`tuple`
Min, max tuple.
"""
u = v[np.isfinite(v)]
return (np.nanmin(u) * (1.0 - exp), np.nanmax(u) * (1.0 + exp))
[docs]def isclose(a, b):
"""
Implementation of np.isclose with equal nan.
Parameters
------------
a,b : :class:`float` | :class:`numpy.ndarray`
Numbers or arrays to compare.
Returns
-------
:class:`bool`
"""
hasnan = np.isnan(a) | np.isnan(b)
if np.array(a).ndim > 1:
if hasnan.any():
# if they're both all nan in the same places
if not np.isnan(a[hasnan]).all() & np.isnan(b[hasnan]).all():
return False
else:
return np.isclose(a[~hasnan], b[~hasnan])
else:
return np.isclose(a, b)
else:
if hasnan:
return np.isnan(a) & np.isnan(b)
else:
return np.isclose(a, b)
[docs]def is_numeric(obj):
"""
Check for numerical behaviour.
Parameters
----------
obj
Object to check.
Returns
--------
:class:`bool`
"""
attrs = ["__add__", "__sub__", "__mul__", "__truediv__", "__pow__"]
return all(hasattr(obj, attr) for attr in attrs)
@np.vectorize
def round_sig(x, sig=2):
"""
Round a number to a certain number of significant figures.
Parameters
----------
x : :class:`float`
Number to round.
sig : :class:`int`
Number of significant digits to round to.
Returns
-------
:class:`float`
"""
where_nan = ~np.isfinite(x)
x = copy(x)
if hasattr(x, "__len__"):
x[where_nan] = np.finfo(np.float64).eps
vals = np.round(x, sig - int(np.floor(np.log10(np.abs(x)))) - 1)
vals[where_nan] = np.nan
return vals
else:
try:
return np.round(x, sig - int(np.floor(np.log10(np.abs(x)))) - 1)
except (ValueError, OverflowError): # nan or inf is passed
return x
[docs]def signify_digit(n, unc=None, leeway=0, low_filter=True):
"""
Reformats numbers to contain only significant_digits. Uncertainty can be provided to
digits with relevant precision.
Parameters
----------
n : :class:`float`
Number to reformat
unc : :class:`float`, :code:`None`
Absolute uncertainty on the number, optional.
leeway : :class:`int`, 0
Manual override for significant figures. Positive values will force extra
significant figures; negative values will remove significant figures.
low_filter : :class:`bool`, :code:`True`
Whether to return :class:`np.nan` in place of values which are within precision
equal to zero.
Returns
-------
:class:`float`
Reformatted number.
Notes
-----
* Will not pad 0s at the end or before floats.
"""
if np.isfinite(n):
if np.isclose(n, 0.0):
return n
else:
mag_n = np.floor(np.log10(np.abs(n)))
sf = significant_figures(n, unc=unc) + int(leeway)
if unc is not None:
mag_u = np.floor(np.log10(unc))
if np.isnan(mag_u):
mag_u = 0
else:
mag_u = 0
round_to = sf - int(mag_n) - 1 + leeway
if round_to <= 0:
fmt = int
else:
fmt = lambda x: x
sig_n = round(n, round_to)
if low_filter and sig_n == 0.0:
return np.nan
else:
return fmt(sig_n)
else:
return np.nan
[docs]def most_precise(arr):
"""
Get the most precise element from an array.
Parameters
-----------
arr : :class:`numpy.ndarray`
Array to obtain the most precise element/subarray from.
Returns
-----------
:class:`float` | :class:`numpy.ndarray`
Returns the most precise array element (for ndim=1), or most precise subarray
(for ndim > 1).
"""
arr = np.array(arr)
if np.isfinite(arr).any().any():
precision = significant_figures(arr)
if arr.ndim > 1:
return arr[range(arr.shape[0]), np.nanargmax(precision, axis=-1)]
else:
return arr[np.nanargmax(precision, axis=-1)]
else:
return np.nan
[docs]def equal_within_significance(arr, equal_nan=False, rtol=1e-15):
"""
Test whether elements of an array are equal within the precision of the
least precise.
Parameters
------------
arr : :class:`numpy.ndarray`
Array to test.
equal_nan : :class:`bool`, :code:`False`
Whether to consider :class:`np.nan` elements equal to one another.
rtol : :class:`float`
Relative tolerance for comparison.
Returns
---------
:class:`bool` | :class:`numpy.ndarray`(:class:`bool`)
"""
arr = np.array(arr)
if arr.ndim == 1:
if not np.isfinite(arr).all():
return equal_nan
else:
precision = significant_figures(arr)
min_precision = np.nanmin(precision)
rounded = round_sig(arr, min_precision * np.ones(arr.shape, dtype=int))
return np.isclose(rounded[0], rounded, rtol=rtol).all()
else: # ndmim =2
equal = equal_nan * np.ones(
arr.shape[0], dtype=bool
) # mean for rows containing nan
if np.isfinite(arr).all(axis=1).any():
non_nan_rows = np.isfinite(arr).all(axis=1)
precision = significant_figures(arr[non_nan_rows, :])
min_precision = np.nanmin(precision, axis=1)
precs = np.repeat(min_precision, arr.shape[1]).reshape(
arr[non_nan_rows, :].shape
)
rounded = round_sig(arr[non_nan_rows, :], precs)
equal[non_nan_rows] = np.apply_along_axis(
lambda x: (x == x[0]).all(), 1, rounded
)
return equal
[docs]def helmert_basis(D: int, full=False, **kwargs):
"""
Generate a set of orthogonal basis vectors in the form of a helmert matrix.
Parameters
---------------
D : :class:`int`
Dimension of compositional vectors.
Returns
--------
:class:`numpy.ndarray`
(D-1, D) helmert matrix corresponding to default orthogonal basis.
"""
H = scipy.linalg.helmert(D, full=full, **kwargs)
return H
[docs]def symbolic_helmert_basis(D, full=False):
"""
Get a symbolic representation of a Helmert Matrix.
Parameters
----------
D : :class:`int`
Order of the matrix. Equivalent to dimensionality for compositional data
analysis.
full : :class:`bool`
Whether to return the full matrix, or alternatively exclude the first row.
Analogous to the option for :func:`scipy.linalg.helmert`.
Returns
--------
:class:`sympy.matrices.dense.DenseMatrix`
"""
rows = []
if full:
rows += [[1 / sympy.sqrt(D)] * D]
for r in np.arange(1, D):
rows += [
[1 / sympy.sqrt((r + 1) * r)] * r # 1/sqrt(n(*n+1))
+ [-r / sympy.sqrt((r + 1) * r)] # -n/sqrt(n(*n+1))
+ [0] * (D - r - 1)
]
# could check summations here
return sympy.Matrix(rows)
[docs]def on_finite(X, f):
"""
Calls a function on an array ignoring np.nan and +/- np.inf. Note that the
shape of the output may be different to that of the input.
Parameters
---------------
X : :class:`numpy.ndarray`
Array on which to perform the function.
f : :class:`Callable`
Function to call on the array.
Returns
-------
:class:`numpy.ndarray`
"""
ma = np.isfinite(X)
return f(X[ma])
[docs]def nancov(X):
"""
Generates a covariance matrix excluding nan-components.
Parameters
---------------
X : :class:`numpy.ndarray`
Input array for which to derive a covariance matrix.
Returns
-------
:class:`numpy.ndarray`
"""
# tried and true - simply excludes samples
Xnanfree = X[np.all(np.isfinite(X), axis=1), :].T
# assert Xnanfree.shape[1] > Xnanfree.shape[0]
# (1/m)X^T*X
return np.cov(Xnanfree)
[docs]def solve_ratios(*eqs, evaluate=True):
"""
Solve a ternary system (top-left-right) given two constraints on
two ratios, which together describe intersecting lines/a point.
"""
t, l, r = sympy.symbols("t l r")
def to_sympy(t): # rearrange to have =0 equvalent expressions
return sympy.sympify("-".join(t.split("=")))
result = sympy.solve(
[to_sympy(e) for e in eqs] + [to_sympy("t + l + r = 1")], (t, l, r)
)
return list(result.values())