# Source code for pyrolite.comp

"""
Submodule for working with compositional data.
"""

import functools
import inspect

import numpy as np
import pandas as pd

from ..util.log import Handle
from . import codata

# Module-level log handle, created from this module's dotted name.
logger = Handle(__name__)


def attribute_transform(f, *args, **kwargs):
    """
    Decorator which records the name of a transform function on the object
    it returns, for traceability.

    The decorated callable is invoked unchanged; the name of :code:`f` is then
    written to :code:`attrs["transform"]` on whatever it returned, so the
    returned object must expose a dict-like :code:`attrs` attribute (as
    pandas dataframes and series do).

    Parameters
    -----------
    f : :class:`func` | :class:`class`
        Transform function.

    Returns
    -------
    :class:`func`
        Wrapper around :code:`f` which keeps its name, signature and
        docstring, and tags its output with the transform name.

    Notes
    -----
    Any extra positional or keyword arguments given to the decorator itself
    are accepted but not used.
    """

    @functools.wraps(f)
    def tagged(*call_args, **call_kwargs):
        result = f(*call_args, **call_kwargs)
        # leave a breadcrumb so the transform can be identified (and inverted) later
        result.attrs["transform"] = f.__name__
        return result

    # make sure introspection tools report the wrapped function's interface
    tagged.__doc__ = f.__doc__  # keep the docstring!
    tagged.__signature__ = inspect.signature(f)
    return tagged


# note that only some of these methods will be valid for series
@pd.api.extensions.register_series_accessor("pyrocomp")
@pd.api.extensions.register_dataframe_accessor("pyrocomp")
class pyrocomp(object):
    """
    Pandas accessor (:code:`.pyrocomp`) exposing compositional data transforms.

    Forward transforms record their name, and any parameters needed to undo
    them, in the :code:`attrs` of the dataframe they return; the inverse
    methods read those entries back as defaults.
    """

    def __init__(self, obj):
        """
        Custom dataframe accessor for pyrolite compositional transforms.
        """
        self._validate(obj)
        self._obj = obj

    @staticmethod
    def _validate(obj):
        # no validation is currently applied to the wrapped object
        pass

    def renormalise(self, components=None, scale=100.0):
        """
        Renormalises compositional data to ensure closure.

        Parameters
        ----------
        components : :class:`list`
            Optional subcomposition to renormalise to 100. Useful for the use
            case where compositional data and non-compositional data are stored
            in the same dataframe. :code:`None` is treated as an empty list.
        scale : :class:`float`, :code:`100.`
            Closure parameter. Typically either 100 or 1.

        Returns
        -------
        :class:`pandas.DataFrame`
            Renormalized dataframe.

        Notes
        ------
        This won't modify the dataframe in place, you'll need to assign it to
        something. If you specify components, those components will be summed
        to 100%, and others remain unchanged.
        """
        # a fresh list per call, rather than a shared mutable default argument
        components = [] if components is None else components
        return codata.renormalise(self._obj, components=components, scale=scale)

    @attribute_transform
    def ALR(self, components=None, ind=-1, null_col=False, label_mode="simple"):
        """
        Additive Log Ratio transformation.

        Parameters
        ----------
        components : :class:`list`
            Optional subset of columns to transform. If not given (or empty),
            all columns are used.
        ind : :class:`int`, :class:`str`
            Index or name of column used as denominator. Integer indexes are
            positions within the transformed columns, and may be negative.
        null_col : :class:`bool`
            Whether to keep the redundant column.
        label_mode : :class:`str`
            Labelling mode for the output dataframe; values starting with
            :code:`num` give :code:`ALR0, ALR1, ...`, anything else is passed
            to :func:`codata.get_ALR_labels`.

        Returns
        -------
        :class:`pandas.DataFrame`
            ALR-transformed array, of shape :code:`(N, D-1)`.

        Raises
        ------
        AssertionError
            If :code:`ind` is a string which is not one of the columns.
        TypeError
            If :code:`ind` is neither an integer nor a string.
        IndexError
            If an integer :code:`ind` is out of range.
        """
        if components is None or len(components) == 0:
            columns = self._obj.columns.to_list()
        else:
            columns = list(components)
        n_columns = len(columns)

        if isinstance(ind, (int, np.integer)):
            index_col_no = int(ind)
        elif isinstance(ind, str):
            if ind not in columns:
                # raised explicitly so the check survives `python -O`
                raise AssertionError(
                    "Denominator column {!r} not found in components.".format(ind)
                )
            index_col_no = columns.index(ind)
        else:
            raise TypeError(
                "ind should be an integer index or a column name, not {}.".format(
                    type(ind).__name__
                )
            )

        if index_col_no < 0:
            # resolve every negative index (not only -1) so that the label
            # filtering below can compare against a positional index
            index_col_no += n_columns
        if not 0 <= index_col_no < n_columns:
            raise IndexError(
                "Denominator index {} is out of range for {} components.".format(
                    ind, n_columns
                )
            )

        frame = self._obj[columns]
        if label_mode.lower().startswith("num"):
            colnames = ["ALR{}".format(ix) for ix in range(n_columns)]
        else:
            colnames = codata.get_ALR_labels(
                frame, mode=label_mode, ind=index_col_no
            )
        if not null_col:
            # the denominator column is dropped from the output
            colnames = [n for ix, n in enumerate(colnames) if ix != index_col_no]

        tfm_df = pd.DataFrame(
            codata.ALR(frame.values, ind=index_col_no, null_col=null_col),
            index=self._obj.index,
            columns=colnames,
        )
        # save parameters for inverse_transform
        tfm_df.attrs["ALR_index"] = index_col_no
        tfm_df.attrs["inverts_to"] = columns
        return tfm_df

    def inverse_ALR(self, ind=None, null_col=False):
        """
        Inverse Additive Log Ratio transformation.

        Parameters
        ----------
        ind : :class:`int`, :class:`str`
            Index or name of column used as denominator. If not given, the
            index stored in the dataframe attributes by :func:`ALR` is used,
            falling back to :code:`-1`.
        null_col : :class:`bool`, :code:`False`
            Whether the array contains an extra redundant column
            (i.e. shape is :code:`(N, D)`).

        Returns
        -------
        :class:`pandas.DataFrame`
            Inverse-ALR transformed array, of shape :code:`(N, D)`.
        """
        colnames = self._obj.attrs.get("inverts_to")
        if ind is None:
            ind = self._obj.attrs.get("ALR_index", -1)
        itfm_df = pd.DataFrame(
            codata.inverse_ALR(self._obj.values, ind=ind, null_col=null_col),
            index=self._obj.index,
            columns=colnames,
        )
        return itfm_df

    @attribute_transform
    def CLR(self, label_mode="simple"):
        """
        Centred Log Ratio transformation.

        Parameters
        ----------
        label_mode : :class:`str`
            Labelling mode for the output dataframe (:code:`numeric`,
            :code:`simple`, :code:`LaTeX`). If you plan to use the outputs for
            automated visualisation and want to know which components
            contribute, use :code:`simple` or :code:`LaTeX`.

        Returns
        -------
        :class:`pandas.DataFrame`
            CLR-transformed array, of shape :code:`(N, D)`.
        """
        if label_mode.lower().startswith("num"):
            colnames = ["CLR{}".format(ix) for ix in range(self._obj.columns.size)]
        else:
            colnames = codata.get_CLR_labels(self._obj, mode=label_mode)
        tfm_df = pd.DataFrame(
            codata.CLR(self._obj.values),
            index=self._obj.index,
            columns=colnames,
        )
        # save parameter for inverse_transform
        tfm_df.attrs["inverts_to"] = self._obj.columns.to_list()
        return tfm_df

    def inverse_CLR(self):
        """
        Inverse Centred Log Ratio transformation.

        Returns
        -------
        :class:`pandas.DataFrame`
            Inverse-CLR transformed array, of shape :code:`(N, D)`. Columns are
            taken from the :code:`inverts_to` dataframe attribute if present.
        """
        colnames = self._obj.attrs.get("inverts_to")
        itfm_df = pd.DataFrame(
            codata.inverse_CLR(self._obj.values),
            index=self._obj.index,
            columns=colnames,
        )
        return itfm_df

    @attribute_transform
    def ILR(self, label_mode="simple"):
        """
        Isometric Log Ratio transformation.

        Parameters
        ----------
        label_mode : :class:`str`
            Labelling mode for the output dataframe (:code:`numeric`,
            :code:`simple`, :code:`LaTeX`). If you plan to use the outputs for
            automated visualisation and want to know which components
            contribute, use :code:`simple` or :code:`LaTeX`.

        Returns
        -------
        :class:`pandas.DataFrame`
            ILR-transformed array, of shape :code:`(N, D-1)`.
        """
        if label_mode.lower().startswith("num"):
            colnames = ["ILR{}".format(ix) for ix in range(self._obj.columns.size - 1)]
        else:
            colnames = codata.get_ILR_labels(self._obj, mode=label_mode)
        tfm_df = pd.DataFrame(
            codata.ILR(self._obj.values),
            index=self._obj.index,
            columns=colnames,
        )
        # save parameter for inverse_transform
        tfm_df.attrs["inverts_to"] = self._obj.columns.to_list()
        return tfm_df

    def inverse_ILR(self, X=None):
        """
        Inverse Isometric Log Ratio transformation.

        Parameters
        ----------
        X : :class:`numpy.ndarray`, :code:`None`
            Optional specification for an array from which to derive the
            orthonormal basis, with shape :code:`(N, D)`.

        Returns
        --------
        :class:`pandas.DataFrame`
            Inverse-ILR transformed array, of shape :code:`(N, D)`.
        """
        colnames = self._obj.attrs.get("inverts_to")
        # only forward X when supplied, so the default call is unchanged
        basis_kwargs = {} if X is None else {"X": X}
        itfm_df = pd.DataFrame(
            codata.inverse_ILR(self._obj.values, **basis_kwargs),
            index=self._obj.index,
            columns=colnames,
        )
        return itfm_df

    @attribute_transform
    def boxcox(
        self,
        lmbda=None,
        lmbda_search_space=(-1, 5),
        search_steps=100,
        return_lmbda=False,
    ):
        """
        Box-Cox transformation.

        Parameters
        ---------------
        lmbda : :class:`numpy.number`, :code:`None`
            Lambda value used to forward-transform values. If none, it will
            be calculated using the mean
        lmbda_search_space : :class:`tuple`
            Range tuple (min, max).
        search_steps : :class:`int`
            Steps for lambda search range.
        return_lmbda : :class:`bool`
            Currently unused; the lambda value is always stored in the
            :code:`boxcox_lmbda` attribute of the returned dataframe instead.

        Returns
        -------
        :class:`pandas.DataFrame`
            Box-Cox transformed array.
        """
        # always request lambda, so it can be stored for the inverse transform
        arr, lmbda = codata.boxcox(
            self._obj.values,
            lmbda=lmbda,
            lmbda_search_space=lmbda_search_space,
            search_steps=search_steps,
            return_lmbda=True,
        )
        tfm_df = pd.DataFrame(arr, index=self._obj.index, columns=self._obj.columns)
        tfm_df.attrs["boxcox_lmbda"] = lmbda  # save parameter for inverse_transform
        return tfm_df

    def inverse_boxcox(self, lmbda=None):
        """
        Inverse Box-Cox transformation.

        Parameters
        ---------------
        lmbda : :class:`float`
            Lambda value used to forward-transform values. If not given, the
            value stored in the dataframe attributes by :func:`boxcox` is used.

        Returns
        -------
        :class:`pandas.DataFrame`
            Inverse Box-Cox transformed array.

        Raises
        ------
        AssertionError
            If no lambda value is given or stored on the dataframe.
        """
        if lmbda is None:
            lmbda = self._obj.attrs.get("boxcox_lmbda")
        if lmbda is None:
            # raised explicitly so the check survives `python -O`
            raise AssertionError(
                "Can't invert a box-cox transform without a lambda parameter."
            )
        itfm_df = pd.DataFrame(
            codata.inverse_boxcox(self._obj.values, lmbda=lmbda),
            index=self._obj.index,
            columns=self._obj.columns,
        )
        return itfm_df

    @attribute_transform
    def sphere(self):
        r"""
        Spherical coordinate transformation for compositional data.

        Returns
        -------
        θ : :class:`pandas.DataFrame`
            Array of angles in radians (:math:`(0, \pi / 2]`)
        """
        arr = codata.sphere(self._obj.values)
        tfm_df = pd.DataFrame(
            arr,
            index=self._obj.index,
            columns=["θ_" + c for c in self._obj.columns[1:]],
        )
        # save column names for inverse_sphere; stored as a plain list, like
        # the `inverts_to` entries written by the other transforms
        tfm_df.attrs["variables"] = self._obj.columns.to_list()
        return tfm_df

    def inverse_sphere(self, variables=None):
        """
        Inverse spherical coordinate transformation to revert back to
        compositional data in the simplex.

        Parameters
        ----------
        variables : :class:`list`
            List of names for the compositional data variables, optionally
            specified (for when they may not be stored in the dataframe
            attributes through the :class:`~pyrolite.comp.pyrocomp` functions).

        Returns
        -------
        df : :class:`pandas.DataFrame`
            Dataframe of original compositional (simplex) coordinates,
            normalised to 1.
        """
        arr = codata.inverse_sphere(self._obj.values)
        if variables is None:
            variables = self._obj.attrs.get("variables")
        if variables is None:
            # fall back to integer labels sized from the output array, which
            # has a different number of columns than the angle dataframe
            variables = np.arange(np.shape(arr)[-1])
        itfm_df = pd.DataFrame(
            arr,
            index=self._obj.index,
            columns=variables,
        )
        return itfm_df

    def logratiomean(self, transform=codata.CLR, inverse_transform=codata.inverse_CLR):
        """
        Take a mean of log-ratios along the index of a dataframe.

        Parameters
        ----------
        transform : :class:`callable` | :class:`str`
            Log transform to use.
        inverse_transform : :class:`callable`
            Currently unused; retained for backwards compatibility.

        Returns
        -------
        :class:`pandas.Series`
            Mean values as a pandas series.
        """
        return codata.logratiomean(self._obj, transform=transform)

    def invert_transform(self, **kwargs):
        """
        Try to inverse-transform a transformed dataframe.

        The name of the forward transform is read from the :code:`transform`
        dataframe attribute, and the matching inverse method of this accessor
        is called.

        Parameters
        ----------
        **kwargs
            Passed through to the inverse transform method.

        Returns
        -------
        :class:`pandas.DataFrame`
            Inverse-transformed dataframe.

        Raises
        ------
        ValueError
            If the dataframe carries no recognised transform history.
        """
        tfm = self._obj.attrs.get("transform")
        if tfm is None:
            raise ValueError("DataFrame has no transform history.")
        try:
            tfm, inv_tfm = codata.get_transforms(tfm)
        except ValueError as err:
            raise ValueError("DataFrame has no transform history.") from err
        _invert_method = getattr(self, inv_tfm.__name__)
        return _invert_method(**kwargs)