Source code for pyrolite.util.time

from collections import ChainMap, defaultdict

import numpy as np
import pandas as pd

from .log import Handle
from .meta import pyrolite_datafolder
from .text import titlecase

logger = Handle(__name__)


# get the latest geotimescale data
__data__ = sorted(
    pyrolite_datafolder(subfolder="timescale").glob("geotimescale_*.csv")
)[-1]
__colors__ = pyrolite_datafolder(subfolder="timescale") / "timecolors.csv"


[docs]def listify(df, axis=1): """ Consdense text information across columns into a single list. Parameters ---------- df : :class:`pandas.DataFrame` Dataframe (or slice of dataframe) to condense along axis. axis : :class:`int` Axis to condense along. """ return df.copy(deep=True).apply(list, axis=axis)
[docs]def age_name( agenamelist, prefixes=["Lower", "Middle", "Upper"], suffixes=["Stage", "Series"] ): """ Condenses an agename list to a specific agename, given a subset of ambiguous_names. Parameters ---------- agenamelist : :class:`list` List of name components (i.e. :code:`[Eon, Era, Period, Epoch]`) prefixes : :class:`list` Name components which occur prior to the higher order classification (e.g. :code:`"Upper Triassic"`). suffixes : :class:`list` Name components which occur after the higher order classification (e.g. :code:`"Cambrian Series 2"`). """ ambiguous_names = prefixes + suffixes ambig_vars = [s.lower().strip() for s in ambiguous_names] nameguess = agenamelist[-1] # Process e.g. Stage 1 => Stage nn_nameguess = "".join([i for i in nameguess if not i.isdigit()]).strip() # check if the name guess corresponds to any of the ambiguous names hit = [ ambiguous_names[ix] for ix, vars in enumerate(ambig_vars) if nn_nameguess.lower().strip() in vars ][0:1] if hit: indexstart = len(agenamelist) - 1 outname = [agenamelist[indexstart]] out_index_previous = 0 ambiguous_name = True while ambiguous_name: hitphrase = hit[0] indexstart -= 1 nextup = agenamelist[indexstart] if hitphrase in prefixes: # insert the higher order component after the previous one outname.insert(out_index_previous + 1, nextup) out_index_previous += 1 else: # insert the higher order component before the previous one outname.insert(out_index_previous - 1, nextup) out_index_previous -= 1 _nn_nextupguess = "".join([i for i in nextup if not i.isdigit()]).strip() hit = [ ambiguous_names[ix] for ix, vars in enumerate(ambig_vars) if _nn_nextupguess.lower().strip() in vars ][0:1] if not hit: ambiguous_name = False return " ".join(outname) else: return nameguess
[docs]def import_colors(filename=__colors__, delim="/"): """ Import a list of timescale names with associated colors. """ c = pd.read_csv(filename).dropna(how="all") if delim is not None: # and ("RGB" in c.columns): c["RGB"] = c["RGB"].apply( lambda x: tuple( [float(i) / 255.0 for i in x.split(delim)] + [1.0] ) # add alpha ) return {name: rgb for name, rgb in c.values}
[docs]def timescale_reference_frame( filename=__data__, info_cols=["Start", "End", "Aliases"], color_info=None ): """ Rearrange the text-based timescale dataframe. Utility function for timescale class. Parameters ---------- filename : :class:`str` | :class:`pathlib.Path` File from which to generate the timescale information. info_cols : :class:`list` List of columns beyond hierarchial group labels (e.g. Eon, Era..). Returns ------- :class:`pandas.DataFrame` Dataframe containing timescale information. """ df = pd.read_csv(filename) df[["Start", "End"]] = df.loc[:, ["Start", "End"]].apply( pd.to_numeric, errors="coerce" ) _df = df.copy(deep=True) grps = [i for i in _df.columns if not i in info_cols] condensed = _df.loc[:, [i for i in _df.columns if not i in info_cols]].fillna( value="" ) _df["Level"] = condensed.apply( lambda x: grps[[ix for ix, v in enumerate(x) if v][-1]], axis=1 ) condensed = listify(condensed).apply(lambda x: [i for i in x if i]) _df["Name"] = condensed.apply(age_name) _df["Ident"] = condensed.apply("-".join) _df["MeanAge"] = _df.apply(lambda x: (x.Start + x.End) / 2, axis=1) _df["Unc"] = _df.apply(lambda x: np.abs((x.Start - x.End)) / 2, axis=1) # Aliases _df.Aliases = _df.Aliases.apply(lambda x: [] if pd.isnull(x) else x.split(";")) _df.Aliases = _df.apply(lambda x: [x.Name, x.Ident] + x.Aliases, axis=1) _df.Aliases = _df.Aliases.apply(lambda x: [i.lower().strip() for i in x]) colors = color_info or import_colors() _df["Color"] = _df.Name.apply(lambda x: colors.get(x, None)) col_order = ( ["Ident", "Name", "Level", "Start", "End", "MeanAge", "Unc"] + grps + ["Aliases", "Color"] ) return _df.loc[:, col_order]
[docs]class Timescale(object): def __init__(self, filename=None): """ Geological Timescale class to provide time-focused utility functions. Parameters ----------- filename : :class:`str` | :class:`pathlib.Path` Path to the timescale data file. Attributes ---------- data : :class:`pandas.DataFrame` Timescale dataframe. levels : :class:`list` Hierarchial levels within the timescale. """ if filename is None: self.data = timescale_reference_frame() else: self.data = timescale_reference_frame(filename) self.levels = [i for i in self.data.Level.unique() if not pd.isnull(i)] self.levels = [i for i in self.data.columns if i in self.levels] def getnan(): return np.nan, np.nan self.locate = defaultdict(getnan) self.build()
[docs] def build(self): """ Build the timescale from data within file. """ for ix, g in enumerate(self.levels): others = self.levels[ix + 1 :] fltr = ( self.data.loc[:, others].isnull().all(axis=1) & ~self.data.loc[:, g].isnull() ) setattr(self, g + "s", self.data.loc[fltr, :]) dicts = self.data.apply( lambda x: {a: (x.Start, x.End) for a in x.Aliases}, axis=1 ) # should check that the keys are unique across all of these self.locate.update(dict(ChainMap(*dicts))) self.data = self.data.set_index("Ident")
[docs] def text2age(self, entry, nulls=[None, "None", "none", np.nan, "NaN"]): """ Converts a text-based age to the corresponding age range (in Ma). String-based entries return (max_age, min_age). Collection-based entries return a list of tuples. Parameters ------------ entry : :class:`str` String name, or series of string names, for geological age range. Returns ------- :class:`tuple` | :class:`list` (:class:`tuple`) Tuple or list of tuples. """ try: entry = float(entry) return (entry, entry) except ValueError: return self.locate[entry.lower().strip()]
[docs] def named_age(self, age, level="Specific", **kwargs): """ Converts a numeric age (in Ma) to named age at a specific level. Parameters ---------- age : :class:`float` Numeric age in Ma. level : :class:`str`, :code:`{'Eon', 'Era', 'Period', 'Superepoch', 'Epoch', 'Age', 'Specific'}` Level of specificity. Returns ------- :class:`str` String representation for the entry. """ level = titlecase(level) wthn_rng = lambda x: (age <= x.Start) & (age >= x.End) relevant = self.data.loc[self.data.apply(wthn_rng, axis=1).values, :] if level == "Specific": # take the rightmost grouping relevant = relevant.loc[:, self.levels] counts = (~pd.isnull(relevant)).count(axis=1) if sum(counts == counts.max()) > 1: idx_rel_row = counts.index[ max([ix for (ix, r) in enumerate(counts) if r == counts[0]]) ] else: idx_rel_row = counts.idxmax() rel_row = relevant.loc[idx_rel_row, :] return age_name(rel_row[~pd.isnull(rel_row)], **kwargs) else: unique_values = relevant.loc[:, level].unique() return unique_values[~pd.isnull(unique_values)][0]