from collections import ChainMap, defaultdict
import numpy as np
import pandas as pd
from .log import Handle
from .meta import pyrolite_datafolder
from .text import titlecase
# Module-level logger built from the package's own `Handle` wrapper.
logger = Handle(__name__)
# get the latest geotimescale data
# All files matching "geotimescale_*.csv" in the "timescale" data subfolder are
# sorted and the last path is used; presumably the file names embed a
# version/date so that the last one is the newest -- TODO confirm.
# NOTE: indexing with [-1] raises IndexError at import time if no file matches.
__data__ = sorted(
pyrolite_datafolder(subfolder="timescale").glob("geotimescale_*.csv")
)[-1]
# Path of the color table; used as the default `filename` of `import_colors`.
__colors__ = pyrolite_datafolder(subfolder="timescale") / "timecolors.csv"
def listify(df, axis=1):
    """
    Condense the entries found along one axis of a dataframe into lists.

    Parameters
    ----------
    df : :class:`pandas.DataFrame`
        Dataframe (or slice of dataframe) to condense along axis.
    axis : :class:`int`
        Axis to condense along.

    Returns
    -------
    The result of applying :class:`list` along `axis` of a deep copy of `df`
    (for the default :code:`axis=1`, one list per row).
    """
    # Work on a deep copy so the caller's frame is never touched.
    duplicate = df.copy(deep=True)
    condensed = duplicate.apply(list, axis=axis)
    return condensed
def age_name(
    agenamelist,
    prefixes=("Lower", "Middle", "Upper"),
    suffixes=("Stage", "Series"),
):
    """
    Condenses an agename list to a specific agename, given a subset of
    ambiguous names.

    The last component is used as the name. If it is ambiguous on its own
    (e.g. :code:`"Upper"` or :code:`"Series 2"`), higher order components are
    added until an unambiguous component is reached or the list is exhausted.

    Parameters
    ----------
    agenamelist : :class:`list`
        List of name components (i.e. :code:`[Eon, Era, Period, Epoch]`)
    prefixes : :class:`list`
        Name components which occur prior to the higher order classification
        (e.g. :code:`"Upper Triassic"`).
    suffixes : :class:`list`
        Name components which occur after the higher order classification
        (e.g. :code:`"Cambrian Series 2"`).

    Returns
    -------
    :class:`str`
        The condensed name; an empty string for an empty `agenamelist`.
    """
    prefixes = list(prefixes)
    suffixes = list(suffixes)
    ambiguous_names = prefixes + suffixes
    ambig_vars = [s.lower().strip() for s in ambiguous_names]

    def _ambiguous_hit(component):
        """Return the first ambiguous name matched by `component`, else None."""
        # Process e.g. Stage 1 => Stage
        bare = "".join(ch for ch in component if not ch.isdigit()).strip().lower()
        if not bare:
            # A digits-only/blank component would otherwise match every name,
            # because the empty string is a substring of anything.
            return None
        for name, variant in zip(ambiguous_names, ambig_vars):
            if bare in variant:
                return name
        return None

    if len(agenamelist) == 0:
        return ""

    nameguess = agenamelist[-1]
    hit = _ambiguous_hit(nameguess)
    if hit is None:
        return nameguess

    outname = [nameguess]
    position = 0  # insertion index used for the previous component
    index = len(agenamelist) - 1
    # Stop at the first component rather than letting the index go negative,
    # which would wrap around, duplicate components and end in an IndexError.
    while hit is not None and index > 0:
        index -= 1
        nextup = agenamelist[index]
        if hit in prefixes:
            # insert the higher order component after the previous one
            position += 1
        else:
            # insert the higher order component before the previous one
            position -= 1
        outname.insert(position, nextup)
        hit = _ambiguous_hit(nextup)
    return " ".join(outname)
def import_colors(filename=__colors__, delim="/"):
    """
    Import a list of timescale names with associated colors.

    Parameters
    ----------
    filename : :class:`str` | :class:`pathlib.Path`
        CSV file to read. Rows are unpacked as (name, color) pairs, so the
        table is expected to have exactly two columns, one of them "RGB".
    delim : :class:`str` | :code:`None`
        Separator between the channel values within the "RGB" column. If
        :code:`None`, the column is left as read from the file.

    Returns
    -------
    :class:`dict`
        Mapping from name to color. When `delim` is given, each color is a
        tuple of the channel values divided by 255 with an alpha of 1.0
        appended.
    """
    table = pd.read_csv(filename).dropna(how="all")

    def _to_rgba(text):
        """Convert a delimited channel string to a 0-1 scaled tuple."""
        channels = [float(part) / 255.0 for part in text.split(delim)]
        channels.append(1.0)  # add alpha
        return tuple(channels)

    if delim is not None:
        table["RGB"] = table["RGB"].apply(_to_rgba)

    palette = {}
    for name, rgb in table.values:
        palette[name] = rgb
    return palette
def timescale_reference_frame(
    filename=__data__, info_cols=["Start", "End", "Aliases"], color_info=None
):
    """
    Rearrange the text-based timescale dataframe. Utility function for
    timescale class.

    Parameters
    ----------
    filename : :class:`str` | :class:`pathlib.Path`
        File from which to generate the timescale information.
    info_cols : :class:`list`
        List of columns beyond hierarchical group labels (e.g. Eon, Era..).
    color_info : :class:`dict`
        Mapping from name to color; when not given (or empty), the mapping
        from :func:`import_colors` is used.

    Returns
    -------
    :class:`pandas.DataFrame`
        Dataframe containing timescale information.
    """
    raw = pd.read_csv(filename)
    # Non-numeric age bounds become NaN rather than raising.
    raw[["Start", "End"]] = raw.loc[:, ["Start", "End"]].apply(
        pd.to_numeric, errors="coerce"
    )
    _df = raw.copy(deep=True)

    # Every column which is not an info column is a hierarchical group label.
    grps = [col for col in _df.columns if col not in info_cols]
    labels = _df.loc[:, grps].fillna(value="")

    def _deepest_level(row):
        """Name of the last group column holding a non-empty label."""
        filled = [pos for pos, value in enumerate(row) if value]
        return grps[filled[-1]]

    _df["Level"] = labels.apply(_deepest_level, axis=1)

    # One list of non-empty name components per row, highest order first.
    components = listify(labels).apply(lambda parts: [p for p in parts if p])
    _df["Name"] = components.apply(age_name)
    _df["Ident"] = components.apply("-".join)

    _df["MeanAge"] = (_df["Start"] + _df["End"]) / 2
    _df["Unc"] = np.abs(_df["Start"] - _df["End"]) / 2

    # Aliases
    def _split_aliases(cell):
        """Split a ';'-separated alias string; missing values give no aliases."""
        if pd.isnull(cell):
            return []
        return cell.split(";")

    _df["Aliases"] = _df["Aliases"].apply(_split_aliases)
    # The name and identifier always count as aliases of the entry.
    _df["Aliases"] = _df.apply(
        lambda row: [row.Name, row.Ident] + row.Aliases, axis=1
    )
    _df["Aliases"] = _df["Aliases"].apply(
        lambda aliases: [alias.lower().strip() for alias in aliases]
    )

    colors = color_info or import_colors()
    _df["Color"] = _df["Name"].apply(lambda label: colors.get(label, None))

    leading = ["Ident", "Name", "Level", "Start", "End", "MeanAge", "Unc"]
    trailing = ["Aliases", "Color"]
    col_order = leading + grps + trailing
    return _df.loc[:, col_order]
class Timescale(object):
    """Geological timescale lookup between named ages and numeric ages (Ma)."""

    def __init__(self, filename=None):
        """
        Geological Timescale class to provide time-focused utility functions.

        Parameters
        -----------
        filename : :class:`str` | :class:`pathlib.Path`
            Path to the timescale data file.

        Attributes
        ----------
        data : :class:`pandas.DataFrame`
            Timescale dataframe.
        levels : :class:`list`
            Hierarchical levels within the timescale.
        locate : :class:`collections.defaultdict`
            Mapping from lowercase alias to :code:`(Start, End)`; unknown
            aliases give :code:`(nan, nan)`.
        """
        if filename is None:
            self.data = timescale_reference_frame()
        else:
            self.data = timescale_reference_frame(filename)
        # Levels which are actually used, kept in dataframe column order.
        used_levels = [i for i in self.data.Level.unique() if not pd.isnull(i)]
        self.levels = [i for i in self.data.columns if i in used_levels]

        def getnan():
            return np.nan, np.nan

        self.locate = defaultdict(getnan)
        self.build()

    def build(self):
        """
        Build the timescale from data within file.

        Sets one attribute per level (e.g. :code:`Eons`) holding the rows
        defined at exactly that level, fills :attr:`locate` from the aliases
        and indexes :attr:`data` by "Ident".
        """
        for ix, g in enumerate(self.levels):
            others = self.levels[ix + 1 :]
            # Rows with a label at this level and nothing at any deeper level.
            fltr = (
                self.data.loc[:, others].isnull().all(axis=1)
                & ~self.data.loc[:, g].isnull()
            )
            setattr(self, g + "s", self.data.loc[fltr, :])

        alias_maps = [
            {alias: (start, end) for alias in aliases}
            for aliases, start, end in zip(
                self.data["Aliases"], self.data["Start"], self.data["End"]
            )
        ]
        # should check that the keys are unique across all of these
        # (with ChainMap, the first row defining an alias takes precedence)
        self.locate.update(dict(ChainMap(*alias_maps)))
        # Guarded so that calling build() again does not fail once "Ident"
        # has already been moved into the index.
        if "Ident" in self.data.columns:
            self.data = self.data.set_index("Ident")

    def text2age(self, entry, nulls=(None, "None", "none", np.nan, "NaN")):
        """
        Converts a text-based age to the corresponding age range (in Ma).

        String-based entries return (max_age, min_age). Collection-based entries
        return a list of tuples.

        Parameters
        ------------
        entry : :class:`str`
            String name, or series of string names, for geological age range.
            Numeric entries (or numeric strings) give :code:`(age, age)`.
        nulls : :class:`list`
            Values treated as missing, giving :code:`(nan, nan)`.

        Returns
        -------
        :class:`tuple` | :class:`list` (:class:`tuple`)
            Tuple or list of tuples.
        """
        is_collection = isinstance(
            entry, (list, tuple, set, frozenset, pd.Series, pd.Index)
        ) or (isinstance(entry, np.ndarray) and entry.ndim > 0)
        if is_collection:
            return [self.text2age(item, nulls=nulls) for item in entry]

        # Missing values previously escaped as a TypeError from float(None).
        if (not isinstance(entry, str) and pd.isnull(entry)) or entry in nulls:
            return (np.nan, np.nan)

        try:
            age = float(entry)
        except ValueError:
            # Not a number: look the name up among the known aliases.
            return self.locate[entry.lower().strip()]
        return (age, age)

    def named_age(self, age, level="Specific", **kwargs):
        """
        Converts a numeric age (in Ma) to named age at a specific level.

        Parameters
        ----------
        age : :class:`float`
            Numeric age in Ma.
        level : :class:`str`, :code:`{'Eon', 'Era', 'Period', 'Superepoch', 'Epoch', 'Age', 'Specific'}`
            Level of specificity.
        **kwargs
            Passed to :func:`age_name` when `level` is "Specific".

        Returns
        -------
        :class:`str`
            String representation for the entry.

        Raises
        ------
        :class:`ValueError`
            For the "Specific" level, if no entry spans `age`.
        """
        level = titlecase(level)

        # Entries whose [End, Start] interval contains the age.
        within = (self.data["Start"] >= age) & (self.data["End"] <= age)
        relevant = self.data.loc[within.values, :]
        if level == "Specific":  # take the rightmost grouping
            if relevant.empty:
                raise ValueError("No timescale entry spans an age of {}.".format(age))
            hierarchy = relevant.loc[:, self.levels]
            # Number of levels defined per entry; summing the mask is needed
            # here, as count() on a boolean mask would count every cell.
            depth = hierarchy.notnull().sum(axis=1).values
            # Most specific entry; the last one wins where several tie.
            deepest = np.flatnonzero(depth == depth.max())[-1]
            rel_row = hierarchy.iloc[deepest]
            return age_name(rel_row[~pd.isnull(rel_row)].to_list(), **kwargs)
        else:
            unique_values = relevant.loc[:, level].unique()
            return unique_values[~pd.isnull(unique_values)][0]