"""
Reference compositions and compositional normalisation.
"""
import json
from pathlib import Path
import numpy as np
import pandas as pd
from tinydb import Query, TinyDB
from ..util.log import Handle
from ..util.meta import pyrolite_datafolder
from ..util.text import to_width
from ..util.units import scale
# Module-level logger, created through the project's logging `Handle` helper.
logger = Handle(__name__)
# Default location of the reference composition database: "refdb.json" inside
# the "geochem" subfolder of the pyrolite data folder.
__dbfile__ = pyrolite_datafolder(subfolder="geochem") / "refdb.json"
def all_reference_compositions(path=None):
    """
    Load every composition stored in the reference database, keyed by name.

    Parameters
    -----------
    path : :class:`str` | :class:`pathlib.Path`
        Database file to read. The module-level default database is used
        when this is :code:`None`.

    Returns
    --------
    :class:`dict`
        Mapping of composition name to :class:`Composition`.
    """
    db_location = __dbfile__ if path is None else path
    with TinyDB(str(db_location), access_mode="r") as db:
        # Only the single "_default" table is expected to be present.
        return {
            record["name"]: Composition(
                json.loads(record["composition"]), name=record["name"]
            )
            for record in db.all()
        }
def get_reference_composition(name):
    """
    Retrieve a particular composition from the reference database.

    Parameters
    ------------
    name : :class:`str`
        Name of the reference composition model.

    Returns
    --------
    :class:`pyrolite.geochem.norm.Composition`

    Raises
    -------
    :class:`AssertionError`
        If the database does not contain exactly one entry called `name`.
    """
    with TinyDB(str(__dbfile__), access_mode="r") as db:
        matches = db.search(Query().name == name)

    # Raised explicitly (rather than via `assert`) so that the check survives
    # `python -O` and carries a useful message; the exception type is kept as
    # AssertionError for backward compatibility with existing callers.
    if len(matches) != 1:
        raise AssertionError(
            "Expected exactly one reference composition named {!r}, "
            "found {}.".format(name, len(matches))
        )

    record = matches[0]
    return Composition(json.loads(record["composition"]), name=record["name"])
def get_reference_files(directory=None, formats=("csv",)):
    """
    Get a list of the reference composition files.

    Parameters
    -----------
    directory : :class:`str` | :class:`pathlib.Path`, :code:`None`
        Location of reference data files. Defaults to the "refcomp" folder
        within the "geochem" subfolder of the pyrolite data folder.
    formats : :class:`list` | :class:`tuple` | :class:`str`, :code:`("csv",)`
        File extensions (without the leading dot) to collect. A single
        extension may be given as a plain string. Currently only csv files
        can be loaded downstream.

    Returns
    --------
    :class:`list`
        Paths of the matching files, sorted within each format.

    Raises
    -------
    :class:`AssertionError`
        If `directory` does not exist or is not a directory.
    """
    directory = directory or (pyrolite_datafolder(subfolder="geochem") / "refcomp")
    # Accept plain strings as well as Path objects.
    directory = Path(directory)

    # Raised explicitly so the check is not stripped under `python -O`; the
    # AssertionError type is kept for backward compatibility.
    if not (directory.exists() and directory.is_dir()):
        raise AssertionError(
            "Reference file directory {!r} does not exist or is not a "
            "directory.".format(str(directory))
        )

    if isinstance(formats, str):
        # A bare string would otherwise be iterated character by character.
        formats = (formats,)

    files = []
    for fmt in formats:
        # Sorted so the result does not depend on filesystem listing order.
        files.extend(sorted(directory.glob("*." + fmt)))
    return files
def update_database(path=None, encoding="cp1252", **kwargs):
    """
    Update the reference composition database.

    Parameters
    -----------
    path : :class:`str` | :class:`pathlib.Path`
        Database file to rebuild. The module-level default database is used
        when this is :code:`None`.
    encoding : :class:`str`
        Encoding passed on to :class:`Composition` when reading each
        reference file.
    **kwargs
        Further keyword arguments passed on to :class:`Composition`.

    Notes
    ------
    This will take all csv files from the geochem/refcomp pyrolite data folder
    and construct a document-based JSON database. Existing database content
    is discarded first.
    """
    if path is None:
        path = __dbfile__
    # Opened without a read-only access mode: write access is required.
    with TinyDB(str(path)) as db:
        db.truncate()
        for f in get_reference_files():
            C = Composition(f, encoding=encoding, **kwargs)
            db.insert(
                {"name": C.name, "composition": C._df.T.to_json(force_ascii=False)}
            )
        # No explicit db.close() here: leaving the `with` block closes the
        # database.
class Composition(object):
    # Metadata columns read from an imported frame, paired with the instance
    # attribute each one populates.
    _METADATA_FIELDS = (
        ("ModelName", "name"),
        ("Reservoir", "reservoir"),
        ("ModelType", "source"),
        ("Reference", "reference"),
        ("Citation", "citation"),
        ("DOI", "doi"),
        ("Description", "description"),
    )

    def __init__(
        self, src, name=None, reference=None, reservoir=None, source=None, **kwargs
    ):
        """A composition with units and uncertainties for each compositional
        variable.

        Parameters
        -----------
        src : :class:`str` | :class:`pathlib.Path` | :class:`pandas.DataFrame` | :class:`dict`
            Source of the composition: a csv/json file path, a dataframe
            whose first row holds the composition, or a dictionary as stored
            in the reference database.
        name, reference, reservoir, source : :class:`str`
            Initial values for the corresponding attributes. When `src` is a
            file or dictionary, these are overwritten by the metadata found
            in the imported frame.
        **kwargs
            Passed on to the pandas reader when `src` is a file.

        Attributes
        -----------
        name : :class:`str`
            Name of the composition.
        reference : :class:`str`
            Reference for the composition.
        reservoir : :class:`str`
            Optionally-specified reservoir for the specific composition (e.g. Primitive
            Mantle).
        source : :class:`str`
            Source of the composition (typically method of derivation,
            e.g. 'calculated').
        citation, doi, description : :class:`str`
            Further metadata taken from an imported frame, where present.
        filename : :class:`str` | :class:`pathlib.Path`
            File which the composition is derived from.
        comp : :class:`pandas.DataFrame`
            A 1-row dataframe
        units : :class:`pandas.Series`
            Units of the compositional variables.
        unc_2sigma : :class:`pandas.Series`
            Uncertainties for the compositional variables.

        Raises
        -------
        :class:`NotImplementedError`
            If `src` is of an unsupported type.
        """
        self.comp = None
        self.units = None
        self.unc_2sigma = None
        self.name = name
        self.reference = reference
        self.reservoir = reservoir
        self.source = source
        # Always defined, so attribute access is safe whatever `src` was.
        self.citation = None
        self.doi = None
        self.description = None
        self.filename = None
        self._df = None

        if isinstance(src, (str, Path)):
            self.filename = str(src)
            self._import_file(self.filename, **kwargs)
            self._process_imported_frame()
        elif isinstance(src, (pd.DataFrame, pd.Series)):  # composition dataframe
            values = src.loc[
                src.index[0], src.pyrochem.list_compositional
            ].astype(float)
            # Build the 1-row frame by transposing: passing the Series to
            # pd.DataFrame(..., index=["value"]) would reindex it onto the
            # label "value" and discard every number.
            self.comp = values.to_frame().T
            self.comp.index = ["value"]
        elif isinstance(src, dict):
            self._df = pd.DataFrame.from_dict(src).T
            self._process_imported_frame()
        else:
            raise NotImplementedError(
                "Import of compostions as {} not yet implemented.".format(type(src))
            )

        if (self.name is not None) and (self.filename is None):
            self.filename = "{}.csv".format(self.name)  # default naming

    def _import_file(self, filename, **kwargs):
        """
        Read a csv or json file into the internal frame, indexed by the
        "var" column and transposed so that variables become columns.
        Other file types leave the internal frame unset.
        """
        if filename.endswith(".csv"):
            self._df = pd.read_csv(filename, **kwargs).set_index("var").T
        elif filename.endswith("json"):
            self._df = pd.read_json(filename, **kwargs).set_index("var").T

    def _metadata(self):
        """
        Collect the metadata fields from the "value" row of the internal
        frame as a dictionary; absent or null entries are given as None.
        """
        fields = [field for field, _ in self._METADATA_FIELDS]
        if self._df is None:
            # e.g. a composition constructed directly from a dataframe
            return dict.fromkeys(fields)
        row = self._df.loc["value"]
        metadata = {}
        for field in fields:
            value = row.get(field)  # None where the column is absent
            metadata[field] = None if pd.isnull(value) else value
        return metadata

    def _process_imported_frame(self):
        """
        Populate metadata attributes, the composition, and (where present)
        units and uncertainties from the internal frame.
        """
        assert self._df is not None
        metadata = self._metadata()
        for field, attribute in self._METADATA_FIELDS:
            setattr(self, attribute, metadata[field])

        self.comp = self._df.loc[
            ["value"], self._df.pyrochem.list_compositional
        ].astype(float)
        # Keep only the variables which actually have a value.
        self.comp = self.comp.dropna(axis=1)
        if "units" in self._df.index:
            self.units = self._df.loc["units", self.comp.columns]
        if "unc_2sigma" in self._df.index:
            self.unc_2sigma = self._df.loc["unc_2sigma", self.comp.columns].astype(
                float
            )

    def set_units(self, to="wt%"):
        """
        Set the units of the dataframe.

        Each value is multiplied by the factor returned by the `scale` helper
        for its current unit and the target unit, and all units are then
        recorded as `to`. Requires that units were loaded for the composition.

        Parameters
        ------------
        to : :class:`str`, :code:`"wt%"`

        Returns
        --------
        :class:`Composition`
            The composition itself, modified in place.
        """
        # NOTE(review): unc_2sigma is left unscaled here - confirm whether
        # the uncertainties should be converted together with the values.
        scales = self.units.apply(scale, target_unit=to).astype(float)
        self.comp *= scales
        self.units[:] = to
        return self

    def describe(self, verbose=True, **kwargs):
        """
        Build a text description of the composition from its metadata.

        Parameters
        ------------
        verbose : :class:`bool`
            Whether to lead with the string form of the composition and its
            description before the citation.
        **kwargs
            Passed on to the `to_width` text helper.

        Returns
        --------
        The description text as returned by `to_width`.
        """
        metadata = self._metadata()
        desc = ""
        if verbose:
            desc += str(self)
            desc += "\n"
            if metadata["Description"] is not None:
                desc += metadata["Description"]
                desc += "\n"
        if metadata["Citation"] is not None:
            desc += metadata["Citation"]
        if metadata["DOI"] is not None:
            desc += " "
            desc += "doi: {}".format(metadata["DOI"])
        return to_width(desc, **kwargs)

    def __getitem__(self, variables):
        """
        Allow access to model values via [] indexing e.g. Composition['Si', 'Cr'].

        Parameters
        ----------
        variables : :class:`str` | :class:`list` | :class:`tuple`
            Variable(s) to get.

        Returns
        --------
        A single value where one variable is requested, otherwise an array
        of values in the order requested; unknown variables give NaN.
        """
        # Tuples are included so that comp['Si', 'Cr'] behaves like a list.
        if isinstance(variables, (list, tuple, np.ndarray, pd.Index)):
            variables = [v if isinstance(v, str) else str(v) for v in variables]
        else:
            variables = [str(variables)]
        qry = self.comp.reindex(columns=variables).values.flatten()
        if len(qry) == 1:
            qry = qry[0]
        return qry

    def __str__(self):
        """Get a string representation of the composition."""
        s = ""
        if self.name is not None:
            s += self.name + " "
        if self.reservoir is not None:
            s += "Model of " + self.reservoir + " "
        if self.reference is not None:
            s += "from " + self.reference
        s += "."
        return s

    def __repr__(self):
        """Get a string signature of the composition."""
        cls_name = self.__class__.__name__
        parts = []
        if self.filename is not None:
            parts.append("'{}'".format(Path(self.filename).name))
        for par in ["name", "reference", "reservoir"]:
            value = getattr(self, par)
            if value is not None:
                parts.append("{}='{}'".format(par, value))
        # Joining avoids a dangling leading comma when there is no filename;
        # continuation lines align with the opening parenthesis.
        separator = ",\n" + " " * (len(cls_name) + 1)
        return cls_name + "(" + separator.join(parts) + ")"