Source code for silicone.database_crunchers.latest_time_ratio

"""
Module for the database cruncher which uses the 'latest time ratio' technique.
"""
import logging
import warnings

import numpy as np
from pyam import IamDataFrame

from .base import _DatabaseCruncher

logger = logging.getLogger(__name__)


[docs]class LatestTimeRatio(_DatabaseCruncher): """ Database cruncher which uses the 'latest time ratio' technique. This cruncher derives the relationship between two variables by simply assuming that the follower timeseries is equal to the lead timeseries multiplied by a scaling factor. The scaling factor is derived by calculating the ratio of the follower variable to the lead variable in the latest year in which the follower variable is available in the database. Additionally, since the derived relationship only depends on a single point in the database, no regressions or other calculations are performed. Once the relationship is derived, the 'filler' function will infill following: .. math:: E_f(t) = R * E_l(t) where :math:`E_f(t)` is emissions of the follower variable and :math:`E_l(t)` is emissions of the lead variable, both in the infillee database. :math:`R` is the scaling factor, calculated as .. math:: R = \\frac{ E_f(t_{\\text{last}}) }{ e_l(t_{\\text{last}}) } where :math:`t_{\\text{last}}` is the average of all values of the follower gas at the latest time it appears in the database, and the lower case :math:`e` represents the infiller database. """
[docs] def derive_relationship(self, variable_follower, variable_leaders): """ Derive the relationship between two variables from the database. Parameters ---------- variable_follower : str The variable for which we want to calculate timeseries (e.g. ``"Emissions|C5F12"``). variable_leaders : list[str] The variable we want to use in order to infer timeseries of ``variable_follower`` (e.g. ``["Emissions|CO2"]``). Note that the 'latest time ratio' methodology gives the same result, independent of the value of ``variable_leaders`` in the database. Returns ------- :obj:`func` Function which takes a :obj:`pyam.IamDataFrame` containing ``variable_leaders`` timeseries and returns timeseries for ``variable_follower`` based on the derived relationship between the two. Please see the source code for the exact definition (and docstring) of the returned function. Raises ------ ValueError ``variable_leaders`` contains more than one variable. ValueError There is no data for ``variable_leaders`` or ``variable_follower`` in the database. """ iamdf_follower = self._get_iamdf_follower(variable_follower, variable_leaders) data_follower = iamdf_follower.data data_follower_time_col = iamdf_follower.time_col data_follower_key_timepoint = max(data_follower[data_follower_time_col]) key_timepoint_filter = {data_follower_time_col: [data_follower_key_timepoint]} data_follower_key_year_val = np.nanmean( iamdf_follower.filter(**key_timepoint_filter).data["value"].values ) data_follower_unit = data_follower["unit"].values[0] if data_follower_time_col == "time": data_follower_key_timepoint = data_follower_key_timepoint.to_pydatetime() def filler(in_iamdf, interpolate=False): """ Filler function derived from :obj:`LatestTimeRatio`. Parameters ---------- in_iamdf : :obj:`pyam.IamDataFrame` Input data to fill data in interpolate : bool If the key year for filling is not in ``in_iamdf``, should a value be interpolated? Returns ------- :obj:`pyam.IamDataFrame` Filled in data (without original source data) Raises ------ ValueError The key year for filling is not in ``in_iamdf`` and ``interpolate is False``. """ lead_var = in_iamdf.filter(variable=variable_leaders) if data_follower_time_col != in_iamdf.time_col: raise ValueError( "`in_iamdf` time column must be the same as the time column used " "to generate this filler function (`{}`)".format( data_follower_time_col ) ) if any(lead_var.data["value"] < 0): warn_str = "Note that the lead variable {} goes negative.".format( variable_leaders ) logger.warning(warn_str) print(warn_str) def get_values_in_key_timepoint(idf): # filter warning about empty data frame as we handle it ourselves with warnings.catch_warnings(): warnings.simplefilter("ignore") return idf.filter(**key_timepoint_filter) lead_var_val_in_key_timepoint = get_values_in_key_timepoint(lead_var) if lead_var_val_in_key_timepoint.data.empty: if not interpolate: error_msg = ( "Required downscaling timepoint ({}) is not in the data for " "the lead gas ({})".format( data_follower_key_timepoint, variable_leaders[0] ) ) raise ValueError(error_msg) lead_var = lead_var.interpolate( data_follower_key_timepoint, inplace=False ) lead_var_val_in_key_timepoint = get_values_in_key_timepoint(lead_var) lead_var.filter(**key_timepoint_filter, keep=False, inplace=True) lead_var_val_in_key_timepoint = lead_var_val_in_key_timepoint.timeseries() if not lead_var_val_in_key_timepoint.shape[1] == 1: # pragma: no cover raise AssertionError( "How did filtering for a single timepoint result in more than " "one column?" ) lead_var_val_in_key_timepoint = lead_var_val_in_key_timepoint.iloc[:, 0] scaling = data_follower_key_year_val / lead_var_val_in_key_timepoint output_ts = (lead_var.timeseries().T * scaling).T.reset_index() output_ts["variable"] = variable_follower output_ts["unit"] = data_follower_unit return IamDataFrame(output_ts) return filler
def _get_iamdf_follower(self, variable_follower, variable_leaders): if len(variable_leaders) > 1: raise ValueError( "For `LatestTimeRatio`, ``variable_leaders`` should only " "contain one variable" ) self._check_follower_and_leader_in_db(variable_follower, variable_leaders) return self._db.filter(variable=variable_follower)