Source code for silicone.database_crunchers.equal_quantile_walk

"""
Module for the database cruncher which uses the 'equal quantile walk' technique.
"""

import numpy as np
from pyam import IamDataFrame

from ..stats import calc_quantiles_of_data
from ..utils import _make_weighting_series
from .base import _DatabaseCruncher


[docs]class EqualQuantileWalk(_DatabaseCruncher): """ Database cruncher which uses the 'equal quantile walk' technique. This cruncher assumes that the amount of effort going into reducing one emission set is equal to that for another emission, therefore the lead and follow data should be at the same quantile of all pathways in the infiller database. It calculates the quantile of the lead infillee data in the lead infiller database, then outputs that quantile of the follow data in the infiller database. """
[docs] def derive_relationship( self, variable_follower, variable_leaders, smoothing=None, weighting=None ): """ Derive the relationship between two variables from the database. Parameters ---------- variable_follower : str The variable for which we want to calculate timeseries (e.g. ``"Emissions|C5F12"``). variable_leaders : list[str] The variable we want to use in order to infer timeseries of ``variable_follower`` (e.g. ``["Emissions|CO2"]``). smoothing : float or string By default, no smoothing is done on the distribution. If a value is provided, it is fed into :func:`scipy.stats.gaussian_kde` - see full documentation there. In short, if a float is input, we fit a Gaussian kernel density estimator with that width to the points. If a string is used, it must be either "scott" or "silverman", after those two methods of determining the best kernel bandwidth. weighting: Dict{(str, str) : float} The dictionary, mapping the (mode, scenario) tuple onto the weight (relative to a weight of 1 for the default). This does not have to include all scenarios in df, but cannot include scenarios not in df. Returns ------- :obj:`func` Function which takes a :obj:`pyam.IamDataFrame` containing ``variable_leaders`` timeseries and returns timeseries for ``variable_follower`` based on the derived relationship between the two. Please see the source code for the exact definition (and docstring) of the returned function. Raises ------ ValueError ``variable_leaders`` contains more than one variable. ValueError There is no data for ``variable_leaders`` or ``variable_follower`` in the database. """ iamdf_follower = self._get_iamdf_follower(variable_follower, variable_leaders) follower_ts = iamdf_follower.timeseries() data_follower_time_col = iamdf_follower.time_col data_follower_unit = iamdf_follower["unit"].values[0] lead_ts = self._db.filter(variable=variable_leaders).timeseries() if weighting is not None: if isinstance(weighting, dict): weighting_follow = _make_weighting_series(follower_ts, weighting) weighting_lead = _make_weighting_series(lead_ts, weighting) else: raise TypeError("``weighting`` should be a dictionary") else: weighting_follow = None weighting_lead = None lead_unit = lead_ts.index.get_level_values("unit")[0] def filler(in_iamdf): """ Filler function derived from :obj:`EqualQuantileWalk`. Parameters ---------- in_iamdf : :obj:`pyam.IamDataFrame` Input data to fill data in Returns ------- :obj:`pyam.IamDataFrame` Filled in data (without original source data) Raises ------ ValueError Not all required timepoints are present in the database we crunched... """ lead_in = in_iamdf.filter(variable=variable_leaders) if not all([unit == lead_unit for unit in lead_in.unit]): raise ValueError( "Units of lead variable is meant to be `{}`, found `{}`".format( lead_unit, lead_in.unit ) ) if data_follower_time_col != in_iamdf.time_col: raise ValueError( "`in_iamdf` time column must be the same as the time column used " "to generate this filler function (`{}`)".format( data_follower_time_col ) ) if lead_in.data.empty: raise ValueError( "There is no data for {} so it cannot be infilled".format( variable_leaders ) ) output_ts = lead_in.timeseries() if any( [ (time not in lead_ts.columns) or (time not in follower_ts.columns) for time in output_ts.columns ] ): # We allow for cases where either lead or follow have gaps raise ValueError( "Not all required timepoints are present in the database we " "crunched, we crunched \n\t{} for the lead and \n\t{} for the " "follow \nbut you passed in \n\t{}".format( lead_ts.columns, follower_ts.columns, output_ts.columns ) ) for col in output_ts.columns: output_ts[col] = self._find_same_quantile( follower_ts[col], lead_ts[col], output_ts[col], smoothing, weighting_lead, weighting_follow, ) output_ts = output_ts.reset_index() output_ts["variable"] = variable_follower output_ts["unit"] = data_follower_unit return IamDataFrame(output_ts) return filler
def _get_iamdf_follower(self, variable_follower, variable_leaders): if len(variable_leaders) > 1: raise ValueError( "For `EqualQuantileWalk`, ``variable_leaders`` should only " "contain one variable" ) self._check_follower_and_leader_in_db(variable_follower, variable_leaders) return self._db.filter(variable=variable_follower) def _find_same_quantile( self, follow_vals, lead_vals, lead_input, smoothing, weighting_lead, weighting_follow, ): # Dispose of nans that can cloud the calculation follow_vals = follow_vals[~np.isnan(follow_vals)] input_quantiles = calc_quantiles_of_data( lead_vals, lead_input, smoothing, weighting_lead ) if all(np.isnan(input_quantiles)): return np.nanmean(follow_vals) return calc_quantiles_of_data( follow_vals, input_quantiles, smoothing, weighting_follow, to_quantile=False )