# Source code for freeforestml.cut

import pandas as pd
import numpy as np

class Cut:
    """
    Representation of an analysis cut. The class can be used to apply event
    selections based on conditions on columns in a pandas dataframe or derived
    quantities.

    Cuts store the condition to be applied to a dataframe. New cut objects
    accept all events by default. The selection can be limited by passing a
    lambda to the constructor.

    >>> sel_all = Cut()
    >>> sel_pos = Cut(lambda df: df.value > 0)

    The cut object lives independently of the dataframe. Calling the cut with
    a dataframe returns a new dataframe containing only rows which pass the
    selection criteria.

    >>> df = pd.DataFrame([0, 1, -2, -3, 4], columns=["value"])
    >>> sel_all(df)
       value
    0      0
    1      1
    2     -2
    3     -3
    4      4
    >>> sel_pos(df)
       value
    1      1
    4      4

    The index array for a given data set is calculated by calling the
    idx_array() method with a dataframe.

    >>> sel_pos.idx_array(df)
    0    False
    1     True
    2    False
    3    False
    4     True
    Name: value, dtype: bool

    Cuts can be used to build logical expressions using the bitwise and (&),
    or (|), xor (^) and not (~).

    >>> sel_even = Cut(lambda df: df.value % 2 == 0)
    >>> sel_pos_even = sel_pos & sel_even
    >>> sel_pos_even(df)
       value
    4      4

    Equivalently, cuts support logical operations directly using lambdas.

    >>> sel_pos_even_lambda = sel_pos & (lambda df: df.value % 2 == 0)
    >>> sel_pos_even_lambda(df)
       value
    4      4

    Cuts might be named by passing the 'label' argument to the constructor.
    Cut names can be used during plotting as labels to specify the plotted
    region.

    >>> sel_sr = Cut(lambda df: df.is_sr == 1, label="Signal Region")
    >>> sel_sr.label
    'Signal Region'

    If the application of a cut requires changing the event weights by
    so-called scale factors, you can pass additional optional keyword
    arguments that specify how the new weight should be computed.

    >>> sel_sample = Cut(lambda df: df.value % 2 == 0,
    ...                  weight=lambda df: df.weight * 2)

    The argument name 'weight' in this example is arbitrary. It is even
    possible to add new columns to the returned dataframe in this way,
    however, this is not recommended.
    """

    def __init__(self, func=None, label=None, **columns):
        """
        Creates a new cut. The optional func argument is called with the
        dataframe upon evaluation. The function must return an index array.
        If the optional function is omitted, every row in the dataframe is
        accepted by this cut.

        If func is itself a Cut, its selection function is reused; the label
        and the extra columns are taken from the arguments when given and
        from the other cut otherwise.

        Additional keyword arguments are stored and handed to
        DataFrame.assign() whenever the cut is applied to a dataframe.
        """
        if isinstance(func, Cut):
            # Copy-construct: explicit arguments win over the copied values.
            self.func = func.func
            self.label = label or func.label
            self.columns = columns or func.columns
        else:
            self.func = func
            self.label = label
            self.columns = columns

    def __call__(self, dataframe):
        """
        Applies the internally stored cut to the given dataframe and returns a
        new dataframe containing only entries passing the event selection.
        Any extra columns passed to the constructor are assigned on the
        selected rows.
        """
        new_df = dataframe[self.idx_array(dataframe)]
        if self.columns:
            new_df = new_df.assign(**self.columns)
        return new_df

    def idx_array(self, dataframe):
        """
        Applies the internally stored cut to the given dataframe and returns
        an index array, specifying which event passed the event selection.
        """
        if self.func is None:
            # Accept everything. The mask must carry the dataframe's own
            # index; with a fresh 0..n-1 index, boolean indexing and the
            # logical operators misalign on dataframes that were filtered
            # before or use custom index labels.
            return pd.Series(np.ones(len(dataframe), dtype='bool'),
                             index=dataframe.index)
        return self.func(dataframe)

    @staticmethod
    def _resolve(other, dataframe):
        """
        Returns the index array described by the right-hand operand of a
        logical operation: a Cut is evaluated via idx_array(), any other
        callable is called with the dataframe, anything else is used as is.
        """
        # Cut objects are callable themselves (calling one returns a
        # dataframe), so the isinstance check has to come first.
        if isinstance(other, Cut):
            return other.idx_array(dataframe)
        if callable(other):
            return other(dataframe)
        return other

    def __and__(self, other):
        """
        Returns a new cut implementing the logical AND of this cut and the
        other cut. The other can be a Cut or any callable. The returned cut
        has no label and no extra columns.
        """
        return Cut(lambda df: self.idx_array(df) & self._resolve(other, df))

    def __or__(self, other):
        """
        Returns a new cut implementing the logical OR of this cut and the
        other cut. The other can be a Cut or any callable. The returned cut
        has no label and no extra columns.
        """
        return Cut(lambda df: self.idx_array(df) | self._resolve(other, df))

    def __xor__(self, other):
        """
        Returns a new cut implementing the logical XOR of this cut and the
        other cut. The other can be a Cut or any callable. The returned cut
        has no label and no extra columns.
        """
        return Cut(lambda df: self.idx_array(df) ^ self._resolve(other, df))

    def __invert__(self):
        """
        Returns a new cut implementing the logical NOT of this cut.
        """
        return Cut(lambda df: ~self.idx_array(df))

    def __rand__(self, other):
        """
        Reflected AND, used when the left operand is not a Cut (e.g. a
        lambda). Delegates to ``self & other``.
        """
        return self & other

    def __ror__(self, other):
        """
        Reflected OR, used when the left operand is not a Cut (e.g. a
        lambda). Delegates to ``self | other``.
        """
        return self | other

    def __rxor__(self, other):
        """
        Reflected XOR, used when the left operand is not a Cut (e.g. a
        lambda). Delegates to ``self ^ other``.
        """
        return self ^ other