Source code for etna.transforms.feature_selection.filter
from typing import List
from typing import Optional
from typing import Sequence
import pandas as pd
from etna.transforms.base import ReversibleTransform
class FilterFeaturesTransform(ReversibleTransform):
    """Filters features in each segment of the dataframe.

    Exactly one of ``include`` / ``exclude`` selects which names on the
    ``"feature"`` column level survive the transform. When ``return_features``
    is set, the removed columns are remembered and appended back by the
    inverse transform.
    """

    def __init__(
        self,
        include: Optional[Sequence[str]] = None,
        exclude: Optional[Sequence[str]] = None,
        return_features: bool = False,
    ):
        """Create instance of FilterFeaturesTransform.

        Parameters
        ----------
        include:
            list of columns to pass through filter
        exclude:
            list of columns to not pass through
        return_features:
            if True, the columns removed during transform are stored and
            concatenated back to the dataframe during inverse transform.

        Raises
        ------
        ValueError:
            if both options set or none of them
        """
        super().__init__(required_features="all")
        self.include: Optional[Sequence[str]] = None
        self.exclude: Optional[Sequence[str]] = None
        self.return_features: bool = return_features
        # Columns dropped by the last ``_transform`` call; only filled when
        # ``return_features`` is True.
        self._df_removed: Optional[pd.DataFrame] = None
        if include is not None and exclude is None:
            # dict.fromkeys removes duplicates while keeping the caller's order;
            # going through a set would make the stored order depend on string
            # hash randomization and differ between interpreter runs.
            self.include = list(dict.fromkeys(include))
        elif exclude is not None and include is None:
            self.exclude = list(dict.fromkeys(exclude))
        else:
            raise ValueError("There should be exactly one option set: include or exclude")

    def get_regressors_info(self) -> List[str]:
        """Return the list with regressors created by the transform.

        Returns
        -------
        result: List[str]
            always an empty list
        """
        return []

    def _fit(self, df: pd.DataFrame) -> "FilterFeaturesTransform":
        """Fit method does nothing and is kept for compatibility.

        Parameters
        ----------
        df:
            dataframe with data.

        Returns
        -------
        result: FilterFeaturesTransform
        """
        return self

    def _transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Filter features according to include/exclude parameters.

        Parameters
        ----------
        df:
            dataframe with data to transform.

        Returns
        -------
        result: pd.DataFrame
            transformed dataframe with columns sorted

        Raises
        ------
        ValueError:
            if some of the requested names are missing on the ``"feature"`` level of columns
        """
        result = df
        features = df.columns.get_level_values("feature")
        if self.include is not None:
            if not set(self.include).issubset(features):
                raise ValueError(f"Features {set(self.include) - set(features)} are not present in the dataset.")
            result = result.loc[:, pd.IndexSlice[:, self.include]]
        # An empty ``exclude`` list means there is nothing to drop, so the
        # dataframe passes through unchanged (apart from column sorting).
        if self.exclude:
            if not set(self.exclude).issubset(features):
                raise ValueError(f"Features {set(self.exclude) - set(features)} are not present in the dataset.")
            result = df.drop(columns=self.exclude, level="feature")
        if self.return_features:
            # Keep everything that did not survive the filter so that
            # ``_inverse_transform`` can put it back.
            self._df_removed = df.drop(result.columns, axis=1)
        result = result.sort_index(axis=1)
        return result

    def _inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame:
        """Apply inverse transform to the data.

        Parameters
        ----------
        df:
            dataframe to apply inverse transformation

        Returns
        -------
        result: pd.DataFrame
            dataframe with the stored removed columns (if any) concatenated to it
        """
        # ``self._df_removed`` stays None unless ``return_features`` is True;
        # pd.concat silently drops None entries, so in that case the result
        # contains just the columns of ``df``.
        return pd.concat([df, self._df_removed], axis=1)