# Source code for etna.transforms.decomposition.change_points_based.level
from copy import deepcopy
from typing import Dict
from typing import Optional

from ruptures import Binseg

from etna.distributions import BaseDistribution
from etna.distributions import CategoricalDistribution
from etna.distributions import IntDistribution
from etna.transforms.decomposition.change_points_based.base import BaseChangePointsModelAdapter
from etna.transforms.decomposition.change_points_based.base import ReversibleChangePointsTransform
from etna.transforms.decomposition.change_points_based.change_points_models.ruptures_based import (
    RupturesChangePointsModel,
)
from etna.transforms.decomposition.change_points_based.detrend import _OneSegmentChangePointsTrendTransform
from etna.transforms.decomposition.change_points_based.per_interval_models import MeanPerIntervalModel
from etna.transforms.decomposition.change_points_based.per_interval_models import StatisticsPerIntervalModel
class _OneSegmentChangePointsLevelTransform(_OneSegmentChangePointsTrendTransform):
    """Single-segment level transform; construction is forwarded unchanged to the parent class."""

    def __init__(
        self,
        in_column: str,
        change_points_model: BaseChangePointsModelAdapter,
        per_interval_model: StatisticsPerIntervalModel,
    ):
        """Init _OneSegmentChangePointsLevelTransform.

        Parameters
        ----------
        in_column:
            name of column to apply transform to
        change_points_model:
            model to get change points from data
        per_interval_model:
            model to process intervals between change points
        """
        # Nothing is customised here: every argument goes to the parent by keyword.
        super().__init__(
            in_column=in_column,
            change_points_model=change_points_model,
            per_interval_model=per_interval_model,
        )
class ChangePointsLevelTransform(ReversibleChangePointsTransform):
    """Transform that makes a detrending of change-point intervals.

    This class differs from :py:class:`~etna.transforms.decomposition.change_points_based.detrend.ChangePointsTrendTransform`
    only by default values for ``change_points_model`` and ``per_interval_model``.

    Transform divides each segment into intervals using ``change_points_model``.
    Then a separate model is fitted on each interval using ``per_interval_model``.
    Values predicted by the model are subtracted from each interval.
    Evaluated function can be linear, mean, median, etc. Look at the signature to find out which models can be used.

    Warning
    -------
    This transform can suffer from look-ahead bias. For transforming data at some timestamp
    it uses information from the whole train part.
    """

    # Class-level templates for the default models. ``__init__`` hands out copies of them,
    # so these objects are never stored on (or mutated through) an instance.
    _default_change_points_model = RupturesChangePointsModel(
        change_points_model=Binseg(model="ar"),
        n_bkps=5,
    )
    _default_per_interval_model = MeanPerIntervalModel()

    def __init__(
        self,
        in_column: str,
        change_points_model: Optional[BaseChangePointsModelAdapter] = None,
        per_interval_model: Optional[StatisticsPerIntervalModel] = None,
    ):
        """Init ChangePointsLevelTransform.

        Parameters
        ----------
        in_column:
            name of column to apply transform to
        change_points_model:
            model to get trend change points,
            by default :py:class:`ruptures.detection.Binseg` in a wrapper with ``n_bkps=5`` is used
        per_interval_model:
            model to process intervals of segment,
            by default mean value is used to evaluate the interval
        """
        self.in_column = in_column
        # Each instance gets its own copy of a default model: storing the class-level object
        # itself would let a change made through one transform leak into every other
        # default-configured transform (and into the default used for comparison below).
        self.change_points_model = (
            change_points_model if change_points_model is not None else deepcopy(self._default_change_points_model)
        )
        self.per_interval_model = (
            per_interval_model if per_interval_model is not None else deepcopy(self._default_per_interval_model)
        )
        super().__init__(
            transform=_OneSegmentChangePointsLevelTransform(
                in_column=self.in_column,
                change_points_model=self.change_points_model,
                per_interval_model=self.per_interval_model,
            ),
            required_features=[in_column],
        )

    @property
    def _is_change_points_model_default(self) -> bool:
        """Check whether ``change_points_model`` has the same ``to_dict()`` form as the default one."""
        # it can't see the difference between Binseg(model="ar") and Binseg(model="l1")
        return self.change_points_model.to_dict() == self._default_change_points_model.to_dict()

    def params_to_tune(self) -> Dict[str, BaseDistribution]:
        """Get default grid for tuning hyperparameters.

        If ``self.change_points_model`` is equal to default then this grid tunes parameters:
        ``change_points_model.change_points_model.model``, ``change_points_model.n_bkps``.
        Other parameters are expected to be set by the user.

        Returns
        -------
        :
            Grid to tune.
        """
        # A user-supplied change points model is left untouched: nothing is tuned for it.
        if not self._is_change_points_model_default:
            return {}
        return {
            "change_points_model.change_points_model.model": CategoricalDistribution(
                ["l1", "l2", "normal", "rbf", "cosine", "linear", "clinear", "ar", "mahalanobis", "rank"]
            ),
            "change_points_model.n_bkps": IntDistribution(low=5, high=30),
        }