Source code for etna.transforms.math.log

import warnings
from typing import List
from typing import Optional

import numpy as np
import pandas as pd

from etna.datasets import TSDataset
from etna.datasets import set_columns_wide
from etna.transforms.base import ReversibleTransform
from etna.transforms.utils import match_target_quantiles


[docs]class LogTransform(ReversibleTransform): """LogTransform applies logarithm transformation for given series.""" def __init__(self, in_column: str, base: int = 10, inplace: bool = True, out_column: Optional[str] = None): """Init LogTransform. Parameters ---------- in_column: column to apply transform base: base of logarithm to apply to series inplace: * if True, apply logarithm transformation inplace to in_column, * if False, add column add transformed column to dataset out_column: name of added column. If not given, use ``self.__repr__()`` """ super().__init__(required_features=[in_column]) self.in_column = in_column self.base = base self.inplace = inplace self.out_column = out_column self.in_column_regressor: Optional[bool] = None if self.inplace and out_column: warnings.warn("Transformation will be applied inplace, out_column param will be ignored") def _get_column_name(self) -> str: if self.inplace: return self.in_column elif self.out_column: return self.out_column else: return self.__repr__() def _fit(self, df: pd.DataFrame) -> "LogTransform": """Fit method does nothing and is kept for compatibility. Parameters ---------- df: dataframe with data. Returns ------- result: LogTransform """ return self
[docs] def fit(self, ts: TSDataset) -> "LogTransform": """Fit the transform.""" self.in_column_regressor = self.in_column in ts.regressors super().fit(ts) return self
def _transform(self, df: pd.DataFrame) -> pd.DataFrame: """Apply log transformation to the dataset. Parameters ---------- df: dataframe with data to transform. Returns ------- result: pd.Dataframe transformed dataframe """ segments = sorted(set(df.columns.get_level_values("segment"))) features = df.loc[:, pd.IndexSlice[:, self.in_column]] if (features < 0).any().any(): raise ValueError("LogPreprocess can be applied only to non-negative series") result = df transformed_features = np.log1p(features) / np.log(self.base) if self.inplace: result = set_columns_wide( result, transformed_features, features_left=[self.in_column], features_right=[self.in_column] ) else: column_name = self._get_column_name() transformed_features.columns = pd.MultiIndex.from_product([segments, [column_name]]) result = pd.concat((result, transformed_features), axis=1) result = result.sort_index(axis=1) return result def _inverse_transform(self, df: pd.DataFrame) -> pd.DataFrame: """Apply inverse transformation to the dataset. Parameters ---------- df: dataframe with data to transform. Returns ------- result: pd.DataFrame transformed series """ result = df if self.inplace: features = df.loc[:, pd.IndexSlice[:, self.in_column]] transformed_features = np.expm1(features * np.log(self.base)) result = set_columns_wide( result, transformed_features, features_left=[self.in_column], features_right=[self.in_column] ) if self.in_column == "target": segment_columns = result.columns.get_level_values("feature").tolist() quantiles = match_target_quantiles(set(segment_columns)) for quantile_column_nm in quantiles: features = df.loc[:, pd.IndexSlice[:, quantile_column_nm]] transformed_features = np.expm1(features * np.log(self.base)) result = set_columns_wide( result, transformed_features, features_left=[quantile_column_nm], features_right=[quantile_column_nm], ) return result
[docs] def get_regressors_info(self) -> List[str]: """Return the list with regressors created by the transform.""" if self.in_column_regressor is None: raise ValueError("Fit the transform to get the correct regressors info!") return [self._get_column_name()] if self.in_column_regressor and not self.inplace else []
__all__ = ["LogTransform"]