From 2f63bd3a6d6bb20378b225474387eb94cb06781e Mon Sep 17 00:00:00 2001
From: Alfonso Tobar <48638337+datacubeR@users.noreply.github.com>
Date: Thu, 23 Mar 2023 04:21:49 -0300
Subject: [PATCH 1/6] Add code examples in timeseries module's docstrings
 (#647)

* Adding code examples for Timeseries Module

* Fixing details in the examples
---
 .../forecasting/expanding_window_features.py  | 22 +++++++++++++++++++
 .../timeseries/forecasting/lag_features.py    | 22 +++++++++++++++++++
 .../timeseries/forecasting/window_features.py | 22 +++++++++++++++++++
 3 files changed, 66 insertions(+)

diff --git a/feature_engine/timeseries/forecasting/expanding_window_features.py b/feature_engine/timeseries/forecasting/expanding_window_features.py
index 3cabf0142..c8306c085 100644
--- a/feature_engine/timeseries/forecasting/expanding_window_features.py
+++ b/feature_engine/timeseries/forecasting/expanding_window_features.py
@@ -117,6 +117,28 @@ class ExpandingWindowFeatures(BaseForecastTransformer):
     pandas.expanding
     pandas.aggregate
     pandas.shift
+
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> from feature_engine.timeseries.forecasting import ExpandingWindowFeatures
+    >>> X = pd.DataFrame(dict(date = ["2022-09-18",
+    ...                               "2022-09-19",
+    ...                               "2022-09-20",
+    ...                               "2022-09-21",
+    ...                               "2022-09-22"],
+    ...                       x1 = [1,2,3,4,5],
+    ...                       x2 = [6,7,8,9,10]
+    ...                     ))
+    >>> ewf = ExpandingWindowFeatures()
+    >>> ewf.fit_transform(X)
+             date  x1  x2  x1_expanding_mean  x2_expanding_mean
+    0  2022-09-18   1   6                NaN                NaN
+    1  2022-09-19   2   7                1.0                6.0
+    2  2022-09-20   3   8                1.5                6.5
+    3  2022-09-21   4   9                2.0                7.0
+    4  2022-09-22   5  10                2.5                7.5
     """
 
     def __init__(
diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 6abccb949..a7a1cef26 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -95,6 +95,28 @@ class LagFeatures(BaseForecastTransformer):
     See Also
     --------
     pandas.shift
+
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> from feature_engine.timeseries.forecasting import LagFeatures
+    >>> X = pd.DataFrame(dict(date = ["2022-09-18",
+    ...                               "2022-09-19",
+    ...                               "2022-09-20",
+    ...                               "2022-09-21",
+    ...                               "2022-09-22"],
+    ...                       x1 = [1,2,3,4,5],
+    ...                       x2 = [6,7,8,9,10]
+    ...                     ))
+    >>> lf = LagFeatures(periods=[1,2])
+    >>> lf.fit_transform(X)
+             date  x1  x2  x1_lag_1  x2_lag_1  x1_lag_2  x2_lag_2
+    0  2022-09-18   1   6       NaN       NaN       NaN       NaN
+    1  2022-09-19   2   7       1.0       6.0       NaN       NaN
+    2  2022-09-20   3   8       2.0       7.0       1.0       6.0
+    3  2022-09-21   4   9       3.0       8.0       2.0       7.0
+    4  2022-09-22   5  10       4.0       9.0       3.0       8.0
     """
 
     def __init__(
diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
index dc17123fb..f3c4e90d8 100644
--- a/feature_engine/timeseries/forecasting/window_features.py
+++ b/feature_engine/timeseries/forecasting/window_features.py
@@ -121,6 +121,28 @@ class WindowFeatures(BaseForecastTransformer):
     pandas.rolling
     pandas.aggregate
     pandas.shift
+
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> from feature_engine.timeseries.forecasting import WindowFeatures
+    >>> X = pd.DataFrame(dict(date = ["2022-09-18",
+    ...                               "2022-09-19",
+    ...                               "2022-09-20",
+    ...                               "2022-09-21",
+    ...                               "2022-09-22"],
+    ...                       x1 = [1,2,3,4,5],
+    ...                       x2 = [6,7,8,9,10]
+    ...                     ))
+    >>> wf = WindowFeatures(window = 2)
+    >>> wf.fit_transform(X)
+             date  x1  x2  x1_window_2_mean  x2_window_2_mean
+    0  2022-09-18   1   6               NaN               NaN
+    1  2022-09-19   2   7               NaN               NaN
+    2  2022-09-20   3   8               1.5               6.5
+    3  2022-09-21   4   9               2.5               7.5
+    4  2022-09-22   5  10               3.5               8.5
     """
 
     def __init__(

From 3fa9c8a53d8b35e71a11d45c2390153c79ba405c Mon Sep 17 00:00:00 2001
From: Alfonso Tobar <48638337+datacubeR@users.noreply.github.com>
Date: Thu, 23 Mar 2023 04:28:36 -0300
Subject: [PATCH 2/6] Add code examples in transformation module's docstrings
 (#646)

* Adding code examples for Transformation module

* Fixing details in the examples
---
 feature_engine/transformation/arcsin.py     | 18 +++++++++++
 feature_engine/transformation/boxcox.py     | 19 +++++++++++
 feature_engine/transformation/log.py        | 36 +++++++++++++++++++++
 feature_engine/transformation/power.py      | 18 +++++++++++
 feature_engine/transformation/reciprocal.py | 18 +++++++++++
 feature_engine/transformation/yeojohnson.py | 23 +++++++++++--
 6 files changed, 130 insertions(+), 2 deletions(-)

diff --git a/feature_engine/transformation/arcsin.py b/feature_engine/transformation/arcsin.py
index 7dcad3a18..0cb9c80d1 100644
--- a/feature_engine/transformation/arcsin.py
+++ b/feature_engine/transformation/arcsin.py
@@ -80,6 +80,24 @@ class ArcsinTransformer(BaseNumericalTransformer):
     transform:
         Apply the arcsin transformation.
 
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from feature_engine.transformation import ArcsinTransformer
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.beta(1, 1, size = 100)))
+    >>> ast = ArcsinTransformer()
+    >>> ast.fit(X)
+    >>> X = ast.transform(X)
+    >>> X.head()
+              x
+    0  0.785437
+    1  0.253389
+    2  0.144664
+    3  0.783236
+    4  0.650777
     """
 
     def __init__(
diff --git a/feature_engine/transformation/boxcox.py b/feature_engine/transformation/boxcox.py
index fa9de5caa..9597e5f79 100644
--- a/feature_engine/transformation/boxcox.py
+++ b/feature_engine/transformation/boxcox.py
@@ -93,6 +93,25 @@ class BoxCoxTransformer(BaseNumericalTransformer):
     .. [1] Box and Cox. "An Analysis of Transformations". Read at a RESEARCH MEETING,
         1964.
         https://rss.onlinelibrary.wiley.com/doi/abs/10.1111/j.2517-6161.1964.tb00553.x
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from feature_engine.transformation import BoxCoxTransformer
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.lognormal(size = 100)))
+    >>> bct = BoxCoxTransformer()
+    >>> bct.fit(X)
+    >>> X = bct.transform(X)
+    >>> X.head()
+              x
+    0  0.505485
+    1 -0.137595
+    2  0.662654
+    3  1.607518
+    4 -0.232237
     """
 
     def __init__(
diff --git a/feature_engine/transformation/log.py b/feature_engine/transformation/log.py
index 2c1a466ce..b94269220 100644
--- a/feature_engine/transformation/log.py
+++ b/feature_engine/transformation/log.py
@@ -77,6 +77,24 @@ class LogTransformer(BaseNumericalTransformer):
     transform:
         Transform the variables using the logarithm.
 
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from feature_engine.transformation import LogTransformer
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.lognormal(size = 100)))
+    >>> lt = LogTransformer()
+    >>> lt.fit(X)
+    >>> X = lt.transform(X)
+    >>> X.head()
+              x
+    0  0.496714
+    1 -0.138264
+    2  0.647689
+    3  1.523030
+    4 -0.234153
     """
 
     def __init__(
@@ -263,6 +281,24 @@ class LogCpTransformer(BaseNumericalTransformer, FitFromDictMixin):
     transform:
         Transform the variables with the logarithm of x plus C.
 
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from feature_engine.transformation import LogCpTransformer
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.lognormal(size = 100)))
+    >>> lct = LogCpTransformer()
+    >>> lct.fit(X)
+    >>> X = lct.transform(X)
+    >>> X.head()
+              x
+    0  0.944097
+    1  0.586701
+    2  1.043204
+    3  1.707159
+    4  0.541405
     """
 
     def __init__(
diff --git a/feature_engine/transformation/power.py b/feature_engine/transformation/power.py
index c318e1d6b..456b5d873 100644
--- a/feature_engine/transformation/power.py
+++ b/feature_engine/transformation/power.py
@@ -74,6 +74,24 @@ class PowerTransformer(BaseNumericalTransformer):
     transform:
         Apply the power transformation to the variables.
 
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from feature_engine.transformation import PowerTransformer
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.lognormal(size = 100)))
+    >>> pt = PowerTransformer()
+    >>> pt.fit(X)
+    >>> X = pt.transform(X)
+    >>> X.head()
+              x
+    0  1.281918
+    1  0.933203
+    2  1.382432
+    3  2.141518
+    4  0.889517
     """
 
     def __init__(
diff --git a/feature_engine/transformation/reciprocal.py b/feature_engine/transformation/reciprocal.py
index 4ad399004..05546952e 100644
--- a/feature_engine/transformation/reciprocal.py
+++ b/feature_engine/transformation/reciprocal.py
@@ -73,6 +73,24 @@ class ReciprocalTransformer(BaseNumericalTransformer):
     transform:
         Apply the reciprocal 1 / x transformation.
 
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from feature_engine.transformation import ReciprocalTransformer
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = 10 - np.random.exponential(size = 100)))
+    >>> rt = ReciprocalTransformer()
+    >>> rt.fit(X)
+    >>> X = rt.transform(X)
+    >>> X.head()
+              x
+    0  0.104924
+    1  0.143064
+    2  0.115164
+    3  0.110047
+    4  0.101726
     """
 
     def __init__(
diff --git a/feature_engine/transformation/yeojohnson.py b/feature_engine/transformation/yeojohnson.py
index fb6b1586a..c658fd502 100644
--- a/feature_engine/transformation/yeojohnson.py
+++ b/feature_engine/transformation/yeojohnson.py
@@ -74,11 +74,30 @@ class YeoJohnsonTransformer(BaseNumericalTransformer):
     References
     ----------
     .. [1] Yeo, In-Kwon and Johnson, Richard (2000).
-       A new family of power transformations to improve normality or symmetry.
-       Biometrika, 87, 954-959.
+        A new family of power transformations to improve normality or symmetry.
+        Biometrika, 87, 954-959.
 
     .. [2] Weisberg S. "Yeo-Johnson Power Transformations".
         https://www.stat.umn.edu/arc/yjpower.pdf
+
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from feature_engine.transformation import YeoJohnsonTransformer
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.lognormal(size = 100) - 10))
+    >>> yjt = YeoJohnsonTransformer()
+    >>> yjt.fit(X)
+    >>> X = yjt.transform(X)
+    >>> X.head()
+                   x
+    0 -267042.906453
+    1 -444357.138990
+    2 -221626.115742
+    3  -23647.632651
+    4 -467264.993249
     """
 
     def __init__(

From 1d9ccc47976cc1d2f4ce36396e629594edda189c Mon Sep 17 00:00:00 2001
From: Alfonso Tobar <48638337+datacubeR@users.noreply.github.com>
Date: Thu, 23 Mar 2023 04:40:21 -0300
Subject: [PATCH 3/6] Add code examples in outliers module's docstrings (#644)

* Adding code examples for Winsorizer, ArbitraryCapper and Trimmer

* Fixing details in the examples

* add display output to example

---------

Co-authored-by: Soledad Galli <solegalli@protonmail.com>
---
 feature_engine/outliers/artbitrary.py | 21 ++++++++++
 feature_engine/outliers/trimmer.py    | 55 +++++++++++++++++++++++++++
 feature_engine/outliers/winsorizer.py | 42 ++++++++++++++++++++
 3 files changed, 118 insertions(+)

diff --git a/feature_engine/outliers/artbitrary.py b/feature_engine/outliers/artbitrary.py
index d010aecd6..ceb7f2cfc 100644
--- a/feature_engine/outliers/artbitrary.py
+++ b/feature_engine/outliers/artbitrary.py
@@ -91,6 +91,27 @@ class ArbitraryOutlierCapper(BaseOutlier):
     transform:
         Cap the variables.
 
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> from feature_engine.outliers import ArbitraryOutlierCapper
+    >>> X = pd.DataFrame(dict(x1 = [1,2,3,4,5,6,7,8,9,10]))
+    >>> aoc = ArbitraryOutlierCapper(max_capping_dict=dict(x1 =  8),
+    ...                              min_capping_dict=dict(x1 = 2))
+    >>> aoc.fit(X)
+    >>> aoc.transform(X)
+       x1
+    0   2
+    1   2
+    2   3
+    3   4
+    4   5
+    5   6
+    6   7
+    7   8
+    8   8
+    9   8
     """
 
     def __init__(
diff --git a/feature_engine/outliers/trimmer.py b/feature_engine/outliers/trimmer.py
index 55afbb579..897c91634 100644
--- a/feature_engine/outliers/trimmer.py
+++ b/feature_engine/outliers/trimmer.py
@@ -89,6 +89,61 @@ class OutlierTrimmer(WinsorizerBase):
     transform:
         Remove outliers.
 
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> from feature_engine.outliers import OutlierTrimmer
+    >>> X = pd.DataFrame(dict(x = [0.49671,
+    ...                         -0.1382,
+    ...                          0.64768,
+    ...                          1.52302,
+    ...                         -0.2341,
+    ...                         -17.2341,
+    ...                          1.57921,
+    ...                          0.76743,
+    ...                         -0.4694,
+    ...                          0.54256]))
+    >>> ot = OutlierTrimmer(capping_method='gaussian', tail='left', fold=3)
+    >>> ot.fit(X)
+    >>> ot.transform(X)
+              x
+    0   0.49671
+    1  -0.13820
+    2   0.64768
+    3   1.52302
+    4  -0.23410
+    5 -17.23410
+    6   1.57921
+    7   0.76743
+    8  -0.46940
+    9   0.54256
+
+    >>> import pandas as pd
+    >>> from feature_engine.outliers import OutlierTrimmer
+    >>> X = pd.DataFrame(dict(x = [0.49671,
+    ...                         -0.1382,
+    ...                          0.64768,
+    ...                          1.52302,
+    ...                         -0.2341,
+    ...                         -17.2341,
+    ...                          1.57921,
+    ...                          0.76743,
+    ...                         -0.4694,
+    ...                          0.54256]))
+    >>> ot = OutlierTrimmer(capping_method='mad', tail='left', fold=3)
+    >>> ot.fit(X)
+    >>> ot.transform(X)
+             x
+    0  0.49671
+    1 -0.13820
+    2  0.64768
+    3  1.52302
+    4 -0.23410
+    6  1.57921
+    7  0.76743
+    8 -0.46940
+    9  0.54256
     """
 
     def transform(self, X: pd.DataFrame) -> pd.DataFrame:
diff --git a/feature_engine/outliers/winsorizer.py b/feature_engine/outliers/winsorizer.py
index 92480438d..4a44e6052 100644
--- a/feature_engine/outliers/winsorizer.py
+++ b/feature_engine/outliers/winsorizer.py
@@ -97,6 +97,48 @@ class Winsorizer(WinsorizerBase):
     transform:
         Cap the variables.
 
+    Examples
+    --------
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from feature_engine.outliers import Winsorizer
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.normal(size = 10)))
+    >>> wz = Winsorizer(capping_method='mad', tail='both', fold=3)
+    >>> wz.fit(X)
+    >>> wz.transform(X)
+              x
+    0  0.496714
+    1 -0.138264
+    2  0.647689
+    3  1.523030
+    4 -0.234153
+    5 -0.234137
+    6  1.579213
+    7  0.767435
+    8 -0.469474
+    9  0.542560
+
+    >>> import numpy as np
+    >>> import pandas as pd
+    >>> from feature_engine.outliers import Winsorizer
+    >>> np.random.seed(42)
+    >>> X = pd.DataFrame(dict(x = np.random.normal(size = 10)))
+    >>> wz = Winsorizer(capping_method='mad', tail='both', fold=3)
+    >>> wz.fit(X)
+    >>> wz.transform(X)
+              x
+    0  0.496714
+    1 -0.138264
+    2  0.647689
+    3  1.523030
+    4 -0.234153
+    5 -0.234137
+    6  1.579213
+    7  0.767435
+    8 -0.469474
+    9  0.542560
     """
 
     def __init__(

From 9d4db3ea974dc6a2ea004391caf77ded55d262a7 Mon Sep 17 00:00:00 2001
From: Alfonso Tobar <48638337+datacubeR@users.noreply.github.com>
Date: Thu, 23 Mar 2023 04:46:06 -0300
Subject: [PATCH 4/6] Add code examples in preprocessing and wrappers modules'
 docstrings (#643)

* Adding code examples for SkWrapper, MatchCategories, and MatchVariables

* Fixing details in the examples

* modify sparse parameter in ohe

---------

Co-authored-by: datacubeR <datacuber@pop-os.localdomain>
Co-authored-by: Soledad Galli <solegalli@protonmail.com>
---
 .../preprocessing/match_categories.py         | 21 +++++++++
 feature_engine/preprocessing/match_columns.py | 44 +++++++++++++++++++
 feature_engine/wrappers/wrappers.py           | 40 +++++++++++++++++
 3 files changed, 105 insertions(+)

diff --git a/feature_engine/preprocessing/match_categories.py b/feature_engine/preprocessing/match_categories.py
index f75709c3f..e4be96251 100644
--- a/feature_engine/preprocessing/match_categories.py
+++ b/feature_engine/preprocessing/match_categories.py
@@ -88,6 +88,27 @@ class MatchCategories(
 
     transform:
         Enforce the type of categorical variables as dtype `categorical`.
+
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> from feature_engine.preprocessing import MatchCategories
+    >>> X_train = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [4,5,6]))
+    >>> X_test = pd.DataFrame(dict(x1 = ["c","b","a","d"], x2 = [5,6,4,7]))
+    >>> mc = MatchCategories(missing_values="ignore")
+    >>> mc.fit(X_train)
+    >>> mc.transform(X_train)
+      x1  x2
+    0  a   4
+    1  b   5
+    2  c   6
+    >>> mc.transform(X_test)
+        x1  x2
+    0    c   5
+    1    b   6
+    2    a   4
+    3  NaN   7
     """
 
     def __init__(
diff --git a/feature_engine/preprocessing/match_columns.py b/feature_engine/preprocessing/match_columns.py
index ff2b4f22a..c40243d13 100644
--- a/feature_engine/preprocessing/match_columns.py
+++ b/feature_engine/preprocessing/match_columns.py
@@ -100,6 +100,50 @@ class MatchVariables(BaseEstimator, TransformerMixin, GetFeatureNamesOutMixin):
 
     transform:
         Add or delete variables to match those observed in the train set.
+
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> from feature_engine.preprocessing import MatchVariables
+    >>> X_train = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [4,5,6]))
+    >>> X_test = pd.DataFrame(dict(x1 = ["c","b","a","d"],
+    ...                             x2 = [5,6,4,7],
+    ...                             x3 = [1,1,1,1]))
+    >>> mv = MatchVariables(missing_values="ignore")
+    >>> mv.fit(X_train)
+    >>> mv.transform(X_train)
+      x1  x2
+    0  a   4
+    1  b   5
+    2  c   6
+    >>> mv.transform(X_test)
+    The following variables are dropped from the DataFrame: ['x3']
+      x1  x2
+    0  c   5
+    1  b   6
+    2  a   4
+    3  d   7
+
+    >>> import pandas as pd
+    >>> from feature_engine.preprocessing import MatchVariables
+    >>> X_train = pd.DataFrame(dict(x1 = ["a","b","c"],
+    ...                             x2 = [4,5,6], x3 = [1,1,1]))
+    >>> X_test = pd.DataFrame(dict(x1 = ["c","b","a","d"], x2 = [5,6,4,7]))
+    >>> mv = MatchVariables(missing_values="ignore")
+    >>> mv.fit(X_train)
+    >>> mv.transform(X_train)
+      x1  x2  x3
+    0  a   4   1
+    1  b   5   1
+    2  c   6   1
+    >>> mv.transform(X_test)
+    The following variables are added to the DataFrame: ['x3']
+      x1  x2  x3
+    0  c   5 NaN
+    1  b   6 NaN
+    2  a   4 NaN
+    3  d   7 NaN
     """
 
     def __init__(
diff --git a/feature_engine/wrappers/wrappers.py b/feature_engine/wrappers/wrappers.py
index de4e8aa46..a621a85b5 100644
--- a/feature_engine/wrappers/wrappers.py
+++ b/feature_engine/wrappers/wrappers.py
@@ -144,6 +144,46 @@ class SklearnTransformerWrapper(BaseEstimator, TransformerMixin):
     See Also
     --------
     sklearn.compose.ColumnTransformer
+
+    Examples
+    --------
+
+    >>> import pandas as pd
+    >>> from feature_engine.wrappers import SklearnTransformerWrapper
+    >>> from sklearn.preprocessing import StandardScaler
+    >>> X = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [1,2,3], x3 = [4,5,6]))
+    >>> skw = SklearnTransformerWrapper(StandardScaler())
+    >>> skw.fit(X)
+    >>> skw.transform(X)
+      x1        x2        x3
+    0  a -1.224745 -1.224745
+    1  b  0.000000  0.000000
+    2  c  1.224745  1.224745
+
+    >>> import pandas as pd
+    >>> from feature_engine.wrappers import SklearnTransformerWrapper
+    >>> from sklearn.preprocessing import OneHotEncoder
+    >>> X = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [1,2,3], x3 = [4,5,6]))
+    >>> skw = SklearnTransformerWrapper(
+    ...     OneHotEncoder(sparse_output = False), variables = "x1")
+    >>> skw.fit(X)
+    >>> skw.transform(X)
+       x2  x3  x1_a  x1_b  x1_c
+    0   1   4   1.0   0.0   0.0
+    1   2   5   0.0   1.0   0.0
+    2   3   6   0.0   0.0   1.0
+
+    >>> import pandas as pd
+    >>> from feature_engine.wrappers import SklearnTransformerWrapper
+    >>> from sklearn.preprocessing import PolynomialFeatures
+    >>> X = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [1,2,3], x3 = [4,5,6]))
+    >>> skw = SklearnTransformerWrapper(PolynomialFeatures(include_bias = False))
+    >>> skw.fit(X)
+    >>> skw.transform(X)
+      x1   x2   x3  x2^2  x2 x3  x3^2
+    0  a  1.0  4.0   1.0    4.0  16.0
+    1  b  2.0  5.0   4.0   10.0  25.0
+    2  c  3.0  6.0   9.0   18.0  36.0
     """
 
     def __init__(

From e73772d7529baf3599193a488a544e3d043ee6d3 Mon Sep 17 00:00:00 2001
From: Soledad Galli <solegalli@protonmail.com>
Date: Mon, 27 Mar 2023 19:05:12 +0200
Subject: [PATCH 5/6] Release 1.6 (#617)

* release 1.6

* bump version

* fixes typo in username

* update what's new with latest commits

* add code examples contribution

* update what's new and fix typos
---
 .circleci/config.yml     |  2 +-
 docs/whats_new/index.rst |  1 +
 docs/whats_new/v_160.rst | 94 ++++++++++++++++++++++++++++++++++++++++
 feature_engine/VERSION   |  2 +-
 4 files changed, 97 insertions(+), 2 deletions(-)
 create mode 100644 docs/whats_new/v_160.rst

diff --git a/.circleci/config.yml b/.circleci/config.yml
index d7fbb25c5..cd3e2bffc 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -151,4 +151,4 @@ workflows:
           filters:
             branches:
               only:
-                - 1.5.X
\ No newline at end of file
+                - 1.6.X
\ No newline at end of file
diff --git a/docs/whats_new/index.rst b/docs/whats_new/index.rst
index 91cb58947..2a1106fb0 100644
--- a/docs/whats_new/index.rst
+++ b/docs/whats_new/index.rst
@@ -8,6 +8,7 @@ Find out what's new in each new version release.
 .. toctree::
    :maxdepth: 2
 
+   v_160
    v_150
    v_140
    v_130
diff --git a/docs/whats_new/v_160.rst b/docs/whats_new/v_160.rst
new file mode 100644
index 000000000..24eb89159
--- /dev/null
+++ b/docs/whats_new/v_160.rst
@@ -0,0 +1,94 @@
+Version 1.6.X
+=============
+
+Version 1.6.0
+-------------
+
+Deployed: 16th March 2023
+
+Contributors
+~~~~~~~~~~~~
+
+- `Gleb Levitski <https://github.com/GLevv>`_
+- `Morgan Sell <https://github.com/Morgan-Sell>`_
+- `Alfonso Tobar <https://github.com/datacubeR>`_
+- `Nodar Okroshiashvili <https://github.com/Okroshiashvili>`_
+- `Luís Seabra  <https://github.com/luismavs>`_
+- `Kyle Gilde <https://github.com/kylegilde>`_
+- `Soledad Galli <https://github.com/solegalli>`_
+
+In this release, we make Feature-engine transformers compatible with the `set_output`
+API from Scikit-learn, which was released in version 1.2.0. We also make Feature-engine
+compatible with the newest direction of pandas, in removing the `inplace` functionality
+that our transformers use under the hood.
+
+We introduce a major change: most of the **categorical encoders can now encode variables
+even if they have missing data**.
+
+We are also releasing **3 brand new transformers**: One for discretization, one for feature
+selection and one for operations between datetime variables.
+
+We also greatly improved the performance of `DropDuplicateFeatures` and fixed some
+smaller bugs here and there.
+
+We'd like to thank all contributors for fixing bugs and expanding the functionality
+and documentation of Feature-engine.
+
+Thank you so much to all contributors and to those of you who created issues flagging bugs or
+requesting new functionality.
+
+New transformers
+~~~~~~~~~~~~~~~~
+
+- **ProbeFeatureSelection**: introduces random features and selects variables whose importance is greater than the random ones (`Morgan Sell <https://github.com/Morgan-Sell>`_ and `Soledad Galli <https://github.com/solegalli>`_)
+- **DatetimeSubtraction**: creates new features by subtracting datetime variables (`Kyle Gilde <https://github.com/kylegilde>`_ and `Soledad Galli <https://github.com/solegalli>`_)
+- **GeometricWidthDiscretiser**: sorts continuous variables into intervals determined by geometric progression (`Gleb Levitski <https://github.com/GLevv>`_)
+
+New functionality
+~~~~~~~~~~~~~~~~~
+
+- Allow categorical encoders to encode variables with NaN (`Soledad Galli <https://github.com/solegalli>`_)
+- Make transformers compatible with new `set_output` functionality from sklearn (`Soledad Galli <https://github.com/solegalli>`_)
+- The `ArbitraryDiscretiser()` now includes the lowest limits in the intervals (`Soledad Galli <https://github.com/solegalli>`_)
+
+New modules
+~~~~~~~~~~~
+
+- New **Datasets** module with functions to load specific datasets (`Alfonso Tobar <https://github.com/datacubeR>`_)
+- New **variable_handling** module with functions to automatically select numerical, categorical, or datetime variables (`Soledad Galli <https://github.com/solegalli>`_)
+
+Bug fixes
+~~~~~~~~~
+
+- Fixed bug in `DropFeatures()` (`Luís Seabra  <https://github.com/luismavs>`_)
+- Fixed bug in `RecursiveFeatureElimination()` caused when only 1 feature remained in data (`Soledad Galli <https://github.com/solegalli>`_)
+
+Documentation
+~~~~~~~~~~~~~
+
+- Add example code snippets to the selection module API docs (`Alfonso Tobar <https://github.com/datacubeR>`_)
+- Add example code snippets to the outlier module API docs (`Alfonso Tobar <https://github.com/datacubeR>`_)
+- Add example code snippets to the transformation module API docs (`Alfonso Tobar <https://github.com/datacubeR>`_)
+- Add example code snippets to the time series module API docs (`Alfonso Tobar <https://github.com/datacubeR>`_)
+- Add example code snippets to the preprocessing module API docs (`Alfonso Tobar <https://github.com/datacubeR>`_)
+- Add example code snippets to the wrapper module API docs (`Alfonso Tobar <https://github.com/datacubeR>`_)
+- Updated documentation using new Dataset module (`Alfonso Tobar <https://github.com/datacubeR>`_ and `Soledad Galli <https://github.com/solegalli>`_)
+- Reorganized Readme badges (`Gleb Levitski <https://github.com/GLevv>`_)
+- New Jupyter notebooks for `GeometricWidthDiscretiser` (`Gleb Levitski <https://github.com/GLevv>`_)
+- Fixed typos (`Gleb Levitski <https://github.com/GLevv>`_)
+- Remove examples using the boston house dataset (`Soledad Galli <https://github.com/solegalli>`_)
+- Update sponsor page and contribute page (`Soledad Galli <https://github.com/solegalli>`_)
+
+
+Deprecations
+~~~~~~~~~~~~
+
+- The class `PRatioEncoder` is no longer supported and was removed from the API (`Soledad Galli <https://github.com/solegalli>`_)
+
+Code improvements
+~~~~~~~~~~~~~~~~~
+
+- Massive improvement in the performance (speed) of `DropDuplicateFeatures()` (`Nodar Okroshiashvili <https://github.com/Okroshiashvili>`_)
+- Remove `inplace` and other issues related to pandas new direction (`Luís Seabra  <https://github.com/luismavs>`_)
+- Move most docstrings to dedicated docstrings module  (`Soledad Galli <https://github.com/solegalli>`_)
+- Unnest tests for encoders (`Soledad Galli <https://github.com/solegalli>`_)
diff --git a/feature_engine/VERSION b/feature_engine/VERSION
index 4cda8f19e..dc1e644a1 100644
--- a/feature_engine/VERSION
+++ b/feature_engine/VERSION
@@ -1 +1 @@
-1.5.2
+1.6.0

From feddb0628c9e7ffe866418a81c697cecc62c1987 Mon Sep 17 00:00:00 2001
From: Claudio Salvatore Arcidiacono
 <22871978+ClaudioSalvatoreArcidiacono@users.noreply.github.com>
Date: Sat, 22 Apr 2023 14:14:52 +0200
Subject: [PATCH 6/6] refactor code to work with pandas 2.0 (#660)

* Transform positional argument into keyword argument

From pandas 2.0, `any` only accepts keyword arguments
ref https://github.com/pandas-dev/pandas/pull/44896

* Change how reciprocal is computed

I have not fully understood why this solves the problem, but splitting
the operation in 2 lines does not seem to work

* Catch warnings from pandas.to_datetime

Now pandas.to_datetime raises a warning when the column cannot be converted

* check_dtype=False in tests datetime features

Pandas dataframes created from python integers are created with int
column types `int64` but the operation tested returns `int32` which
caused issues

* Use droplevel before merging

Merging dfs with different column levels has been disallowed
ref https://github.com/pandas-dev/pandas/issues/34862

* Change expected values for months

I am not sure why this caused an issue, maybe due to type casting?

* run black

* run black on tests

* isort _variable_type_checks.py

* Fix datetime_subtraction

---------

Co-authored-by: Claudio Salvatore Arcidiacono <claudio.arcidiacono@mollie.com>
---
 .../datetime/datetime_subtraction.py          |  2 +-
 .../imputation/drop_missing_data.py           |  2 +-
 feature_engine/transformation/reciprocal.py   |  6 ++---
 .../_variable_type_checks.py                  |  8 +++---
 tests/test_datetime/test_datetime_features.py | 26 ++++++++++++++-----
 .../test_forecasting/test_window_features.py  |  4 +++
 6 files changed, 34 insertions(+), 14 deletions(-)

diff --git a/feature_engine/datetime/datetime_subtraction.py b/feature_engine/datetime/datetime_subtraction.py
index 2a900a280..8fdb83b2e 100644
--- a/feature_engine/datetime/datetime_subtraction.py
+++ b/feature_engine/datetime/datetime_subtraction.py
@@ -318,7 +318,7 @@ def _sub(self, dt_df: pd.DataFrame):
             new_df[new_varnames] = (
                 dt_df[self.variables_]
                 .sub(dt_df[reference], axis=0)
-                .apply(lambda s: s / np.timedelta64(1, self.output_unit))
+                .div(np.timedelta64(1, self.output_unit).astype("timedelta64[ns]"))
             )
 
         if self.new_variables_names is not None:
diff --git a/feature_engine/imputation/drop_missing_data.py b/feature_engine/imputation/drop_missing_data.py
index cd9e6fe12..c6af28366 100644
--- a/feature_engine/imputation/drop_missing_data.py
+++ b/feature_engine/imputation/drop_missing_data.py
@@ -205,7 +205,7 @@ def return_na_data(self, X: pd.DataFrame) -> pd.DataFrame:
             idx = pd.isnull(X[self.variables_]).mean(axis=1) >= self.threshold
             idx = idx[idx]
         else:
-            idx = pd.isnull(X[self.variables_]).any(1)
+            idx = pd.isnull(X[self.variables_]).any(axis=1)
             idx = idx[idx]
 
         return X.loc[idx.index, :]
diff --git a/feature_engine/transformation/reciprocal.py b/feature_engine/transformation/reciprocal.py
index 05546952e..66fe7f38b 100644
--- a/feature_engine/transformation/reciprocal.py
+++ b/feature_engine/transformation/reciprocal.py
@@ -96,7 +96,6 @@ class ReciprocalTransformer(BaseNumericalTransformer):
     def __init__(
         self, variables: Union[None, int, str, List[Union[str, int]]] = None
     ) -> None:
-
         self.variables = _check_init_parameter_variables(variables)
 
     def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
@@ -152,8 +151,9 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
 
         # transform
         # for some reason reciprocal does not work with integers
-        X.loc[:, self.variables_] = X.loc[:, self.variables_].astype("float")
-        X.loc[:, self.variables_] = np.reciprocal(X.loc[:, self.variables_])
+        X.loc[:, self.variables_] = np.reciprocal(
+            X.loc[:, self.variables_].astype("float")
+        )
 
         return X
 
diff --git a/feature_engine/variable_handling/_variable_type_checks.py b/feature_engine/variable_handling/_variable_type_checks.py
index 4031a0597..fe4f0ac2d 100644
--- a/feature_engine/variable_handling/_variable_type_checks.py
+++ b/feature_engine/variable_handling/_variable_type_checks.py
@@ -1,3 +1,5 @@
+import warnings
+
 import pandas as pd
 from pandas.core.dtypes.common import is_categorical_dtype as is_categorical
 from pandas.core.dtypes.common import is_datetime64_any_dtype as is_datetime
@@ -6,7 +8,6 @@
 
 
 def _is_categorical_and_is_not_datetime(column: pd.Series) -> bool:
-
     # check for datetime only if object cannot be cast as numeric because
     # if it could pd.to_datetime would convert it to datetime regardless
     if is_object(column):
@@ -25,7 +26,9 @@ def _is_categories_num(column: pd.Series) -> bool:
 
 
 def _is_convertible_to_dt(column: pd.Series) -> bool:
-    return is_datetime(pd.to_datetime(column, errors="ignore", utc=True))
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        return is_datetime(pd.to_datetime(column, errors="ignore", utc=True))
 
 
 def _is_convertible_to_num(column: pd.Series) -> bool:
@@ -33,7 +36,6 @@ def _is_convertible_to_num(column: pd.Series) -> bool:
 
 
 def _is_categorical_and_is_datetime(column: pd.Series) -> bool:
-
     # check for datetime only if object cannot be cast as numeric because
     # if it could pd.to_datetime would convert it to datetime regardless
     if is_object(column):
diff --git a/tests/test_datetime/test_datetime_features.py b/tests/test_datetime/test_datetime_features.py
index 410e14d68..1727f27b6 100644
--- a/tests/test_datetime/test_datetime_features.py
+++ b/tests/test_datetime/test_datetime_features.py
@@ -183,6 +183,7 @@ def test_extract_datetime_features_with_default_options(
         df_datetime_transformed[
             vars_non_dt + [var + feat for var in vars_dt for feat in feat_names_default]
         ],
+        check_dtype=False,
     )
 
 
@@ -198,6 +199,7 @@ def test_extract_datetime_features_from_specified_variables(
             + ["datetime_range", "date_obj2", "time_obj"]
             + ["date_obj1" + feat for feat in feat_names_default]
         ],
+        check_dtype=False,
     )
 
     # multiple datetime variables
@@ -215,6 +217,7 @@ def test_extract_datetime_features_from_specified_variables(
                 for feat in feat_names_default
             ]
         ],
+        check_dtype=False,
     )
 
     # multiple datetime variables in different order than they appear in the df
@@ -232,6 +235,7 @@ def test_extract_datetime_features_from_specified_variables(
                 for feat in feat_names_default
             ]
         ],
+        check_dtype=False,
     )
 
     # datetime variable is index
@@ -251,12 +255,15 @@ def test_extract_datetime_features_from_specified_variables(
             ],
             axis=1,
         ),
+        check_dtype=False,
     )
 
 
 def test_extract_all_datetime_features(df_datetime, df_datetime_transformed):
     X = DatetimeFeatures(features_to_extract="all").fit_transform(df_datetime)
-    pd.testing.assert_frame_equal(X, df_datetime_transformed.drop(vars_dt, axis=1))
+    pd.testing.assert_frame_equal(
+        X, df_datetime_transformed.drop(vars_dt, axis=1), check_dtype=False
+    )
 
 
 def test_extract_specified_datetime_features(df_datetime, df_datetime_transformed):
@@ -269,6 +276,7 @@ def test_extract_specified_datetime_features(df_datetime, df_datetime_transforme
             vars_non_dt
             + [var + "_" + feat for var in vars_dt for feat in ["semester", "week"]]
         ],
+        check_dtype=False,
     )
 
     # different order than they appear in the glossary
@@ -281,6 +289,7 @@ def test_extract_specified_datetime_features(df_datetime, df_datetime_transforme
             vars_non_dt
             + [var + "_" + feat for var in vars_dt for feat in ["hour", "day_of_week"]]
         ],
+        check_dtype=False,
     )
 
 
@@ -290,7 +299,9 @@ def test_extract_features_from_categorical_variable(
     cat_date = pd.DataFrame({"date_obj1": df_datetime["date_obj1"].astype("category")})
     X = DatetimeFeatures(variables="date_obj1").fit_transform(cat_date)
     pd.testing.assert_frame_equal(
-        X, df_datetime_transformed[["date_obj1" + feat for feat in feat_names_default]]
+        X,
+        df_datetime_transformed[["date_obj1" + feat for feat in feat_names_default]],
+        check_dtype=False,
     )
 
 
@@ -311,6 +322,7 @@ def test_extract_features_from_different_timezones(
         df_datetime_transformed[["time_obj_hour"]].apply(
             lambda x: x.subtract(time_zones)
         ),
+        check_dtype=False,
     )
     exp_err_msg = (
         "ValueError: variable(s) time_obj "
@@ -356,7 +368,7 @@ def test_extract_features_from_localized_tz_variables():
     # transform
     X = transformer.transform(tz_df)
     df_expected = pd.DataFrame({"date_var_hour": [1, 2, 2, 2, 2, 3, 3]})
-    pd.testing.assert_frame_equal(X, df_expected)
+    pd.testing.assert_frame_equal(X, df_expected, check_dtype=False)
 
     # when utc is True
     transformer = DatetimeFeatures(features_to_extract=["hour"], utc=True).fit(tz_df)
@@ -372,7 +384,7 @@ def test_extract_features_from_localized_tz_variables():
     # transform
     X = transformer.transform(tz_df)
     df_expected = pd.DataFrame({"date_var_hour": [5, 6, 6, 6, 6, 7, 7]})
-    pd.testing.assert_frame_equal(X, df_expected)
+    pd.testing.assert_frame_equal(X, df_expected, check_dtype=False)
 
 
 def test_extract_features_without_dropping_original_variables(
@@ -399,6 +411,7 @@ def test_extract_features_without_dropping_original_variables(
             ],
             axis=1,
         ),
+        check_dtype=False,
     )
 
 
@@ -435,6 +448,7 @@ def test_extract_features_with_different_datetime_parsing_options(df_datetime):
     pd.testing.assert_frame_equal(
         X,
         pd.DataFrame({"date_obj2_day_of_month": [10, 31, 30, 17]}),
+        check_dtype=False,
     )
 
     X = DatetimeFeatures(features_to_extract=["year"], yearfirst=True).fit_transform(
@@ -443,6 +457,7 @@ def test_extract_features_with_different_datetime_parsing_options(df_datetime):
     pd.testing.assert_frame_equal(
         X,
         pd.DataFrame({"date_obj2_year": [2010, 2009, 1995, 2004]}),
+        check_dtype=False,
     )
 
 
@@ -457,8 +472,7 @@ def test_get_feature_names_out(df_datetime, df_datetime_transformed):
         transformer.get_feature_names_out(input_features=vars_dt)
 
     with pytest.raises(ValueError):
-        transformer.get_feature_names_out(input_features=["date_obj1"])\
-
+        transformer.get_feature_names_out(input_features=["date_obj1"])
     # default features from 1 variable
     transformer = DatetimeFeatures(variables="date_obj1")
     X = transformer.fit_transform(df_datetime)
diff --git a/tests/test_time_series/test_forecasting/test_window_features.py b/tests/test_time_series/test_forecasting/test_window_features.py
index 344f90e3f..a03259b7e 100644
--- a/tests/test_time_series/test_forecasting/test_window_features.py
+++ b/tests/test_time_series/test_forecasting/test_window_features.py
@@ -380,8 +380,10 @@ def test_multiple_windows(df_time):
     X = df_time.copy()
     num_vars = ["ambient_temp", "module_temp", "irradiation"]
     tmp = X[num_vars].rolling(2).agg(["sum", "mean"]).shift(periods=15, freq="min")
+    tmp.columns = tmp.columns.droplevel()
     X_tr = X.merge(tmp, left_index=True, right_index=True, how="left")
     tmp = X[num_vars].rolling(3).agg(["sum", "mean"]).shift(periods=15, freq="min")
+    tmp.columns = tmp.columns.droplevel()
     X_tr = X_tr.merge(tmp, left_index=True, right_index=True, how="left")
     X_tr.columns = transformer.get_feature_names_out()
 
@@ -404,6 +406,7 @@ def test_multiple_windows(df_time):
         .agg(["sum", "mean"])
         .shift(freq="30min")
     )
+    tmp.columns = tmp.columns.droplevel()
     X_tr = X.merge(tmp, left_index=True, right_index=True, how="left")
     tmp = (
         X[["ambient_temp", "irradiation"]]
@@ -411,6 +414,7 @@ def test_multiple_windows(df_time):
         .agg(["sum", "mean"])
         .shift(freq="30min")
     )
+    tmp.columns = tmp.columns.droplevel()
     X_tr = X_tr.merge(tmp, left_index=True, right_index=True, how="left")
     X_tr.columns = transformer.get_feature_names_out()