REFACTOR: split MapieRegressor.fit into .init_fit, .fit_estimator, and .conformalize; split EnsembleRegressor.fit into .fit_single_estimator and .fit_multi_estimators; remove the unneeded EnsembleEstimator interface #564

Merged
merged 2 commits on Dec 16, 2024
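
The diffs below split MapieRegressor.fit into three public steps (init_fit, fit_estimator, conformalize) and EnsembleRegressor.fit into two (fit_single_estimator, fit_multi_estimators), while keeping fit as a thin wrapper over the new methods. As a reader's sketch of the resulting call sequence (not part of the diff; data, hyperparameters, and variable names are illustrative placeholders):

import numpy as np
from sklearn.linear_model import LinearRegression
from mapie.regression import MapieRegressor

# Toy data, purely illustrative
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = X @ np.array([1.0, 2.0, -1.0]) + rng.normal(scale=0.1, size=200)

mapie = MapieRegressor(estimator=LinearRegression(), method="plus", cv=5)

# Unchanged entry point: mapie.fit(X, y) still performs all three steps.
# The same steps, spelled out with the methods introduced in this PR:
X_, y_, sample_weight, groups = mapie.init_fit(X, y)  # input checks, builds the EnsembleRegressor
mapie.fit_estimator(X_, y_, sample_weight, groups)     # fits the single estimator
mapie.conformalize(X_, y_, sample_weight, groups)      # fits the CV estimators, then calibrates

y_pred, y_pis = mapie.predict(X, alpha=0.1)            # prediction intervals as before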
2 changes: 2 additions & 0 deletions HISTORY.rst
@@ -12,6 +12,8 @@ History
* Fix issue 528 to correct broken ENS image in the documentation
* Fix issue 548 to correct labels generated in tutorial
* Fix issue 547 to fix wrong warning
* Fix issue 480 (correct display of mathematical equations in generated notebooks)
* Refactor MapieRegressor and EnsembleRegressor, deprecate EnsembleRegressor.fit

0.9.1 (2024-09-13)
------------------
2 changes: 0 additions & 2 deletions mapie/estimator/__init__.py
@@ -1,9 +1,7 @@
from .interface import EnsembleEstimator
from .regressor import EnsembleRegressor
from .classifier import EnsembleClassifier

__all__ = [
"EnsembleEstimator",
"EnsembleRegressor",
"EnsembleClassifier",
]
3 changes: 1 addition & 2 deletions mapie/estimator/classifier.py
@@ -10,11 +10,10 @@
from sklearn.utils.validation import _num_samples, check_is_fitted

from mapie._typing import ArrayLike, NDArray
from mapie.estimator.interface import EnsembleEstimator
from mapie.utils import check_no_agg_cv, fit_estimator, fix_number_of_classes


class EnsembleClassifier(EnsembleEstimator):
class EnsembleClassifier:
"""
This class implements methods to handle the training and usage of the
estimator. This estimator can be unique or composed by cross validated
40 changes: 0 additions & 40 deletions mapie/estimator/interface.py

This file was deleted.

120 changes: 88 additions & 32 deletions mapie/estimator/regressor.py
@@ -6,17 +6,16 @@
from joblib import Parallel, delayed
from sklearn.base import RegressorMixin, clone
from sklearn.model_selection import BaseCrossValidator
from sklearn.utils import _safe_indexing
from sklearn.utils import _safe_indexing, deprecated
from sklearn.utils.validation import _num_samples, check_is_fitted

from mapie._typing import ArrayLike, NDArray
from mapie.aggregation_functions import aggregate_all, phi2D
from mapie.estimator.interface import EnsembleEstimator
from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv,
fit_estimator)


class EnsembleRegressor(EnsembleEstimator):
class EnsembleRegressor:
"""
This class implements methods to handle the training and usage of the
estimator. This estimator can be unique or composed by cross validated
@@ -409,6 +408,11 @@ def predict_calib(

return y_pred

@deprecated(
"WARNING: EnsembleRegressor.fit is deprecated."
"Instead use EnsembleRegressor.fit_single_estimator"
"then EnsembleRegressor.fit_multi_estimators"
)
def fit(
self,
X: ArrayLike,
@@ -451,42 +455,60 @@ def fit(
EnsembleRegressor
The estimator fitted.
"""
# Initialization
single_estimator_: RegressorMixin
estimators_: List[RegressorMixin] = []
full_indexes = np.arange(_num_samples(X))
cv = self.cv
self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)
estimator = self.estimator
self.fit_single_estimator(
X,
y,
sample_weight,
groups,
**fit_params
)

self.fit_multi_estimators(
X,
y,
sample_weight,
groups,
**fit_params
)

return self

def fit_multi_estimators(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**fit_params
) -> EnsembleRegressor:

n_samples = _num_samples(y)
estimators: List[RegressorMixin] = []

# Computation
if cv == "prefit":
single_estimator_ = estimator
if self.cv == "prefit":

# Create a placeholder attribute 'k_' filled with NaN values
# This attribute is defined for consistency but
# is not used in prefit mode
self.k_ = np.full(
shape=(n_samples, 1), fill_value=np.nan, dtype=float
)

else:
single_estimator_ = self._fit_oof_estimator(
clone(estimator),
X,
y,
full_indexes,
sample_weight,
**fit_params
)
cv = cast(BaseCrossValidator, cv)
cv = cast(BaseCrossValidator, self.cv)
self.k_ = np.full(
shape=(n_samples, cv.get_n_splits(X, y, groups)),
fill_value=np.nan,
dtype=float,
)
if self.method == "naive":
estimators_ = [single_estimator_]
else:
estimators_ = Parallel(self.n_jobs, verbose=self.verbose)(

if self.method != "naive":
estimators = Parallel(
self.n_jobs,
verbose=self.verbose
)(
delayed(self._fit_oof_estimator)(
clone(estimator),
clone(self.estimator),
X,
y,
train_index,
@@ -495,13 +517,47 @@ def fit(
)
for train_index, _ in cv.split(X, y, groups)
)
# In split-CP, we keep only the model fitted on train dataset
if self.use_split_method_:
single_estimator_ = estimators_[0]

self.single_estimator_ = single_estimator_
self.estimators_ = estimators_
self.estimators_ = estimators

return self

def fit_single_estimator(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**fit_params
) -> EnsembleRegressor:

self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)
single_estimator_: RegressorMixin

if self.cv == "prefit":
single_estimator_ = self.estimator
else:
cv = cast(BaseCrossValidator, self.cv)
if self.use_split_method_:
train_indexes = [
train_index for train_index, test_index in cv.split(
X, y, groups)
][0]
indexes = train_indexes
else:
full_indexes = np.arange(_num_samples(X))
indexes = full_indexes

single_estimator_ = self._fit_oof_estimator(
clone(self.estimator),
X,
y,
indexes,
sample_weight,
**fit_params
)

self.single_estimator_ = single_estimator_
return self

def predict(
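
For completeness, a sketch of driving the (normally internal) EnsembleRegressor directly through the new two-step API; the deprecated EnsembleRegressor.fit now simply chains these two calls. Constructor arguments follow the positional order used in the new test further down, which I read as (estimator, method, cv, agg_function, n_jobs, random_state, test_size, verbose); data and settings are placeholders:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from mapie.estimator import EnsembleRegressor

rng = np.random.default_rng(1)
X = rng.normal(size=(100, 2))
y = X.sum(axis=1) + rng.normal(scale=0.1, size=100)

ens = EnsembleRegressor(
    LinearRegression(),                               # estimator
    "plus",                                           # method
    KFold(n_splits=5, shuffle=True, random_state=1),  # cv
    "mean",                                           # agg_function
    None,                                             # n_jobs
    1,                                                # random_state
    0.20,                                             # test_size
    False,                                            # verbose
)

ens.fit_single_estimator(X, y)   # sets ens.single_estimator_ (and ens.use_split_method_)
ens.fit_multi_estimators(X, y)   # sets ens.estimators_ and ens.k_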
70 changes: 61 additions & 9 deletions mapie/regression/regression.py
@@ -513,12 +513,26 @@ def fit(
MapieRegressor
The model itself.
"""
fit_params = kwargs.pop('fit_params', {})
predict_params = kwargs.pop('predict_params', {})
if len(predict_params) > 0:
self._predict_params = True
else:
self._predict_params = False

X, y, sample_weight, groups = self.init_fit(
X, y, sample_weight, groups, **kwargs
)

self.fit_estimator(X, y, sample_weight, groups)
self.conformalize(X, y, sample_weight, groups, **kwargs)

return self

def init_fit(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**kwargs: Any
):

self._fit_params = kwargs.pop('fit_params', {})

# Checks
(estimator,
@@ -540,9 +554,47 @@
self.test_size,
self.verbose
)
# Fit the prediction function
self.estimator_ = self.estimator_.fit(
X, y, sample_weight=sample_weight, groups=groups, **fit_params

return (
X, y, sample_weight, groups
)

def fit_estimator(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
) -> MapieRegressor:

self.estimator_.fit_single_estimator(
X,
y,
sample_weight=sample_weight,
groups=groups,
**self._fit_params
)

return self

def conformalize(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**kwargs: Any
) -> MapieRegressor:

predict_params = kwargs.pop('predict_params', {})
self._predict_params = len(predict_params) > 0

self.estimator_.fit_multi_estimators(
X,
y,
sample_weight,
groups,
**self._fit_params
)

# Predict on calibration data
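
A note on parameter routing in the hunks above: fit_params is popped once in init_fit and stored as self._fit_params, so both fit_estimator and conformalize forward the same keyword arguments to the wrapped estimator's fit, while predict_params is popped (and recorded via self._predict_params) only inside conformalize. A small sketch of both equivalent call styles (the empty dicts are placeholders for whatever the wrapped estimator accepts):

import numpy as np
from sklearn.linear_model import LinearRegression
from mapie.regression import MapieRegressor

X = np.random.RandomState(0).normal(size=(60, 2))
y = X.sum(axis=1)

# 1) Unchanged public entry point; kwargs are split internally:
mapie = MapieRegressor(estimator=LinearRegression(), cv=5)
mapie.fit(X, y, fit_params={}, predict_params={})

# 2) The same routing, spelled out:
mapie2 = MapieRegressor(estimator=LinearRegression(), cv=5)
X_, y_, sw, grp = mapie2.init_fit(X, y, fit_params={})      # pops and stores _fit_params
mapie2.fit_estimator(X_, y_, sw, grp)                        # reuses _fit_params
mapie2.conformalize(X_, y_, sw, grp, predict_params={})      # pops predict_params here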
18 changes: 18 additions & 0 deletions mapie/tests/test_regression.py
@@ -1036,3 +1036,21 @@ def test_check_change_method_to_base(method: str, cv: str) -> None:
)
mapie_reg.fit(X_val, y_val)
assert mapie_reg.method == "base"


def test_deprecated_ensemble_regressor_fit_warning() -> None:
ens_reg = EnsembleRegressor(
LinearRegression(),
"plus",
KFold(n_splits=5, random_state=None, shuffle=True),
"nonsense",
None,
random_state,
0.20,
False
)
with pytest.warns(
FutureWarning,
match=r".WARNING: EnsembleRegressor.fit is deprecated.*"
):
ens_reg.fit(X, y)