From e4fbcf5c94d6776b9645520831450828da27af89 Mon Sep 17 00:00:00 2001 From: Valentin Laurent Date: Fri, 13 Dec 2024 17:59:18 +0100 Subject: [PATCH 1/2] REFACTO: split MapieRegressor.fit into .init_fit, .fit_estimator, and .conformalize, split EnsembleRegressor .fit into .fit_single_estimator and .fit_multi_estimators, remove EnsembleEstimator useless interface --- mapie/estimator/__init__.py | 2 - mapie/estimator/classifier.py | 3 +- mapie/estimator/interface.py | 40 ----------- mapie/estimator/regressor.py | 120 ++++++++++++++++++++++++--------- mapie/regression/regression.py | 70 ++++++++++++++++--- mapie/tests/test_regression.py | 18 +++++ 6 files changed, 168 insertions(+), 85 deletions(-) delete mode 100644 mapie/estimator/interface.py diff --git a/mapie/estimator/__init__.py b/mapie/estimator/__init__.py index 5758db9e6..f4b325fed 100644 --- a/mapie/estimator/__init__.py +++ b/mapie/estimator/__init__.py @@ -1,9 +1,7 @@ -from .interface import EnsembleEstimator from .regressor import EnsembleRegressor from .classifier import EnsembleClassifier __all__ = [ - "EnsembleEstimator", "EnsembleRegressor", "EnsembleClassifier", ] diff --git a/mapie/estimator/classifier.py b/mapie/estimator/classifier.py index ac882996a..0777b9673 100644 --- a/mapie/estimator/classifier.py +++ b/mapie/estimator/classifier.py @@ -10,11 +10,10 @@ from sklearn.utils.validation import _num_samples, check_is_fitted from mapie._typing import ArrayLike, NDArray -from mapie.estimator.interface import EnsembleEstimator from mapie.utils import check_no_agg_cv, fit_estimator, fix_number_of_classes -class EnsembleClassifier(EnsembleEstimator): +class EnsembleClassifier: """ This class implements methods to handle the training and usage of the estimator. 
This estimator can be unique or composed by cross validated diff --git a/mapie/estimator/interface.py b/mapie/estimator/interface.py deleted file mode 100644 index e015d4d7c..000000000 --- a/mapie/estimator/interface.py +++ /dev/null @@ -1,40 +0,0 @@ -from __future__ import annotations - -from abc import ABCMeta, abstractmethod -from typing import Tuple, Union - -from mapie._typing import ArrayLike, NDArray - - -class EnsembleEstimator(metaclass=ABCMeta): - """ - This class implements methods to handle the training and usage of the - estimator. This estimator can be unique or composed by cross validated - estimators. - """ - - @abstractmethod - def fit( - self, - X: ArrayLike, - y: ArrayLike, - **kwargs - ) -> EnsembleEstimator: - """ - Fit the base estimator under the ``single_estimator_`` attribute. - Fit all cross-validated estimator clones - and rearrange them into a list, the ``estimators_`` attribute. - Out-of-fold conformity scores are stored under - the ``conformity_scores_`` attribute. - """ - - @abstractmethod - def predict( - self, - X: ArrayLike, - **kwargs - ) -> Union[NDArray, Tuple[NDArray, NDArray, NDArray]]: - """ - Predict target from X. It also computes the prediction per train sample - for each test sample according to ``self.method``. 
- """ diff --git a/mapie/estimator/regressor.py b/mapie/estimator/regressor.py index d300863a9..bad8988ca 100644 --- a/mapie/estimator/regressor.py +++ b/mapie/estimator/regressor.py @@ -6,17 +6,16 @@ from joblib import Parallel, delayed from sklearn.base import RegressorMixin, clone from sklearn.model_selection import BaseCrossValidator -from sklearn.utils import _safe_indexing +from sklearn.utils import _safe_indexing, deprecated from sklearn.utils.validation import _num_samples, check_is_fitted from mapie._typing import ArrayLike, NDArray from mapie.aggregation_functions import aggregate_all, phi2D -from mapie.estimator.interface import EnsembleEstimator from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv, fit_estimator) -class EnsembleRegressor(EnsembleEstimator): +class EnsembleRegressor: """ This class implements methods to handle the training and usage of the estimator. This estimator can be unique or composed by cross validated @@ -409,6 +408,11 @@ def predict_calib( return y_pred + @deprecated( + "WARNING: EnsembleRegressor.fit is deprecated. " + "Instead use EnsembleRegressor.fit_single_estimator " + "then EnsembleRegressor.fit_multi_estimators" + ) def fit( self, X: ArrayLike, @@ -451,42 +455,60 @@ def fit( EnsembleRegressor The estimator fitted.
""" - # Initialization - single_estimator_: RegressorMixin - estimators_: List[RegressorMixin] = [] - full_indexes = np.arange(_num_samples(X)) - cv = self.cv - self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_) - estimator = self.estimator + self.fit_single_estimator( + X, + y, + sample_weight, + groups, + **fit_params + ) + + self.fit_multi_estimators( + X, + y, + sample_weight, + groups, + **fit_params + ) + + return self + + def fit_multi_estimators( + self, + X: ArrayLike, + y: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + groups: Optional[ArrayLike] = None, + **fit_params + ) -> EnsembleRegressor: + n_samples = _num_samples(y) + estimators: List[RegressorMixin] = [] - # Computation - if cv == "prefit": - single_estimator_ = estimator + if self.cv == "prefit": + + # Create a placeholder attribute 'k_' filled with NaN values + # This attribute is defined for consistency but + # is not used in prefit mode self.k_ = np.full( shape=(n_samples, 1), fill_value=np.nan, dtype=float ) + else: - single_estimator_ = self._fit_oof_estimator( - clone(estimator), - X, - y, - full_indexes, - sample_weight, - **fit_params - ) - cv = cast(BaseCrossValidator, cv) + cv = cast(BaseCrossValidator, self.cv) self.k_ = np.full( shape=(n_samples, cv.get_n_splits(X, y, groups)), fill_value=np.nan, dtype=float, ) - if self.method == "naive": - estimators_ = [single_estimator_] - else: - estimators_ = Parallel(self.n_jobs, verbose=self.verbose)( + + if self.method != "naive": + estimators = Parallel( + self.n_jobs, + verbose=self.verbose + )( delayed(self._fit_oof_estimator)( - clone(estimator), + clone(self.estimator), X, y, train_index, @@ -495,13 +517,47 @@ def fit( ) for train_index, _ in cv.split(X, y, groups) ) - # In split-CP, we keep only the model fitted on train dataset - if self.use_split_method_: - single_estimator_ = estimators_[0] - self.single_estimator_ = single_estimator_ - self.estimators_ = estimators_ + self.estimators_ = estimators + + 
return self + + def fit_single_estimator( + self, + X: ArrayLike, + y: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + groups: Optional[ArrayLike] = None, + **fit_params + ) -> EnsembleRegressor: + + self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_) + single_estimator_: RegressorMixin + + if self.cv == "prefit": + single_estimator_ = self.estimator + else: + cv = cast(BaseCrossValidator, self.cv) + if self.use_split_method_: + train_indexes = [ + train_index for train_index, test_index in cv.split( + X, y, groups) + ][0] + indexes = train_indexes + else: + full_indexes = np.arange(_num_samples(X)) + indexes = full_indexes + + single_estimator_ = self._fit_oof_estimator( + clone(self.estimator), + X, + y, + indexes, + sample_weight, + **fit_params + ) + self.single_estimator_ = single_estimator_ return self def predict( diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 8d6e10ffc..950a9f6af 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -513,12 +513,26 @@ def fit( MapieRegressor The model itself. 
""" - fit_params = kwargs.pop('fit_params', {}) - predict_params = kwargs.pop('predict_params', {}) - if len(predict_params) > 0: - self._predict_params = True - else: - self._predict_params = False + + X, y, sample_weight, groups = self.init_fit( + X, y, sample_weight, groups, **kwargs + ) + + self.fit_estimator(X, y, sample_weight, groups) + self.conformalize(X, y, sample_weight, groups, **kwargs) + + return self + + def init_fit( + self, + X: ArrayLike, + y: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + groups: Optional[ArrayLike] = None, + **kwargs: Any + ): + + self._fit_params = kwargs.pop('fit_params', {}) # Checks (estimator, @@ -540,9 +554,47 @@ def fit( self.test_size, self.verbose ) - # Fit the prediction function - self.estimator_ = self.estimator_.fit( - X, y, sample_weight=sample_weight, groups=groups, **fit_params + + return ( + X, y, sample_weight, groups + ) + + def fit_estimator( + self, + X: ArrayLike, + y: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + groups: Optional[ArrayLike] = None, + ) -> MapieRegressor: + + self.estimator_.fit_single_estimator( + X, + y, + sample_weight=sample_weight, + groups=groups, + **self._fit_params + ) + + return self + + def conformalize( + self, + X: ArrayLike, + y: ArrayLike, + sample_weight: Optional[ArrayLike] = None, + groups: Optional[ArrayLike] = None, + **kwargs: Any + ) -> MapieRegressor: + + predict_params = kwargs.pop('predict_params', {}) + self._predict_params = len(predict_params) > 0 + + self.estimator_.fit_multi_estimators( + X, + y, + sample_weight, + groups, + **self._fit_params ) # Predict on calibration data diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index 9fe6f9c5c..1840ddf91 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -1036,3 +1036,21 @@ def test_check_change_method_to_base(method: str, cv: str) -> None: ) mapie_reg.fit(X_val, y_val) assert mapie_reg.method == "base" + + +def 
test_deprecated_ensemble_regressor_fit_warning() -> None: + ens_reg = EnsembleRegressor( + LinearRegression(), + "plus", + KFold(n_splits=5, random_state=None, shuffle=True), + "nonsense", + None, + random_state, + 0.20, + False + ) + with pytest.warns( + FutureWarning, + match=r".WARNING: EnsembleRegressor.fit is deprecated.*" + ): + ens_reg.fit(X, y) From 88a26f898dbac77b59d4e303286e6075f5c9db87 Mon Sep 17 00:00:00 2001 From: Valentin Laurent Date: Fri, 13 Dec 2024 18:17:38 +0100 Subject: [PATCH 2/2] DOC: update HISTORY.rst for this PR and the previous one --- HISTORY.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/HISTORY.rst b/HISTORY.rst index 18a4fe855..af40fbb2b 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -12,6 +12,8 @@ History * Fix issue 528 to correct broken ENS image in the documentation * Fix issue 548 to correct labels generated in tutorial * Fix issue 547 to fix wrong warning +* Fix issue 480 (correct display of mathematical equations in generated notebooks) +* Refactor MapieRegressor and EnsembleRegressor, deprecate EnsembleRegressor.fit 0.9.1 (2024-09-13) ------------------