REFACTOR: split MapieRegressor.fit into .init_fit, .fit_estimator, and .conformalize; split EnsembleRegressor.fit into .fit_single_estimator and .fit_multi_estimators; remove the unneeded EnsembleEstimator interface #564

Merged
merged 2 commits on Dec 16, 2024
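
The diffs below split MapieRegressor.fit into three public steps (init_fit, fit_estimator, conformalize) and EnsembleRegressor.fit into two (fit_single_estimator, fit_multi_estimators), while keeping fit as a thin wrapper over the new methods. As a reader's sketch of the resulting call sequence (not part of the diff; data, hyperparameters, and variable names are illustrative placeholders):

import numpy as np
from sklearn.linear_model import LinearRegression
from mapie.regression import MapieRegressor

# Toy data, purely illustrative
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
y = X @ np.array([1.0, 2.0, -1.0]) + rng.normal(scale=0.1, size=200)

mapie = MapieRegressor(estimator=LinearRegression(), method="plus", cv=5)

# Unchanged entry point: mapie.fit(X, y) still performs all three steps.
# The same steps, spelled out with the methods introduced in this PR:
X_, y_, sample_weight, groups = mapie.init_fit(X, y)  # input checks, builds the EnsembleRegressor
mapie.fit_estimator(X_, y_, sample_weight, groups)     # fits the single estimator
mapie.conformalize(X_, y_, sample_weight, groups)      # fits the CV estimators, then calibrates

y_pred, y_pis = mapie.predict(X, alpha=0.1)            # prediction intervals as before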
2 changes: 2 additions & 0 deletions HISTORY.rst
@@ -12,6 +12,8 @@ History
* Fix issue 528 to correct broken ENS image in the documentation
* Fix issue 548 to correct labels generated in tutorial
* Fix issue 547 to fix wrong warning
* Fix issue 480 (correct display of mathematical equations in generated notebooks)
* Refactor MapieRegressor and EnsembleRegressor, deprecate EnsembleRegressor.fit

0.9.1 (2024-09-13)
------------------
2 changes: 0 additions & 2 deletions mapie/estimator/__init__.py
@@ -1,9 +1,7 @@
from .interface import EnsembleEstimator
from .regressor import EnsembleRegressor
from .classifier import EnsembleClassifier

__all__ = [
"EnsembleEstimator",
"EnsembleRegressor",
"EnsembleClassifier",
]
3 changes: 1 addition & 2 deletions mapie/estimator/classifier.py
@@ -10,11 +10,10 @@
from sklearn.utils.validation import _num_samples, check_is_fitted

from mapie._typing import ArrayLike, NDArray
from mapie.estimator.interface import EnsembleEstimator
from mapie.utils import check_no_agg_cv, fit_estimator, fix_number_of_classes


class EnsembleClassifier(EnsembleEstimator):
class EnsembleClassifier:
"""
This class implements methods to handle the training and usage of the
estimator. This estimator can be unique or composed by cross validated
40 changes: 0 additions & 40 deletions mapie/estimator/interface.py

This file was deleted.

120 changes: 88 additions & 32 deletions mapie/estimator/regressor.py
@@ -6,17 +6,16 @@
from joblib import Parallel, delayed
from sklearn.base import RegressorMixin, clone
from sklearn.model_selection import BaseCrossValidator
from sklearn.utils import _safe_indexing
from sklearn.utils import _safe_indexing, deprecated
from sklearn.utils.validation import _num_samples, check_is_fitted

from mapie._typing import ArrayLike, NDArray
from mapie.aggregation_functions import aggregate_all, phi2D
from mapie.estimator.interface import EnsembleEstimator
from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv,
fit_estimator)


class EnsembleRegressor(EnsembleEstimator):
class EnsembleRegressor:
"""
This class implements methods to handle the training and usage of the
estimator. This estimator can be unique or composed by cross validated
@@ -409,6 +408,11 @@ def predict_calib(

return y_pred

@deprecated(
"WARNING: EnsembleRegressor.fit is deprecated."
"Instead use EnsembleRegressor.fit_single_estimator"
"then EnsembleRegressor.fit_multi_estimators"
)
def fit(
self,
X: ArrayLike,
@@ -451,42 +455,60 @@ def fit(
EnsembleRegressor
The estimator fitted.
"""
# Initialization
single_estimator_: RegressorMixin
estimators_: List[RegressorMixin] = []
full_indexes = np.arange(_num_samples(X))
cv = self.cv
self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)
estimator = self.estimator
self.fit_single_estimator(
X,
y,
sample_weight,
groups,
**fit_params
)

self.fit_multi_estimators(
X,
y,
sample_weight,
groups,
**fit_params
)

return self

def fit_multi_estimators(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**fit_params
) -> EnsembleRegressor:

n_samples = _num_samples(y)
estimators: List[RegressorMixin] = []

# Computation
if cv == "prefit":
single_estimator_ = estimator
if self.cv == "prefit":

# Create a placeholder attribute 'k_' filled with NaN values
# This attribute is defined for consistency but
# is not used in prefit mode
self.k_ = np.full(
shape=(n_samples, 1), fill_value=np.nan, dtype=float
)

else:
single_estimator_ = self._fit_oof_estimator(
clone(estimator),
X,
y,
full_indexes,
sample_weight,
**fit_params
)
cv = cast(BaseCrossValidator, cv)
cv = cast(BaseCrossValidator, self.cv)
self.k_ = np.full(
shape=(n_samples, cv.get_n_splits(X, y, groups)),
fill_value=np.nan,
dtype=float,
)
if self.method == "naive":
estimators_ = [single_estimator_]
else:
estimators_ = Parallel(self.n_jobs, verbose=self.verbose)(

if self.method != "naive":
estimators = Parallel(
self.n_jobs,
verbose=self.verbose
)(
delayed(self._fit_oof_estimator)(
clone(estimator),
clone(self.estimator),
X,
y,
train_index,
@@ -495,13 +517,47 @@ def fit(
)
for train_index, _ in cv.split(X, y, groups)
)
# In split-CP, we keep only the model fitted on train dataset
if self.use_split_method_:
single_estimator_ = estimators_[0]

self.single_estimator_ = single_estimator_
self.estimators_ = estimators_
self.estimators_ = estimators

return self

def fit_single_estimator(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**fit_params
) -> EnsembleRegressor:

self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)
single_estimator_: RegressorMixin

if self.cv == "prefit":
single_estimator_ = self.estimator
else:
cv = cast(BaseCrossValidator, self.cv)
if self.use_split_method_:
train_indexes = [
train_index for train_index, test_index in cv.split(
X, y, groups)
][0]
indexes = train_indexes
else:
full_indexes = np.arange(_num_samples(X))
indexes = full_indexes

single_estimator_ = self._fit_oof_estimator(
clone(self.estimator),
X,
y,
indexes,
sample_weight,
**fit_params
)

self.single_estimator_ = single_estimator_
return self

def predict(
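
For completeness, a sketch of driving the (normally internal) EnsembleRegressor directly through the new two-step API; the deprecated EnsembleRegressor.fit now simply chains these two calls. Constructor arguments follow the positional order used in the new test further down, which I read as (estimator, method, cv, agg_function, n_jobs, random_state, test_size, verbose); data and settings are placeholders:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from mapie.estimator import EnsembleRegressor

rng = np.random.default_rng(1)
X = rng.normal(size=(100, 2))
y = X.sum(axis=1) + rng.normal(scale=0.1, size=100)

ens = EnsembleRegressor(
    LinearRegression(),                               # estimator
    "plus",                                           # method
    KFold(n_splits=5, shuffle=True, random_state=1),  # cv
    "mean",                                           # agg_function
    None,                                             # n_jobs
    1,                                                # random_state
    0.20,                                             # test_size
    False,                                            # verbose
)

ens.fit_single_estimator(X, y)   # sets ens.single_estimator_ (and ens.use_split_method_)
ens.fit_multi_estimators(X, y)   # sets ens.estimators_ and ens.k_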
70 changes: 61 additions & 9 deletions mapie/regression/regression.py
@@ -513,12 +513,26 @@ def fit(
MapieRegressor
The model itself.
"""
fit_params = kwargs.pop('fit_params', {})
predict_params = kwargs.pop('predict_params', {})
if len(predict_params) > 0:
self._predict_params = True
else:
self._predict_params = False

X, y, sample_weight, groups = self.init_fit(
X, y, sample_weight, groups, **kwargs
)

self.fit_estimator(X, y, sample_weight, groups)
self.conformalize(X, y, sample_weight, groups, **kwargs)

return self

def init_fit(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**kwargs: Any
):

self._fit_params = kwargs.pop('fit_params', {})

# Checks
(estimator,
@@ -540,9 +554,47 @@
self.test_size,
self.verbose
)
# Fit the prediction function
self.estimator_ = self.estimator_.fit(
X, y, sample_weight=sample_weight, groups=groups, **fit_params

return (
X, y, sample_weight, groups
)

def fit_estimator(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
) -> MapieRegressor:

self.estimator_.fit_single_estimator(
X,
y,
sample_weight=sample_weight,
groups=groups,
**self._fit_params
)

return self

def conformalize(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**kwargs: Any
) -> MapieRegressor:

predict_params = kwargs.pop('predict_params', {})
self._predict_params = len(predict_params) > 0

self.estimator_.fit_multi_estimators(
X,
y,
sample_weight,
groups,
**self._fit_params
)

# Predict on calibration data
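
A note on parameter routing in the hunks above: fit_params is popped once in init_fit and stored as self._fit_params, so both fit_estimator and conformalize forward the same keyword arguments to the wrapped estimator's fit, while predict_params is popped (and recorded via self._predict_params) only inside conformalize. A small sketch of both equivalent call styles (the empty dicts are placeholders for whatever the wrapped estimator accepts):

import numpy as np
from sklearn.linear_model import LinearRegression
from mapie.regression import MapieRegressor

X = np.random.RandomState(0).normal(size=(60, 2))
y = X.sum(axis=1)

# 1) Unchanged public entry point; kwargs are split internally:
mapie = MapieRegressor(estimator=LinearRegression(), cv=5)
mapie.fit(X, y, fit_params={}, predict_params={})

# 2) The same routing, spelled out:
mapie2 = MapieRegressor(estimator=LinearRegression(), cv=5)
X_, y_, sw, grp = mapie2.init_fit(X, y, fit_params={})      # pops and stores _fit_params
mapie2.fit_estimator(X_, y_, sw, grp)                        # reuses _fit_params
mapie2.conformalize(X_, y_, sw, grp, predict_params={})      # pops predict_params here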
18 changes: 18 additions & 0 deletions mapie/tests/test_regression.py
@@ -1036,3 +1036,21 @@ def test_check_change_method_to_base(method: str, cv: str) -> None:
)
mapie_reg.fit(X_val, y_val)
assert mapie_reg.method == "base"


def test_deprecated_ensemble_regressor_fit_warning() -> None:
ens_reg = EnsembleRegressor(
LinearRegression(),
"plus",
KFold(n_splits=5, random_state=None, shuffle=True),
"nonsense",
None,
random_state,
0.20,
False
)
with pytest.warns(
FutureWarning,
match=r".WARNING: EnsembleRegressor.fit is deprecated.*"
):
ens_reg.fit(X, y)