
Commit

Merge branch 'master' into refactor_cqr_split_fit
Valentin-Laurent authored Dec 16, 2024
2 parents 1b89d83 + e47171c commit fd9d293
Showing 7 changed files with 170 additions and 86 deletions.
3 changes: 2 additions & 1 deletion HISTORY.rst
@@ -12,7 +12,8 @@ History
* Fix issue 528 to correct broken ENS image in the documentation
* Fix issue 548 to correct labels generated in tutorial
* Fix issue 547 to fix wrong warning
* Refactored `MapieQuantileRegressor` fit into `prefit_estimators`, `fit_estimators`, and `conformalize` for improved modularity.
* Fix issue 480 (correct display of mathematical equations in generated notebooks)
* Refactor MapieRegressor, EnsembleRegressor, and MapieQuantileRegressor to prepare for the release of v1.0.0

0.9.1 (2024-09-13)
------------------
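The changelog entry above describes splitting the `MapieQuantileRegressor` fit into `prefit_estimators`, `fit_estimators`, and `conformalize`. A minimal sketch of how that split workflow might be used is given below; the method names come from the changelog entry, while their signatures and the choice of base estimator are assumptions for illustration, not taken from this diff (`prefit_estimators` would presumably be the analogous entry point when the quantile estimators are already fitted).

# Hypothetical sketch of the split-fit workflow named in the changelog.
# Method names are from the changelog; signatures are assumed.
from sklearn.datasets import make_regression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from mapie.regression import MapieQuantileRegressor

X, y = make_regression(n_samples=500, n_features=4, noise=1.0, random_state=0)
X_train, X_calib, y_train, y_calib = train_test_split(X, y, random_state=0)

mapie = MapieQuantileRegressor(
    estimator=GradientBoostingRegressor(loss="quantile"), alpha=0.1
)
mapie.fit_estimators(X_train, y_train)  # fit the lower, upper and median quantile estimators
mapie.conformalize(X_calib, y_calib)    # compute conformity scores on calibration data
y_pred, y_pis = mapie.predict(X_calib)  # point predictions and prediction intervals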
2 changes: 0 additions & 2 deletions mapie/estimator/__init__.py
@@ -1,9 +1,7 @@
from .interface import EnsembleEstimator
from .regressor import EnsembleRegressor
from .classifier import EnsembleClassifier

__all__ = [
"EnsembleEstimator",
"EnsembleRegressor",
"EnsembleClassifier",
]
3 changes: 1 addition & 2 deletions mapie/estimator/classifier.py
@@ -10,11 +10,10 @@
from sklearn.utils.validation import _num_samples, check_is_fitted

from mapie._typing import ArrayLike, NDArray
from mapie.estimator.interface import EnsembleEstimator
from mapie.utils import check_no_agg_cv, fit_estimator, fix_number_of_classes


class EnsembleClassifier(EnsembleEstimator):
class EnsembleClassifier:
"""
This class implements methods to handle the training and usage of the
estimator. This estimator can be unique or composed by cross validated
40 changes: 0 additions & 40 deletions mapie/estimator/interface.py

This file was deleted.

120 changes: 88 additions & 32 deletions mapie/estimator/regressor.py
@@ -6,17 +6,16 @@
from joblib import Parallel, delayed
from sklearn.base import RegressorMixin, clone
from sklearn.model_selection import BaseCrossValidator
from sklearn.utils import _safe_indexing
from sklearn.utils import _safe_indexing, deprecated
from sklearn.utils.validation import _num_samples, check_is_fitted

from mapie._typing import ArrayLike, NDArray
from mapie.aggregation_functions import aggregate_all, phi2D
from mapie.estimator.interface import EnsembleEstimator
from mapie.utils import (check_nan_in_aposteriori_prediction, check_no_agg_cv,
fit_estimator)


class EnsembleRegressor(EnsembleEstimator):
class EnsembleRegressor:
"""
This class implements methods to handle the training and usage of the
estimator. This estimator can be unique or composed by cross validated
@@ -409,6 +408,11 @@ def predict_calib(

return y_pred

@deprecated(
"WARNING: EnsembleRegressor.fit is deprecated. "
"Instead use EnsembleRegressor.fit_single_estimator "
"then EnsembleRegressor.fit_multi_estimators"
)
def fit(
self,
X: ArrayLike,
@@ -451,42 +455,60 @@ def fit(
EnsembleRegressor
The estimator fitted.
"""
# Initialization
single_estimator_: RegressorMixin
estimators_: List[RegressorMixin] = []
full_indexes = np.arange(_num_samples(X))
cv = self.cv
self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)
estimator = self.estimator
self.fit_single_estimator(
X,
y,
sample_weight,
groups,
**fit_params
)

self.fit_multi_estimators(
X,
y,
sample_weight,
groups,
**fit_params
)

return self

def fit_multi_estimators(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**fit_params
) -> EnsembleRegressor:

n_samples = _num_samples(y)
estimators: List[RegressorMixin] = []

# Computation
if cv == "prefit":
single_estimator_ = estimator
if self.cv == "prefit":

# Create a placeholder attribute 'k_' filled with NaN values
# This attribute is defined for consistency but
# is not used in prefit mode
self.k_ = np.full(
shape=(n_samples, 1), fill_value=np.nan, dtype=float
)

else:
single_estimator_ = self._fit_oof_estimator(
clone(estimator),
X,
y,
full_indexes,
sample_weight,
**fit_params
)
cv = cast(BaseCrossValidator, cv)
cv = cast(BaseCrossValidator, self.cv)
self.k_ = np.full(
shape=(n_samples, cv.get_n_splits(X, y, groups)),
fill_value=np.nan,
dtype=float,
)
if self.method == "naive":
estimators_ = [single_estimator_]
else:
estimators_ = Parallel(self.n_jobs, verbose=self.verbose)(

if self.method != "naive":
estimators = Parallel(
self.n_jobs,
verbose=self.verbose
)(
delayed(self._fit_oof_estimator)(
clone(estimator),
clone(self.estimator),
X,
y,
train_index,
@@ -495,13 +517,47 @@ def fit(
)
for train_index, _ in cv.split(X, y, groups)
)
# In split-CP, we keep only the model fitted on train dataset
if self.use_split_method_:
single_estimator_ = estimators_[0]

self.single_estimator_ = single_estimator_
self.estimators_ = estimators_
self.estimators_ = estimators

return self

def fit_single_estimator(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**fit_params
) -> EnsembleRegressor:

self.use_split_method_ = check_no_agg_cv(X, self.cv, self.no_agg_cv_)
single_estimator_: RegressorMixin

if self.cv == "prefit":
single_estimator_ = self.estimator
else:
cv = cast(BaseCrossValidator, self.cv)
if self.use_split_method_:
train_indexes = [
train_index for train_index, test_index in cv.split(
X, y, groups)
][0]
indexes = train_indexes
else:
full_indexes = np.arange(_num_samples(X))
indexes = full_indexes

single_estimator_ = self._fit_oof_estimator(
clone(self.estimator),
X,
y,
indexes,
sample_weight,
**fit_params
)

self.single_estimator_ = single_estimator_
return self

def predict(
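For reference, a short sketch of the two-step fit that replaces the now-deprecated `EnsembleRegressor.fit`, following the `fit_single_estimator` and `fit_multi_estimators` methods added above. The positional constructor arguments mirror the ordering used in the new test further down (estimator, method, cv, aggregation function, n_jobs, random_state, test_size, verbose); treat the exact signature as an assumption.

# Sketch of the new two-step EnsembleRegressor fit added in this diff.
# Constructor argument order is inferred from the test below and is an assumption.
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold
from mapie.estimator.regressor import EnsembleRegressor

X, y = make_regression(n_samples=200, n_features=4, noise=1.0, random_state=0)

ens = EnsembleRegressor(
    LinearRegression(),                               # base estimator
    "plus",                                           # method
    KFold(n_splits=5, shuffle=True, random_state=0),  # cv
    "mean",                                           # aggregation function
    None,                                             # n_jobs
    0,                                                # random_state
    0.20,                                             # test_size
    False,                                            # verbose
)

# New API: fit the single estimator first, then the out-of-fold estimators.
ens.fit_single_estimator(X, y)
ens.fit_multi_estimators(X, y)

# Calling the deprecated ens.fit(X, y) still works but raises a FutureWarning.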
70 changes: 61 additions & 9 deletions mapie/regression/regression.py
@@ -513,12 +513,26 @@ def fit(
MapieRegressor
The model itself.
"""
fit_params = kwargs.pop('fit_params', {})
predict_params = kwargs.pop('predict_params', {})
if len(predict_params) > 0:
self._predict_params = True
else:
self._predict_params = False

X, y, sample_weight, groups = self.init_fit(
X, y, sample_weight, groups, **kwargs
)

self.fit_estimator(X, y, sample_weight, groups)
self.conformalize(X, y, sample_weight, groups, **kwargs)

return self

def init_fit(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**kwargs: Any
):

self._fit_params = kwargs.pop('fit_params', {})

# Checks
(estimator,
@@ -540,9 +554,47 @@ def fit(
self.test_size,
self.verbose
)
# Fit the prediction function
self.estimator_ = self.estimator_.fit(
X, y, sample_weight=sample_weight, groups=groups, **fit_params

return (
X, y, sample_weight, groups
)

def fit_estimator(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
) -> MapieRegressor:

self.estimator_.fit_single_estimator(
X,
y,
sample_weight=sample_weight,
groups=groups,
**self._fit_params
)

return self

def conformalize(
self,
X: ArrayLike,
y: ArrayLike,
sample_weight: Optional[ArrayLike] = None,
groups: Optional[ArrayLike] = None,
**kwargs: Any
) -> MapieRegressor:

predict_params = kwargs.pop('predict_params', {})
self._predict_params = len(predict_params) > 0

self.estimator_.fit_multi_estimators(
X,
y,
sample_weight,
groups,
**self._fit_params
)

# Predict on calibration data
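As shown above, `MapieRegressor.fit` now delegates to `init_fit`, `fit_estimator`, and `conformalize`. A minimal sketch of calling the three steps explicitly, under the assumption that they are meant to be public and invoked in this order:

# Sketch of the decomposed MapieRegressor fit introduced in this diff.
# Whether these steps are intended to be called directly is an assumption.
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from mapie.regression import MapieRegressor

X, y = make_regression(n_samples=200, n_features=4, noise=1.0, random_state=0)

mapie = MapieRegressor(estimator=LinearRegression(), method="plus", cv=5)
X, y, sample_weight, groups = mapie.init_fit(X, y)  # input checks and estimator construction
mapie.fit_estimator(X, y, sample_weight, groups)     # fit the single estimator
mapie.conformalize(X, y, sample_weight, groups)      # fit out-of-fold estimators, predict on calibration data
y_pred, y_pis = mapie.predict(X, alpha=0.1)          # point predictions and prediction intervals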
18 changes: 18 additions & 0 deletions mapie/tests/test_regression.py
@@ -1036,3 +1036,21 @@ def test_check_change_method_to_base(method: str, cv: str) -> None:
)
mapie_reg.fit(X_val, y_val)
assert mapie_reg.method == "base"


def test_deprecated_ensemble_regressor_fit_warning() -> None:
ens_reg = EnsembleRegressor(
LinearRegression(),
"plus",
KFold(n_splits=5, random_state=None, shuffle=True),
"nonsense",
None,
random_state,
0.20,
False
)
with pytest.warns(
FutureWarning,
match=r".WARNING: EnsembleRegressor.fit is deprecated.*"
):
ens_reg.fit(X, y)
