Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow MapieRegressor to use the optimal estimation strategy for the bounds of the prediction intervals #387

Merged
merged 9 commits into from
Dec 20, 2023
1 change: 1 addition & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ History
##### (##########)
------------------
* Integrate ConformityScore into MapieTimeSeriesRegressor.
* Add (extend) the optimal estimation strategy for the bounds of the prediction intervals for regression via ConformityScore.
* Add new checks for metrics calculations.
* Fix reference for residual normalised score in documentation.

Expand Down
4 changes: 0 additions & 4 deletions examples/regression/4-tutorials/plot_ts-tutorial.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,8 +216,6 @@ class that block bootstraps the training set.
y_pred_pfit = np.zeros(y_pred_npfit.shape)
y_pis_pfit = np.zeros(y_pis_npfit.shape)
conformity_scores_pfit = []
lower_quantiles_pfit = []
higher_quantiles_pfit = []
y_pred_pfit[:gap], y_pis_pfit[:gap, :, :] = mapie_enbpi.predict(
X_test.iloc[:gap, :], alpha=alpha, ensemble=True, optimize_beta=True
)
Expand All @@ -236,8 +234,6 @@ class that block bootstraps the training set.
optimize_beta=True
)
conformity_scores_pfit.append(mapie_enbpi.conformity_scores_)
lower_quantiles_pfit.append(mapie_enbpi.lower_quantiles_)
higher_quantiles_pfit.append(mapie_enbpi.higher_quantiles_)
coverage_pfit = regression_coverage_score(
y_test, y_pis_pfit[:, 0, 0], y_pis_pfit[:, 1, 0]
)
Expand Down
99 changes: 93 additions & 6 deletions mapie/conformity_scores/conformity_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,14 +254,72 @@ def get_quantile(
])
return quantile

@staticmethod
def _beta_optimize(
    alpha_np: NDArray,
    upper_bounds: NDArray,
    lower_bounds: NDArray,
) -> NDArray:
    """
    Search, for each miscoverage level, the beta that minimizes the
    width of the prediction intervals.

    For a level ``alpha``, the interval is delimited by the
    ``beta``-quantile of ``lower_bounds`` and the
    ``(1 - alpha + beta)``-quantile of ``upper_bounds``; the optimal
    ``beta`` is the candidate giving the narrowest such interval.

    Parameters
    ----------
    alpha_np: NDArray
        The quantiles to compute.

    upper_bounds: NDArray
        The array of upper values.

    lower_bounds: NDArray
        The array of lower values.

    Returns
    -------
    NDArray
        Array of betas minimizing the differences
        ``(1-alpha+beta)-quantile - beta-quantile``,
        of shape (len(lower_bounds), len(alpha_np)).
    """
    n_rows = len(lower_bounds)
    # One optimal beta per (row, alpha) pair; NaN until filled.
    optimal_betas = np.full((n_rows, len(alpha_np)), np.nan, dtype=float)

    for ind_alpha, level in enumerate(alpha_np):
        # Candidate betas, evenly spread over (0, level].
        # NOTE(review): the grid size is tied to the number of rows of
        # ``lower_bounds`` — presumably intentional upstream; confirm.
        candidates = np.linspace(
            level / (n_rows + 1),
            level,
            num=n_rows,
            endpoint=True,
        )
        # Upper interval edge for every candidate ("higher" keeps the
        # estimate conservative on that side).
        upper_q = np_nanquantile(
            upper_bounds.astype(float),
            1 - level + candidates,
            axis=1,
            method="higher",
        )
        # Lower interval edge for every candidate ("lower" is the
        # conservative choice on this side).
        lower_q = np_nanquantile(
            lower_bounds.astype(float),
            candidates,
            axis=1,
            method="lower",
        )
        # Keep, per row, the candidate with the smallest interval width.
        optimal_betas[:, ind_alpha] = candidates[
            np.argmin(upper_q - lower_q, axis=0)
        ]

    return optimal_betas

def get_bounds(
self,
X: ArrayLike,
estimator: EnsembleEstimator,
conformity_scores: NDArray,
alpha_np: NDArray,
ensemble: bool,
method: str
ensemble: bool = False,
method: str = 'base',
optimize_beta: bool = False,
) -> Tuple[NDArray, NDArray, NDArray]:
"""
Compute bounds of the prediction intervals from the observed values,
Expand All @@ -285,13 +343,22 @@ def get_bounds(
ensemble: bool
Boolean determining whether the predictions are ensembled or not.

By default ``False``.

method: str
Method to choose for prediction interval estimates.
The ``"plus"`` method implies that the quantile is calculated
after estimating the bounds, whereas the other methods
(among the ``"naive"``, ``"base"`` or ``"minmax"`` methods,
for example) do the opposite.

By default ``base``.

optimize_beta: bool
Whether to optimize the PIs' width or not.

By default ``False``.

Returns
-------
Tuple[NDArray, NDArray, NDArray]
Expand All @@ -300,13 +367,33 @@ def get_bounds(
(n_samples, n_alpha).
- The upper bounds of the prediction intervals of shape
(n_samples, n_alpha).

Raises
------
ValueError
If beta optimisation with symmetrical conformity score function.
"""
if self.sym and optimize_beta:
raise ValueError(
"Beta optimisation cannot be used with "
+ "symmetrical conformity score function."
)

y_pred, y_pred_low, y_pred_up = estimator.predict(X, ensemble)
signed = -1 if self.sym else 1

if optimize_beta:
beta_np = self._beta_optimize(
alpha_np,
conformity_scores.reshape(1, -1),
conformity_scores.reshape(1, -1),
)
else:
beta_np = alpha_np / 2

if method == "plus":
alpha_low = alpha_np if self.sym else alpha_np / 2
alpha_up = 1 - alpha_np if self.sym else 1 - alpha_np / 2
alpha_low = alpha_np if self.sym else beta_np
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I find it a little weird to call the bound beta_np. It really associates it with the beta optimizer. Could we not simply call it the bound?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a possibility. Up until now, we've always done it that way. This could be the subject of an issue if the internal naming system poses problems in the long term.

alpha_up = 1 - alpha_np if self.sym else 1 - alpha_np + beta_np

conformity_scores_low = self.get_estimation_distribution(
X, y_pred_low, signed * conformity_scores
Expand All @@ -322,8 +409,8 @@ def get_bounds(
)
else:
quantile_search = "higher" if self.sym else "lower"
alpha_low = 1 - alpha_np if self.sym else alpha_np / 2
alpha_up = 1 - alpha_np if self.sym else 1 - alpha_np / 2
alpha_low = 1 - alpha_np if self.sym else beta_np
alpha_up = 1 - alpha_np if self.sym else 1 - alpha_np + beta_np

quantile_low = self.get_quantile(
conformity_scores, alpha_low, axis=0, method=quantile_search
Expand Down
5 changes: 4 additions & 1 deletion mapie/estimator/estimator.py
Original file line number Diff line number Diff line change
Expand Up @@ -496,9 +496,12 @@ def predict(
if self.method == "minmax":
y_pred_multi_low = np.min(y_pred_multi, axis=1, keepdims=True)
y_pred_multi_up = np.max(y_pred_multi, axis=1, keepdims=True)
else:
elif self.method == "plus":
y_pred_multi_low = y_pred_multi
y_pred_multi_up = y_pred_multi
else:
y_pred_multi_low = y_pred[:, np.newaxis]
y_pred_multi_up = y_pred[:, np.newaxis]

if ensemble:
y_pred = aggregate_all(self.agg_function, y_pred_multi)
Expand Down
20 changes: 17 additions & 3 deletions mapie/regression/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,7 @@ class MapieRegressor(BaseEstimator, RegressorMixin):
no_agg_methods_ = ["naive", "base"]
valid_agg_functions_ = [None, "median", "mean"]
ensemble_agg_functions_ = ["median", "mean"]
default_sym_ = True
fit_attributes = [
"estimator_",
"conformity_scores_",
Expand Down Expand Up @@ -424,7 +425,7 @@ def _check_fit_parameters(
estimator = self._check_estimator(self.estimator)
agg_function = self._check_agg_function(self.agg_function)
cs_estimator = check_conformity_score(
self.conformity_score
self.conformity_score, self.default_sym_
)
if isinstance(cs_estimator, ResidualNormalisedScore) and \
self.cv not in ["split", "prefit"]:
Expand Down Expand Up @@ -522,6 +523,7 @@ def predict(
X: ArrayLike,
ensemble: bool = False,
alpha: Optional[Union[float, Iterable[float]]] = None,
optimize_beta: bool = False,
) -> Union[NDArray, Tuple[NDArray, NDArray]]:
"""
Predict target on new samples with confidence intervals.
Expand Down Expand Up @@ -561,6 +563,11 @@ def predict(

By default ``None``.

optimize_beta: bool
Whether to optimize the PIs' width or not.

By default ``False``.

Returns
-------
Union[NDArray, Tuple[NDArray, NDArray]]
Expand All @@ -582,6 +589,12 @@ def predict(
return np.array(y_pred)

else:
if optimize_beta and self.method != 'enbpi':
raise UserWarning(
"Beta optimisation should only be used for "
"method='enbpi'."
)

n = len(self.conformity_scores_)
alpha_np = cast(NDArray, alpha)
check_alpha_and_n_samples(alpha_np, n)
Expand All @@ -592,7 +605,8 @@ def predict(
self.estimator_,
self.conformity_scores_,
alpha_np,
ensemble,
self.method
ensemble=ensemble,
method=self.method,
optimize_beta=optimize_beta
)
return np.array(y_pred), np.stack([y_pred_low, y_pred_up], axis=1)
Loading
Loading