scikit-learn-contrib · thibaultcordier · Jan 3, 2024 · Aug 8, 2023 · Aug 11, 2023 · Aug 11, 2023
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -32,5 +32,7 @@ Contributors
 * Daniel Herbst <[email protected]>
 * Candice Moyet <[email protected]>
 * Sofiane Ziane <[email protected]>
+* Remi Colliaux <[email protected]>
+* Arthur Phan <[email protected]>
 
 To be continued ...
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -10,6 +10,8 @@ History
 * Refactor MapieRegressor and ConformityScore to add the possibility to use X in ConformityScore.
 * Separate the handling of the estimator from MapieRegressor into a new class called EnsembleEstimator.
 * Fix an unfixed random state in one of the classification tests.
+* Add a new method for MapieTimeSeriesRegressor called ACI (Adaptive Conformal Inference).
+* Add a new metric named CWC (Coverage Width-based Criterion).
 
 0.6.5 (2023-06-06)
 ------------------

diff --git a/mapie/metrics.py b/mapie/metrics.py
@@ -737,3 +737,255 @@ def hsic(
     coef_hsic = np.sqrt(np.matrix.trace(hsic_mat, axis1=1, axis2=2))
 
     return coef_hsic
+
+
+def _picp(
+    y_true: ArrayLike,
+    y_pred_low: ArrayLike,
+    y_pred_up: ArrayLike
+) -> float:
+    """
+    Calculate the Prediction Interval Coverage Probability (PICP).
+
+    The PICP is the fraction of true labels that fall inside their
+    prediction interval, both bounds being inclusive.
+
+    [5] Vilde Jensen, Filippo Maria Bianchi, Stian Norman Anfinsen (2022).
+    Ensemble Conformalized Quantile Regression for Probabilistic Time Series
+    Forecasting.
+
+    Parameters
+    ----------
+    y_true : ArrayLike
+        Array of true labels.
+    y_pred_low : ArrayLike
+        Array of lower bounds of prediction intervals.
+    y_pred_up : ArrayLike
+        Array of upper bounds of prediction intervals.
+
+    Returns
+    -------
+    float
+        Prediction Interval Coverage Probability (PICP).
+
+    Raises
+    ------
+    ValueError
+        If ``y_true`` is empty, since the coverage is then undefined.
+
+    Notes
+    -----
+    The PICP is calculated as follows:
+
+    1. Count the true labels that satisfy both conditions
+       ``y_true >= y_pred_low`` and ``y_true <= y_pred_up``.
+    2. Divide this count by the total number of true labels.
+
+    Examples
+    --------
+    >>> y_true = np.array([5, 7.5, 9.5, 10.5, 12.5])
+    >>> y_pred_low = np.array([4, 6, 9, 8.5, 10.5])
+    >>> y_pred_up = np.array([6, 9, 10, 12.5, 12])
+    >>> print(_picp(y_true, y_pred_low, y_pred_up))
+    0.8
+    """
+    # Ensure inputs are NumPy arrays for consistent element-wise operations
+    y_true = np.asarray(y_true)
+    y_pred_low = np.asarray(y_pred_low)
+    y_pred_up = np.asarray(y_pred_up)
+
+    # An empty input would otherwise evaluate 0 / 0 and return NaN with
+    # only a runtime warning, so fail explicitly instead
+    if y_true.size == 0:
+        raise ValueError("y_true must contain at least one element")
+
+    # Boolean mask of the labels lying within their (inclusive) interval
+    covered = (y_true >= y_pred_low) & (y_true <= y_pred_up)
+    coverage = np.sum(covered) / y_true.size
+
+    return float(coverage)
+
+
+def _pinaw(
+    y_true: ArrayLike,
+    y_pred_low: ArrayLike,
+    y_pred_up: ArrayLike
+) -> float:
+    """
+    Calculate the Prediction Interval Normalized Average Width (PINAW).
+
+    The PINAW is the average width of the prediction intervals (PIs)
+    expressed as a fraction of the range of the true labels.
+
+    [5] Vilde Jensen, Filippo Maria Bianchi, Stian Norman Anfinsen (2022).
+    Ensemble Conformalized Quantile Regression for Probabilistic Time Series
+    Forecasting.
+
+    Parameters
+    ----------
+    y_true : ArrayLike
+        Array of true labels.
+    y_pred_low : ArrayLike
+        Array of lower bounds of prediction intervals.
+    y_pred_up : ArrayLike
+        Array of upper bounds of prediction intervals.
+
+    Returns
+    -------
+    float
+        Prediction Interval Normalized Average Width (PINAW).
+
+    Raises
+    ------
+    ValueError
+        If ``y_true`` is empty or if all its values are equal, since the
+        range used for the normalization is then undefined or zero.
+
+    Notes
+    -----
+    The PINAW is calculated as follows:
+
+    1. Average the absolute differences between upper and lower bounds.
+    2. Divide it by the range ``y_true.max() - y_true.min()``.
+
+    Examples
+    --------
+    >>> y_true = np.array([5, 7.5, 9.5, 10.5, 12.5])
+    >>> y_pred_low = np.array([4, 6, 9, 8.5, 10.5])
+    >>> y_pred_up = np.array([6, 9, 10, 12.5, 12])
+    >>> print(np.round(_pinaw(y_true, y_pred_low, y_pred_up), 2))
+    0.31
+    """
+    # Cast to float so that the range is computed in floating point
+    y_true = np.array(y_true, dtype=float)
+
+    # A zero range would make the ratio infinite or NaN rather than fail
+    y_range = float(y_true.max()) - float(y_true.min())
+    if y_range == 0:
+        raise ValueError("y_true must contain at least two distinct values")
+
+    avg_length = np.mean(np.abs(np.subtract(y_pred_up, y_pred_low)))
+    return float(avg_length / y_range)
+
+
+def cwc(
+    y_true: ArrayLike,
+    y_pred_low: ArrayLike,
+    y_pred_up: ArrayLike,
+    eta: float,
+    mu: float
+) -> float:
+    """
+    Coverage width-based criterion obtained by the prediction intervals.
+
+    The coverage width-based criterion (CWC) summarizes the quality of
+    prediction intervals (PIs) in a single value that accounts for both
+    their coverage and their width.
+
+    [5] Vilde Jensen, Filippo Maria Bianchi, Stian Norman Anfinsen (2022).
+    Ensemble Conformalized Quantile Regression for Probabilistic Time Series
+    Forecasting.
+
+    Parameters
+    ----------
+    y_true : ArrayLike
+        Array of true labels.
+    y_pred_low : ArrayLike
+        Array of lower bounds of prediction intervals.
+    y_pred_up : ArrayLike
+        Array of upper bounds of prediction intervals.
+    eta : float
+        A user-defined parameter that balances the contributions of PINAW
+        and PICP in the CWC calculation. It sets how strongly a gap between
+        the observed coverage and ``mu`` is penalized.
+    mu : float
+        A user-defined parameter representing the designed confidence level
+        of the PI. Must lie between 0 and 1 (inclusive).
+
+    Returns
+    -------
+    float
+        Coverage width-based criterion (CWC) of the prediction intervals.
+
+    Raises
+    ------
+    ValueError
+        If ``mu`` is not between 0 and 1.
+
+    Notes
+    -----
+    The CWC is calculated using the following formula:
+
+    CWC = (1 - PINAW) * exp(-eta * (PICP - mu)**2)
+
+    - PICP (prediction interval coverage probability) is the fraction of
+      true labels that lie within the prediction intervals;
+    - PINAW (prediction interval normalized average width) is the average
+      width of the prediction intervals divided by the range of the true
+      labels.
+
+    The first factor rewards narrow intervals. The second factor equals 1
+    when PICP matches mu and, for a positive eta, shrinks towards 0 as PICP
+    moves away from mu. Since the gap is squared, the CWC penalizes under-
+    and overcoverage in the same way.
+
+    High Eta (Large Positive Value):
+
+    When eta is a high positive value, such as 10 or 100, the exponential
+    term np.exp(-eta*(picp-mu)**2) declines sharply as picp deviates from
+    mu. Even a small coverage gap then drives the score towards 0, whatever
+    the value of pinaw.
+    Matching the designed confidence level therefore dominates the score,
+    and the interval width only separates prediction intervals whose
+    coverage is close to mu.
+
+    Low Eta (Small Positive Value):
+
+    When eta is a low positive value, such as 0.01 or 0.1, the exponential
+    term is less steep and stays close to 1, meaning that deviations of
+    picp from mu have a moderate impact.
+    The score is then mostly driven by (1-pinaw), i.e. by the width of the
+    prediction intervals, with only a mild penalty on the coverage gap.
+
+    Negative Eta (Any Negative Value):
+
+    When eta is negative, the exponential term np.exp(-eta*(picp-mu)**2)
+    is at least 1 and grows as picp deviates from mu, in either
+    direction.
+    Coverage gaps then amplify (1-pinaw) instead of damping it, for under-
+    and overcoverage alike, so the score no longer penalizes a coverage
+    that differs from mu.
+    No check is made on the sign of eta, so a negative value is accepted,
+    but the result should be interpreted with care.
+
+    Null Eta (Eta = 0):
+
+    When eta is exactly zero, the exponential term becomes 1 and the score
+    reduces to (1-pinaw). The coverage is ignored and only the width of the
+    prediction intervals matters.
+
+    In summary, the larger the positive value of eta, the more the score
+    depends on how close picp is to mu rather than on pinaw.
+    The specific choice of eta should align with your objectives and the
+    trade-offs that are acceptable in your particular application.
+
+    Examples
+    --------
+    >>> y_true = np.array([5, 7.5, 9.5, 10.5, 12.5])
+    >>> y_preds_low = np.array([4, 6, 9, 8.5, 10.5])
+    >>> y_preds_up = np.array([6, 9, 10, 12.5, 12])
+    >>> eta = 30
+    >>> mu = 0.9
+    >>> print(np.round(cwc(y_true, y_preds_low, y_preds_up, eta, mu),2))
+    0.51
+    """
+    # Validate mu before computing anything on the data
+    if not 0 <= mu <= 1:
+        raise ValueError("mu must be between 0 and 1")
+
+    picp = _picp(y_true, y_pred_low, y_pred_up)
+    pinaw = _pinaw(y_true, y_pred_low, y_pred_up)
+    # Named "score" so that the local does not shadow the function itself
+    score = (1 - pinaw) * np.exp(-eta * (picp - mu)**2)
+
+    return float(score)