Commit 8897f29

Merge branch 'fix/581-default-correction=0-for-cov-var' into feature/541-add-standard-deviation-as-a-function
pcuestas committed Oct 12, 2023
2 parents 6b5e47d + 93fdf3d
Showing 31 changed files with 456 additions and 282 deletions.
2 changes: 1 addition & 1 deletion docs/index.rst
@@ -34,7 +34,7 @@ Github you can find more information related to the development of the package.
    :caption: More documentation
 
    apilist
-   glossary
+   Glossary <glossary>
    contributors
 
 An exhaustive list of all the contents of the package can be found in the
15 changes: 15 additions & 0 deletions docs/refs.bib
@@ -364,6 +364,21 @@ @article{pini++_2018_hotelling
   keywords = {Functional data,High-dimensional data Hotelling’s,Hilbert space,Nonparametric inference,Permutation test}
 }
 
+@incollection{pintado+romo_2005_depthbased,
+  title = {Depth-Based Classification for Functional Data},
+  shorttitle = {Data {{Depth}}},
+  booktitle = {Data {{Depth}}: {{Robust Multivariate Analysis}}, {{Computational Geometry}} and {{Applications}}},
+  author = {Pintado, Sara and Romo, Juan},
+  year = {2005},
+  month = nov,
+  series = {{{DIMACS Series}} in {{Discrete Mathematics}} and {{Theoretical Computer Science}}},
+  volume = {72},
+  pages = {103--119},
+  publisher = {{American Mathematical Society}},
+  doi = {10.1090/dimacs/072/08},
+  isbn = {978-0-8218-3596-8}
+}
+
 @inbook{ramsay+silverman_2005_functionala,
   title = {From Functional Data to Smooth Functions},
   booktitle = {Functional Data Analysis},
2 changes: 1 addition & 1 deletion skfda/__init__.py
@@ -30,4 +30,4 @@
     concatenate as concatenate,
 )
 
-__version__ = "0.8.1"
+__version__ = "0.9.dev1"
18 changes: 18 additions & 0 deletions skfda/_utils/ndfunction/__init__.py
@@ -0,0 +1,18 @@
+from typing import TYPE_CHECKING
+
+import lazy_loader as lazy
+
+__getattr__, __dir__, __all__ = lazy.attach(
+    __name__,
+    submod_attrs={
+        "_functions": [
+            "average_function_value",
+        ],
+    },
+)
+
+if TYPE_CHECKING:
+
+    from ._functions import (
+        average_function_value as average_function_value,
+    )
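The new subpackage wires up lazy loading via the lazy_loader package: lazy.attach installs a module-level __getattr__, so _functions is imported only on first attribute access. A minimal sketch of the observable behavior (assuming nothing else has imported _functions yet; the module path is the one added in this commit):

import sys

import skfda._utils.ndfunction as ndfunction

# Importing the package does not import the submodule yet.
print("skfda._utils.ndfunction._functions" in sys.modules)  # expected: False

# The first attribute access triggers the real import via __getattr__.
func = ndfunction.average_function_value
print("skfda._utils.ndfunction._functions" in sys.modules)  # expected: True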
109 changes: 109 additions & 0 deletions skfda/_utils/ndfunction/_functions.py
@@ -0,0 +1,109 @@
"""Functions for working with arrays of functions."""
from __future__ import annotations

import math
from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar

from ...misc.validation import validate_domain_range
from .. import nquad_vec

if TYPE_CHECKING:
from ...typing._numpy import NDArrayFloat
from ...representation import FData
from ...typing._base import DomainRangeLike


UfuncMethod = Literal[
"__call__",
"reduce",
"reduceat",
"accumulate",
"outer",
"inner",
]


class _SupportsArrayUFunc(Protocol):
def __array_ufunc__(
self,
ufunc: Any,
method: UfuncMethod,
*inputs: Any,
**kwargs: Any,
) -> Any:
pass


T = TypeVar("T", bound=_SupportsArrayUFunc)


class _UnaryUfunc(Protocol):

def __call__(self, __arg: T) -> T: # noqa: WPS112
pass


def _average_function_ufunc(
data: FData,
ufunc: _UnaryUfunc,
*,
domain: DomainRangeLike | None = None,
) -> NDArrayFloat:

if domain is None:
domain = data.domain_range
else:
domain = validate_domain_range(domain)

lebesgue_measure = math.prod(
(
(iterval[1] - iterval[0])
for iterval in domain
),
)

try:
data_eval = ufunc(data)
except TypeError:

def integrand(*args: NDArrayFloat) -> NDArrayFloat: # noqa: WPS430
f1 = data(args)[:, 0, :]
return ufunc(f1)

return nquad_vec(
integrand,
domain,
) / lebesgue_measure

else:
return data_eval.integrate(domain=domain) / lebesgue_measure


def average_function_value(
data: FData,
*,
domain: DomainRangeLike | None = None,
) -> NDArrayFloat:
r"""
Calculate the average function value for each function.
This is the value that, if integrated over the whole domain of each
function, has the same integral as the function itself.
.. math::
\bar{x} = \frac{1}{\text{Vol}(\mathcal{T})}\int_{\mathcal{T}} x(t) dt
Args:
data: Functions where we want to calculate the expected value.
domain: Integration domain. By default, the whole domain is used.
Returns:
ndarray of shape (n_dimensions, n_samples) with the values of the
expectations.
See also:
`Entry on Wikipedia
<https://en.wikipedia.org/wiki/Mean_of_a_function>`_
"""
return _average_function_ufunc(data, ufunc=lambda x: x, domain=domain)
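A quick usage sketch (not part of the commit; the expected numbers follow from the formula in the docstring): the average value of x(t) = t on [0, 1] is 1/2 and that of x(t) = t**2 is 1/3.

import numpy as np

from skfda import FDataGrid
from skfda._utils.ndfunction import average_function_value

grid_points = np.linspace(0, 1, 101)
data = FDataGrid(
    data_matrix=[grid_points, grid_points**2],  # x(t) = t and x(t) = t**2
    grid_points=grid_points,
)

# Each function is integrated and divided by the domain volume (here 1).
print(average_function_value(data))  # approximately 0.5 and 0.3333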
22 changes: 4 additions & 18 deletions skfda/exploratory/depth/_depth.py
@@ -11,9 +11,9 @@
 from typing import TypeVar
 
 import numpy as np
-import scipy.integrate
 
 from ..._utils._sklearn_adapter import BaseEstimator
+from ..._utils.ndfunction import average_function_value
 from ...misc.metrics import l2_distance
 from ...misc.metrics._utils import _fit_metric
 from ...representation import FData, FDataGrid
@@ -82,25 +82,11 @@ def fit( # noqa: D102

     def transform(self, X: FDataGrid) -> NDArrayFloat:  # noqa: D102
 
-        pointwise_depth = self.multivariate_depth_.transform(X.data_matrix)
-
-        interval_len = (
-            self._domain_range[0][1]
-            - self._domain_range[0][0]
-        )
-
-        integrand = pointwise_depth
-
-        for d, s in zip(X.domain_range, X.grid_points):
-            integrand = scipy.integrate.simps(
-                integrand,
-                x=s,
-                axis=1,
-            )
-            interval_len = d[1] - d[0]
-            integrand /= interval_len
-
-        return integrand
+        pointwise_depth = X.copy(
+            data_matrix=self.multivariate_depth_.transform(X.data_matrix),
+        )
+
+        return average_function_value(pointwise_depth).ravel()
 
     @property
     def max(self) -> float:
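The rewritten transform wraps the pointwise depth values in an FDataGrid and delegates the normalized integration to average_function_value, replacing the manual scipy.integrate.simps loop. A sketch of the equivalence on a single synthetic curve (the depth values are stand-ins, not skfda internals):

import numpy as np
import scipy.integrate

from skfda import FDataGrid
from skfda._utils.ndfunction import average_function_value

grid = np.linspace(0, 2, 201)
depth = np.exp(-grid)  # stand-in for one sample's pointwise depth values

# Old transform: Simpson integration divided by the interval length.
old = scipy.integrate.simpson(depth, x=grid) / (grid[-1] - grid[0])

# New transform: the same average function value, computed via FDataGrid.
new = average_function_value(FDataGrid([depth], grid)).ravel()

print(old, new)  # the two should agree closely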
69 changes: 39 additions & 30 deletions skfda/exploratory/outliers/_directional_outlyingness.py
@@ -98,7 +98,7 @@ def directional_outlyingness_stats( # noqa: WPS218
             Method used to order the data. Defaults to :func:`projection
             depth <skfda.exploratory.depth.multivariate.ProjectionDepth>`.
         pointwise_weights (array_like, optional): an array containing the
-            weights of each point of discretisation where values have been
+            weights of each point of discretization where values have been
             recorded. Defaults to the same weight for each of the points:
             1/len(interval).
@@ -183,58 +183,67 @@ def directional_outlyingness_stats( # noqa: WPS218
         fdatagrid.domain_range[0][1] - fdatagrid.domain_range[0][0]
     )
 
+    if not isinstance(pointwise_weights, FDataGrid):
+        pointwise_weights = fdatagrid.copy(
+            data_matrix=pointwise_weights,
+            sample_names=(None,),
+        )
+
     depth_pointwise = multivariate_depth(fdatagrid.data_matrix)
     assert depth_pointwise.shape == fdatagrid.data_matrix.shape[:-1]
 
+    depth_pointwise_fd = fdatagrid.copy(
+        data_matrix=depth_pointwise,
+    )
+
     # Obtaining the pointwise median sample Z, to calculate
     # v(t) = {X(t) − Z(t)}/|| X(t) − Z(t) ||
     median_index = np.argmax(depth_pointwise, axis=0)
-    pointwise_median = fdatagrid.data_matrix[
-        median_index,
-        range(fdatagrid.data_matrix.shape[1]),
-    ]
-    assert pointwise_median.shape == fdatagrid.data_matrix.shape[1:]
-    v = fdatagrid.data_matrix - pointwise_median
-    assert v.shape == fdatagrid.data_matrix.shape
-    v_norm = la.norm(v, axis=-1, keepdims=True)
+    pointwise_median = fdatagrid.copy(
+        data_matrix=fdatagrid.data_matrix[
+            median_index,
+            range(fdatagrid.data_matrix.shape[1]),
+        ][np.newaxis],
+        sample_names=(None,),
+    )
+
+    v = fdatagrid - pointwise_median
+    v_norm = la.norm(v.data_matrix, axis=-1, keepdims=True)
 
     # To avoid ZeroDivisionError, the zeros are substituted by ones (the
     # reference implementation also does this).
     v_norm[np.where(v_norm == 0)] = 1
 
-    v_unitary = v / v_norm
+    v.data_matrix /= v_norm
 
-    # Calculation directinal outlyingness
-    dir_outlyingness = (1 / depth_pointwise[..., np.newaxis] - 1) * v_unitary
+    # Calculation directional outlyingness
+    dir_outlyingness = (
+        1 / depth_pointwise_fd - 1
+    ) * v
 
     # Calculation mean directional outlyingness
     weighted_dir_outlyingness = (
-        dir_outlyingness * pointwise_weights[:, np.newaxis]
+        dir_outlyingness * pointwise_weights
     )
     assert weighted_dir_outlyingness.shape == dir_outlyingness.shape
 
-    mean_dir_outlyingness = scipy.integrate.simps(
-        weighted_dir_outlyingness,
-        fdatagrid.grid_points[0],
-        axis=1,
-    )
+    mean_dir_outlyingness = weighted_dir_outlyingness.integrate()
     assert mean_dir_outlyingness.shape == (
        fdatagrid.n_samples,
        fdatagrid.dim_codomain,
     )
 
     # Calculation variation directional outlyingness
-    norm = np.square(la.norm(
-        dir_outlyingness
-        - mean_dir_outlyingness[:, np.newaxis, :],
-        axis=-1,
-    ))
-    weighted_norm = norm * pointwise_weights
-    variation_dir_outlyingness = scipy.integrate.simps(
-        weighted_norm,
-        fdatagrid.grid_points[0],
-        axis=1,
-    )
+    norm = dir_outlyingness.copy(
+        data_matrix=np.square(
+            la.norm(
+                dir_outlyingness.data_matrix
+                - mean_dir_outlyingness[:, np.newaxis, :],
+                axis=-1,
+            ),
+        ),
+    )
+    weighted_norm = norm * pointwise_weights
+    variation_dir_outlyingness = weighted_norm.integrate().ravel()
     assert variation_dir_outlyingness.shape == (fdatagrid.n_samples,)
 
     functional_dir_outlyingness = (
@@ -244,7 +253,7 @@ def directional_outlyingness_stats( # noqa: WPS218
     assert functional_dir_outlyingness.shape == (fdatagrid.n_samples,)
 
     return DirectionalOutlyingnessStats(
-        directional_outlyingness=dir_outlyingness,
+        directional_outlyingness=dir_outlyingness.data_matrix,
         functional_directional_outlyingness=functional_dir_outlyingness,
         mean_directional_outlyingness=mean_dir_outlyingness,
         variation_directional_outlyingness=variation_dir_outlyingness,
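The public behavior is meant to be unchanged: directional_outlyingness_stats still takes an FDataGrid and returns the same four fields, with directional_outlyingness converted back to a plain array through .data_matrix. A hedged usage sketch on synthetic curves, one of them shifted so it should stand out:

import numpy as np

from skfda import FDataGrid
from skfda.exploratory.outliers import directional_outlyingness_stats

grid = np.linspace(0, 1, 100)
curves = [np.sin(2 * np.pi * grid + delta) for delta in np.linspace(0, 0.3, 9)]
curves.append(5 + np.sin(2 * np.pi * grid))  # a vertically shifted outlier

stats = directional_outlyingness_stats(FDataGrid(curves, grid))

# The shifted curve should receive a clearly larger outlyingness value.
print(stats.functional_directional_outlyingness)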
10 changes: 5 additions & 5 deletions skfda/exploratory/stats/_fisher_rao.py
@@ -133,7 +133,7 @@ def _fisher_rao_warping_mean(
     # Compute shooting vectors
     for psi_i in psi_data:
 
-        inner = scipy.integrate.simps(mu * psi_i, x=eval_points)
+        inner = scipy.integrate.simpson(mu * psi_i, x=eval_points)
         inner = max(min(inner, 1), -1)
 
         theta = np.arccos(inner)
@@ -143,7 +143,7 @@

     # Mean of shooting vectors
     vmean /= warping.n_samples
-    v_norm = np.sqrt(scipy.integrate.simps(np.square(vmean)))
+    v_norm = np.sqrt(scipy.integrate.simpson(np.square(vmean)))
 
     # Convergence criterion
     if v_norm < tol:
@@ -266,7 +266,7 @@ def fisher_rao_karcher_mean(
     # Initialize with function closest to the L2 mean with the L2 distance
     centered = (srsf.T - srsf.mean(axis=0, keepdims=True).T).T
 
-    distances = scipy.integrate.simps(
+    distances = scipy.integrate.simpson(
         np.square(centered, out=centered),
         eval_points_normalized,
         axis=1,
@@ -304,14 +304,14 @@

     # Convergence criterion
     mu_norm = np.sqrt(
-        scipy.integrate.simps(
+        scipy.integrate.simpson(
             np.square(mu, out=mu_aux),
             eval_points_normalized,
         ),
     )
 
     mu_diff = np.sqrt(
-        scipy.integrate.simps(
+        scipy.integrate.simpson(
             np.square(mu - mu_1, out=mu_aux),
             eval_points_normalized,
         ),
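These hunks are a mechanical rename: scipy.integrate.simps is the legacy alias of scipy.integrate.simpson, deprecated and removed in newer SciPy releases, which is what motivates the change. A minimal sketch of the migrated call:

import numpy as np
import scipy.integrate

x = np.linspace(0, np.pi, 101)
y = np.sin(x)

# simpson(y, x=x) replaces simps(y, x=x); the numerical result is the same,
# only the function name changed.
print(scipy.integrate.simpson(y, x=x))  # ~2.0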