Skip to content

Commit

Permalink
Merge pull request GAA-UAM#566 from GAA-UAM/feature/541-add-standard-…
Browse files Browse the repository at this point in the history
…deviation-as-a-function

Add standard deviation as a function
  • Loading branch information
vnmabus authored Oct 13, 2023
2 parents 4fe909c + 3115a6d commit 14b1416
Show file tree
Hide file tree
Showing 6 changed files with 287 additions and 4 deletions.
1 change: 1 addition & 0 deletions docs/modules/exploratory/stats.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,5 @@ statistics can be used.

skfda.exploratory.stats.cov
skfda.exploratory.stats.var
skfda.exploratory.stats.std

3 changes: 2 additions & 1 deletion skfda/_utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"_same_domain",
"_to_grid",
"_to_grid_points",
"function_to_fdatabasis",
"nquad_vec",
],
'_warping': [
Expand All @@ -44,9 +45,9 @@
_same_domain as _same_domain,
_to_grid as _to_grid,
_to_grid_points as _to_grid_points,
function_to_fdatabasis as function_to_fdatabasis,
nquad_vec as nquad_vec,
)

from ._warping import (
invert_warping as invert_warping,
normalize_scale as normalize_scale,
Expand Down
36 changes: 34 additions & 2 deletions skfda/_utils/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import functools
import numbers
from functools import singledispatch
from typing import (
TYPE_CHECKING,
Any,
Expand Down Expand Up @@ -36,7 +35,7 @@
ArrayDTypeT = TypeVar("ArrayDTypeT", bound="np.generic")

if TYPE_CHECKING:
from ..representation import FData, FDataGrid
from ..representation import FData, FDataBasis, FDataGrid
from ..representation.basis import Basis
from ..representation.extrapolation import ExtrapolationLike

Expand Down Expand Up @@ -605,3 +604,36 @@ def _classifier_get_classes(
f'one; got {classes.size} class',
)
return classes, y_ind


def function_to_fdatabasis(
f: Callable[[NDArrayFloat], NDArrayFloat],
new_basis: Basis,
) -> FDataBasis:
"""Express a math function as a FDataBasis with a given basis.
Args:
f: math function.
new_basis: the basis of the output.
Returns:
FDataBasis: FDataBasis with calculated coefficients and the new
basis.
"""
from .. import FDataBasis # noqa: WPS442
from ..misc._math import inner_product_matrix

if isinstance(f, FDataBasis) and f.basis == new_basis:
return f.copy()

inner_prod = inner_product_matrix(
new_basis,
f,
_domain_range=new_basis.domain_range,
)

gram_matrix = new_basis.gram_matrix()

coefs = np.linalg.solve(gram_matrix, inner_prod)

return FDataBasis(new_basis, coefs.T)
2 changes: 2 additions & 0 deletions skfda/exploratory/stats/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
"gmean",
"mean",
"modified_epigraph_index",
"std",
"trim_mean",
"var",
],
Expand All @@ -37,6 +38,7 @@
gmean as gmean,
mean as mean,
modified_epigraph_index as modified_epigraph_index,
std as std,
trim_mean as trim_mean,
var as var,
)
61 changes: 60 additions & 1 deletion skfda/exploratory/stats/_stats.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Functional data descriptive statistics."""
from __future__ import annotations

import functools
from builtins import isinstance
from typing import Callable, TypeVar, Union

Expand All @@ -11,7 +12,7 @@
from skfda._utils.ndfunction import average_function_value

from ...misc.metrics._lp_distances import l2_distance
from ...representation import FData, FDataGrid
from ...representation import FData, FDataBasis, FDataGrid
from ...typing._metric import Metric
from ...typing._numpy import NDArrayFloat
from ..depth import Depth, ModifiedBandDepth
Expand Down Expand Up @@ -101,6 +102,64 @@ def cov(
return X.cov(correction=correction)


@functools.singledispatch
def std(X: F, correction: int = 1) -> F:
r"""
Compute the standard deviation of all the samples in a FData object.
.. math::
\text{std}_X(t) = \sqrt{\frac{1}{N-\text{correction}}
\sum_{n=1}^{N}{\left(X_n(t) - \overline{X}(t)\right)^2}}
Args:
X: Object containing all the samples whose standard deviation is
wanted.
correction: degrees of freedom adjustment. The divisor used in the
calculation is `N - correction`, where `N` represents the number of
elements. Default: `0`.
Returns:
Standard deviation of all the samples in the original object, as a
:term:`functional data object` with just one sample.
"""
raise NotImplementedError("Not implemented for this type")


@std.register
def std_fdatagrid(X: FDataGrid, correction: int = 1) -> FDataGrid:
"""Compute the standard deviation of a FDataGrid."""
return X.copy(
data_matrix=np.std(
X.data_matrix, axis=0, ddof=correction,
)[np.newaxis, ...],
sample_names=(None,),
)


@std.register
def std_fdatabasis(X: FDataBasis, correction: int = 1) -> FDataBasis:
"""Compute the standard deviation of a FDataBasis."""
from ..._utils import function_to_fdatabasis

basis = X.basis
coeff_cov_matrix = np.cov(
X.coefficients, rowvar=False, ddof=correction,
).reshape((basis.n_basis, basis.n_basis))

def std_function(t_points: NDArrayFloat) -> NDArrayFloat: # noqa: WPS430
basis_evaluation = basis(t_points).reshape((basis.n_basis, -1))
std_values = np.sqrt(
np.sum(
basis_evaluation * (coeff_cov_matrix @ basis_evaluation),
axis=0,
),
)
return np.reshape(std_values, (1, -1, X.dim_codomain))

return function_to_fdatabasis(f=std_function, new_basis=X.basis)


def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat:
"""
Calculate the Modified Epigraph Index of a FDataGrid.
Expand Down
188 changes: 188 additions & 0 deletions skfda/tests/test_stats_std.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""Test stats functions."""

from __future__ import annotations

from typing import Any

import numpy as np
import pytest

from skfda import FDataBasis, FDataGrid
from skfda.exploratory.stats import std
from skfda.representation.basis import (
Basis,
FourierBasis,
MonomialBasis,
TensorBasis,
VectorValuedBasis,
)

# Fixtures for test_std_fdatabasis_vector_valued_basis


@pytest.fixture(params=[3, 5])
def vv_n_basis1(request: Any) -> int:
"""n_basis for 1st coordinate of vector valued basis."""
return request.param # type: ignore[no-any-return]


@pytest.fixture
def vv_basis1(vv_n_basis1: int) -> Basis:
"""1-dimensional basis to test for vector valued basis."""
# First element of the basis is assumed to be the 1 function
return MonomialBasis(
n_basis=vv_n_basis1, domain_range=(0, 1),
)


@pytest.fixture(params=[FourierBasis, MonomialBasis])
def vv_basis2(request: Any, vv_n_basis2: int = 3) -> Basis:
"""1-dimensional basis to test for vector valued basis."""
# First element of the basis is assumed to be the 1 function
return request.param( # type: ignore[no-any-return]
domain_range=(0, 1), n_basis=vv_n_basis2,
)


# Fixtures for test_std_fdatabasis_tensor_basis

@pytest.fixture(params=[FourierBasis])
def t_basis1(request: Any, t_n_basis1: int = 3) -> Basis:
"""1-dimensional basis to test for tensor basis."""
# First element of the basis is assumed to be the 1 function
return request.param( # type: ignore[no-any-return]
domain_range=(0, 1), n_basis=t_n_basis1,
)


@pytest.fixture(params=[MonomialBasis])
def t_basis2(request: Any, t_n_basis2: int = 5) -> Basis:
"""1-dimensional basis to test for tensor basis."""
# First element of the basis is assumed to be the 1 function
return request.param( # type: ignore[no-any-return]
domain_range=(0, 1), n_basis=t_n_basis2,
)


# Tests

def test_std_fdatagrid_1d_to_2d() -> None:
"""Test std_fdatagrid with R to R^2 functions."""
fd = FDataGrid(
data_matrix=[
[[0, 1, 2, 3, 4, 5], [0, -1, -2, -3, -4, -5]],
[[2, 3, 4, 5, 6, 7], [-2, -3, -4, -5, -6, -7]],
],
grid_points=[
[-2, -1],
[0, 1, 2, 3, 4, 5],
],
)
expected_std_data_matrix = np.full((1, 2, 6, 1), np.sqrt(2))
np.testing.assert_allclose(
std(fd).data_matrix,
expected_std_data_matrix,
)


def test_std_fdatagrid_2d_to_2d() -> None:
"""Test std_fdatagrid with R to R^2 functions."""
fd = FDataGrid(
data_matrix=[
[
[[10, 11], [10, 12], [11, 14]],
[[15, 16], [12, 15], [20, 13]],
],
[
[[11, 12], [11, 13], [12, 13]],
[[14, 15], [11, 16], [21, 12]],
],
],
grid_points=[
[0, 1],
[0, 1, 2],
],
)
expected_std_data_matrix = np.full((1, 2, 3, 2), np.sqrt(1 / 2))
np.testing.assert_allclose(
std(fd).data_matrix,
expected_std_data_matrix,
)


def test_std_fdatabasis_vector_valued_basis(
vv_basis1: Basis,
vv_basis2: Basis,
) -> None:
"""Test std_fdatabasis with a vector valued basis."""
basis = VectorValuedBasis([vv_basis1, vv_basis2])

# coefficients of the function===(1, 1)
one_coefficients = np.concatenate((
np.pad([1], (0, vv_basis1.n_basis - 1)),
np.pad([1], (0, vv_basis2.n_basis - 1)),
))

fd = FDataBasis(
basis=basis,
coefficients=[np.zeros(basis.n_basis), one_coefficients],
)

np.testing.assert_allclose(
std(fd).coefficients,
np.array([np.sqrt(1 / 2) * one_coefficients]),
rtol=1e-7,
atol=1e-7,
)


def test_std_fdatabasis_tensor_basis(
t_basis1: Basis,
t_basis2: Basis,
) -> None:
"""Test std_fdatabasis with a vector valued basis."""
basis = TensorBasis([t_basis1, t_basis2])

# coefficients of the function===1
one_coefficients = np.pad([1], (0, basis.n_basis - 1))

fd = FDataBasis(
basis=basis,
coefficients=[np.zeros(basis.n_basis), one_coefficients],
)

np.testing.assert_allclose(
std(fd).coefficients,
np.array([np.sqrt(1 / 2) * one_coefficients]),
rtol=1e-7,
atol=1e-7,
)


def test_std_fdatabasis_2d_to_2d() -> None:
"""Test std_fdatabasis with R^2 to R^2 basis."""
basis = VectorValuedBasis([
TensorBasis([
MonomialBasis(domain_range=(0, 1), n_basis=2),
MonomialBasis(domain_range=(0, 1), n_basis=2),
]),
TensorBasis([
MonomialBasis(domain_range=(0, 1), n_basis=2),
MonomialBasis(domain_range=(0, 1), n_basis=2),
]),
])
fd = FDataBasis(
basis=basis,
coefficients=[
[0, 0, 0, 0, 0, 0, 0, 0],
[1, 0, 0, 0, 1, 0, 0, 0],
],
)
expected_coefficients = np.array([[np.sqrt(1 / 2), 0, 0, 0] * 2])

np.testing.assert_allclose(
std(fd).coefficients,
expected_coefficients,
rtol=1e-7,
atol=1e-7,
)

0 comments on commit 14b1416

Please sign in to comment.