Commit 8897f29

Merge branch 'fix/581-default-correction=0-for-cov-var' into feature/541-add-standard-deviation-as-a-function
pcuestas committed Oct 12, 2023
2 parents 6b5e47d + 93fdf3d
Showing 31 changed files with 456 additions and 282 deletions.
2 changes: 1 addition & 1 deletion docs/index.rst
@@ -34,7 +34,7 @@ Github you can find more information related to the development of the package.
    :caption: More documentation
 
    apilist
-   glossary
+   Glossary <glossary>
    contributors
 
 An exhaustive list of all the contents of the package can be found in the
15 changes: 15 additions & 0 deletions docs/refs.bib
@@ -364,6 +364,21 @@ @article{pini++_2018_hotelling
   keywords = {Functional data,High-dimensional data Hotelling’s,Hilbert space,Nonparametric inference,Permutation test}
 }
 
+@incollection{pintado+romo_2005_depthbased,
+  title = {Depth-Based Classification for Functional Data},
+  shorttitle = {Data {{Depth}}},
+  booktitle = {Data {{Depth}}: {{Robust Multivariate Analysis}}, {{Computational Geometry}} and {{Applications}}},
+  author = {Pintado, Sara and Romo, Juan},
+  year = {2005},
+  month = nov,
+  series = {{{DIMACS Series}} in {{Discrete Mathematics}} and {{Theoretical Computer Science}}},
+  volume = {72},
+  pages = {103--119},
+  publisher = {{American Mathematical Society}},
+  doi = {10.1090/dimacs/072/08},
+  isbn = {978-0-8218-3596-8}
+}
+
 @inbook{ramsay+silverman_2005_functionala,
   title = {From Functional Data to Smooth Functions},
   booktitle = {Functional Data Analysis},
2 changes: 1 addition & 1 deletion skfda/__init__.py
@@ -30,4 +30,4 @@
     concatenate as concatenate,
 )
 
-__version__ = "0.8.1"
+__version__ = "0.9.dev1"
18 changes: 18 additions & 0 deletions skfda/_utils/ndfunction/__init__.py
@@ -0,0 +1,18 @@
+from typing import TYPE_CHECKING
+
+import lazy_loader as lazy
+
+__getattr__, __dir__, __all__ = lazy.attach(
+    __name__,
+    submod_attrs={
+        "_functions": [
+            "average_function_value",
+        ],
+    },
+)
+
+if TYPE_CHECKING:
+
+    from ._functions import (
+        average_function_value as average_function_value,
+    )
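The new subpackage wires up lazy loading via the lazy_loader package: lazy.attach installs a module-level __getattr__, so _functions is imported only on first attribute access. A minimal sketch of the observable behavior (assuming nothing else has imported _functions yet; the module path is the one added in this commit):

import sys

import skfda._utils.ndfunction as ndfunction

# Importing the package does not import the submodule yet.
print("skfda._utils.ndfunction._functions" in sys.modules)  # expected: False

# The first attribute access triggers the real import via __getattr__.
func = ndfunction.average_function_value
print("skfda._utils.ndfunction._functions" in sys.modules)  # expected: True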
109 changes: 109 additions & 0 deletions skfda/_utils/ndfunction/_functions.py
@@ -0,0 +1,109 @@
"""Functions for working with arrays of functions."""
from __future__ import annotations

import math
from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar

from ...misc.validation import validate_domain_range
from .. import nquad_vec

if TYPE_CHECKING:
from ...typing._numpy import NDArrayFloat
from ...representation import FData
from ...typing._base import DomainRangeLike


UfuncMethod = Literal[
"__call__",
"reduce",
"reduceat",
"accumulate",
"outer",
"inner",
]


class _SupportsArrayUFunc(Protocol):
def __array_ufunc__(
self,
ufunc: Any,
method: UfuncMethod,
*inputs: Any,
**kwargs: Any,
) -> Any:
pass


T = TypeVar("T", bound=_SupportsArrayUFunc)


class _UnaryUfunc(Protocol):

def __call__(self, __arg: T) -> T: # noqa: WPS112
pass


def _average_function_ufunc(
data: FData,
ufunc: _UnaryUfunc,
*,
domain: DomainRangeLike | None = None,
) -> NDArrayFloat:

if domain is None:
domain = data.domain_range
else:
domain = validate_domain_range(domain)

lebesgue_measure = math.prod(
(
(iterval[1] - iterval[0])
for iterval in domain
),
)

try:
data_eval = ufunc(data)
except TypeError:

def integrand(*args: NDArrayFloat) -> NDArrayFloat: # noqa: WPS430
f1 = data(args)[:, 0, :]
return ufunc(f1)

return nquad_vec(
integrand,
domain,
) / lebesgue_measure

else:
return data_eval.integrate(domain=domain) / lebesgue_measure


def average_function_value(
data: FData,
*,
domain: DomainRangeLike | None = None,
) -> NDArrayFloat:
r"""
Calculate the average function value for each function.
This is the value that, if integrated over the whole domain of each
function, has the same integral as the function itself.
.. math::
\bar{x} = \frac{1}{\text{Vol}(\mathcal{T})}\int_{\mathcal{T}} x(t) dt
Args:
data: Functions where we want to calculate the expected value.
domain: Integration domain. By default, the whole domain is used.
Returns:
ndarray of shape (n_dimensions, n_samples) with the values of the
expectations.
See also:
`Entry on Wikipedia
<https://en.wikipedia.org/wiki/Mean_of_a_function>`_
"""
return _average_function_ufunc(data, ufunc=lambda x: x, domain=domain)
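A quick usage sketch (not part of the commit; the expected numbers follow from the formula in the docstring): the average value of x(t) = t on [0, 1] is 1/2 and that of x(t) = t**2 is 1/3.

import numpy as np

from skfda import FDataGrid
from skfda._utils.ndfunction import average_function_value

grid_points = np.linspace(0, 1, 101)
data = FDataGrid(
    data_matrix=[grid_points, grid_points**2],  # x(t) = t and x(t) = t**2
    grid_points=grid_points,
)

# Each function is integrated and divided by the domain volume (here 1).
print(average_function_value(data))  # approximately 0.5 and 0.3333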
22 changes: 4 additions & 18 deletions skfda/exploratory/depth/_depth.py
@@ -11,9 +11,9 @@
 from typing import TypeVar
 
 import numpy as np
-import scipy.integrate
 
 from ..._utils._sklearn_adapter import BaseEstimator
+from ..._utils.ndfunction import average_function_value
 from ...misc.metrics import l2_distance
 from ...misc.metrics._utils import _fit_metric
 from ...representation import FData, FDataGrid
@@ -82,25 +82,11 @@ def fit( # noqa: D102

     def transform(self, X: FDataGrid) -> NDArrayFloat:  # noqa: D102
 
-        pointwise_depth = self.multivariate_depth_.transform(X.data_matrix)
-
-        interval_len = (
-            self._domain_range[0][1]
-            - self._domain_range[0][0]
-        )
-
-        integrand = pointwise_depth
-
-        for d, s in zip(X.domain_range, X.grid_points):
-            integrand = scipy.integrate.simps(
-                integrand,
-                x=s,
-                axis=1,
-            )
-            interval_len = d[1] - d[0]
-            integrand /= interval_len
-
-        return integrand
+        pointwise_depth = X.copy(
+            data_matrix=self.multivariate_depth_.transform(X.data_matrix),
+        )
+
+        return average_function_value(pointwise_depth).ravel()
 
     @property
     def max(self) -> float:
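The rewritten transform wraps the pointwise depth values in an FDataGrid and delegates the normalized integration to average_function_value, replacing the manual scipy.integrate.simps loop. A sketch of the equivalence on a single synthetic curve (the depth values are stand-ins, not skfda internals):

import numpy as np
import scipy.integrate

from skfda import FDataGrid
from skfda._utils.ndfunction import average_function_value

grid = np.linspace(0, 2, 201)
depth = np.exp(-grid)  # stand-in for one sample's pointwise depth values

# Old transform: Simpson integration divided by the interval length.
old = scipy.integrate.simpson(depth, x=grid) / (grid[-1] - grid[0])

# New transform: the same average function value, computed via FDataGrid.
new = average_function_value(FDataGrid([depth], grid)).ravel()

print(old, new)  # the two should agree closely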
69 changes: 39 additions & 30 deletions skfda/exploratory/outliers/_directional_outlyingness.py
@@ -98,7 +98,7 @@ def directional_outlyingness_stats( # noqa: WPS218
             Method used to order the data. Defaults to :func:`projection
             depth <skfda.exploratory.depth.multivariate.ProjectionDepth>`.
         pointwise_weights (array_like, optional): an array containing the
-            weights of each point of discretisation where values have been
+            weights of each point of discretization where values have been
             recorded. Defaults to the same weight for each of the points:
             1/len(interval).
@@ -183,58 +183,67 @@ def directional_outlyingness_stats( # noqa: WPS218
         fdatagrid.domain_range[0][1] - fdatagrid.domain_range[0][0]
     )
 
+    if not isinstance(pointwise_weights, FDataGrid):
+        pointwise_weights = fdatagrid.copy(
+            data_matrix=pointwise_weights,
+            sample_names=(None,),
+        )
+
     depth_pointwise = multivariate_depth(fdatagrid.data_matrix)
     assert depth_pointwise.shape == fdatagrid.data_matrix.shape[:-1]
 
+    depth_pointwise_fd = fdatagrid.copy(
+        data_matrix=depth_pointwise,
+    )
+
     # Obtaining the pointwise median sample Z, to calculate
     # v(t) = {X(t) − Z(t)}/|| X(t) − Z(t) ||
     median_index = np.argmax(depth_pointwise, axis=0)
-    pointwise_median = fdatagrid.data_matrix[
-        median_index,
-        range(fdatagrid.data_matrix.shape[1]),
-    ]
-    assert pointwise_median.shape == fdatagrid.data_matrix.shape[1:]
-    v = fdatagrid.data_matrix - pointwise_median
-    assert v.shape == fdatagrid.data_matrix.shape
-    v_norm = la.norm(v, axis=-1, keepdims=True)
+    pointwise_median = fdatagrid.copy(
+        data_matrix=fdatagrid.data_matrix[
+            median_index,
+            range(fdatagrid.data_matrix.shape[1]),
+        ][np.newaxis],
+        sample_names=(None,),
+    )
+
+    v = fdatagrid - pointwise_median
+    v_norm = la.norm(v.data_matrix, axis=-1, keepdims=True)
 
     # To avoid ZeroDivisionError, the zeros are substituted by ones (the
     # reference implementation also does this).
     v_norm[np.where(v_norm == 0)] = 1
 
-    v_unitary = v / v_norm
+    v.data_matrix /= v_norm
 
-    # Calculation directinal outlyingness
-    dir_outlyingness = (1 / depth_pointwise[..., np.newaxis] - 1) * v_unitary
+    # Calculation directional outlyingness
+    dir_outlyingness = (
+        1 / depth_pointwise_fd - 1
+    ) * v
 
     # Calculation mean directional outlyingness
     weighted_dir_outlyingness = (
-        dir_outlyingness * pointwise_weights[:, np.newaxis]
+        dir_outlyingness * pointwise_weights
     )
     assert weighted_dir_outlyingness.shape == dir_outlyingness.shape
 
-    mean_dir_outlyingness = scipy.integrate.simps(
-        weighted_dir_outlyingness,
-        fdatagrid.grid_points[0],
-        axis=1,
-    )
+    mean_dir_outlyingness = weighted_dir_outlyingness.integrate()
     assert mean_dir_outlyingness.shape == (
        fdatagrid.n_samples,
        fdatagrid.dim_codomain,
     )
 
     # Calculation variation directional outlyingness
-    norm = np.square(la.norm(
-        dir_outlyingness
-        - mean_dir_outlyingness[:, np.newaxis, :],
-        axis=-1,
-    ))
-    weighted_norm = norm * pointwise_weights
-    variation_dir_outlyingness = scipy.integrate.simps(
-        weighted_norm,
-        fdatagrid.grid_points[0],
-        axis=1,
-    )
+    norm = dir_outlyingness.copy(
+        data_matrix=np.square(
+            la.norm(
+                dir_outlyingness.data_matrix
+                - mean_dir_outlyingness[:, np.newaxis, :],
+                axis=-1,
+            ),
+        ),
+    )
+    weighted_norm = norm * pointwise_weights
+    variation_dir_outlyingness = weighted_norm.integrate().ravel()
     assert variation_dir_outlyingness.shape == (fdatagrid.n_samples,)
 
     functional_dir_outlyingness = (
@@ -244,7 +253,7 @@ def directional_outlyingness_stats( # noqa: WPS218
     assert functional_dir_outlyingness.shape == (fdatagrid.n_samples,)
 
     return DirectionalOutlyingnessStats(
-        directional_outlyingness=dir_outlyingness,
+        directional_outlyingness=dir_outlyingness.data_matrix,
         functional_directional_outlyingness=functional_dir_outlyingness,
         mean_directional_outlyingness=mean_dir_outlyingness,
         variation_directional_outlyingness=variation_dir_outlyingness,
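The public behavior is meant to be unchanged: directional_outlyingness_stats still takes an FDataGrid and returns the same four fields, with directional_outlyingness converted back to a plain array through .data_matrix. A hedged usage sketch on synthetic curves, one of them shifted so it should stand out:

import numpy as np

from skfda import FDataGrid
from skfda.exploratory.outliers import directional_outlyingness_stats

grid = np.linspace(0, 1, 100)
curves = [np.sin(2 * np.pi * grid + delta) for delta in np.linspace(0, 0.3, 9)]
curves.append(5 + np.sin(2 * np.pi * grid))  # a vertically shifted outlier

stats = directional_outlyingness_stats(FDataGrid(curves, grid))

# The shifted curve should receive a clearly larger outlyingness value.
print(stats.functional_directional_outlyingness)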
10 changes: 5 additions & 5 deletions skfda/exploratory/stats/_fisher_rao.py
@@ -133,7 +133,7 @@ def _fisher_rao_warping_mean(
     # Compute shooting vectors
     for psi_i in psi_data:
 
-        inner = scipy.integrate.simps(mu * psi_i, x=eval_points)
+        inner = scipy.integrate.simpson(mu * psi_i, x=eval_points)
         inner = max(min(inner, 1), -1)
 
         theta = np.arccos(inner)
@@ -143,7 +143,7 @@

     # Mean of shooting vectors
     vmean /= warping.n_samples
-    v_norm = np.sqrt(scipy.integrate.simps(np.square(vmean)))
+    v_norm = np.sqrt(scipy.integrate.simpson(np.square(vmean)))
 
     # Convergence criterion
     if v_norm < tol:
@@ -266,7 +266,7 @@ def fisher_rao_karcher_mean(
     # Initialize with function closest to the L2 mean with the L2 distance
     centered = (srsf.T - srsf.mean(axis=0, keepdims=True).T).T
 
-    distances = scipy.integrate.simps(
+    distances = scipy.integrate.simpson(
         np.square(centered, out=centered),
         eval_points_normalized,
         axis=1,
@@ -304,14 +304,14 @@

     # Convergence criterion
     mu_norm = np.sqrt(
-        scipy.integrate.simps(
+        scipy.integrate.simpson(
             np.square(mu, out=mu_aux),
             eval_points_normalized,
         ),
     )
 
     mu_diff = np.sqrt(
-        scipy.integrate.simps(
+        scipy.integrate.simpson(
             np.square(mu - mu_1, out=mu_aux),
             eval_points_normalized,
         ),
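These hunks are a mechanical rename: scipy.integrate.simps is the legacy alias of scipy.integrate.simpson, deprecated and removed in newer SciPy releases, which is what motivates the change. A minimal sketch of the migrated call:

import numpy as np
import scipy.integrate

x = np.linspace(0, np.pi, 101)
y = np.sin(x)

# simpson(y, x=x) replaces simps(y, x=x); the numerical result is the same,
# only the function name changed.
print(scipy.integrate.simpson(y, x=x))  # ~2.0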