From 4f4f332d9ef50a43d9b07d4a9bbc120c57c788c2 Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Mon, 28 Mar 2022 17:24:49 +0200 Subject: [PATCH 01/17] Smoothing in several dimensions Close #132 --- skfda/misc/hat_matrix.py | 74 +++++++++++-------- .../smoothing/_kernel_smoothers.py | 17 +++-- skfda/preprocessing/smoothing/_linear.py | 18 ++++- 3 files changed, 70 insertions(+), 39 deletions(-) diff --git a/skfda/misc/hat_matrix.py b/skfda/misc/hat_matrix.py index 82d53f444..25c086a0d 100644 --- a/skfda/misc/hat_matrix.py +++ b/skfda/misc/hat_matrix.py @@ -145,7 +145,7 @@ def _hat_matrix_function_not_normalized( if self.bandwidth is None: percentage = 15 - self.bandwidth = np.percentile(np.abs(delta_x), percentage) + self.bandwidth = np.percentile(delta_x, percentage) return self.kernel(delta_x / self.bandwidth) @@ -222,8 +222,8 @@ def __call__( # noqa: D102 self, *, delta_x: NDArrayFloat, - X_train: Optional[Union[FDataBasis, GridPoints]] = None, - X: Optional[Union[FDataBasis, GridPoints]] = None, + X_train: Optional[Union[FDataBasis, NDArrayFloat]] = None, + X: Optional[Union[FDataBasis, NDArrayFloat]] = None, y_train: Optional[NDArrayFloat] = None, weights: Optional[NDArrayFloat] = None, _cv: bool = False, @@ -231,21 +231,45 @@ def __call__( # noqa: D102 if self.bandwidth is None: percentage = 15 - self.bandwidth = np.percentile(np.abs(delta_x), percentage) + self.bandwidth = np.percentile(delta_x, percentage) - # Regression + # Smoothing for functions of one variable + if not isinstance(X_train, FDataBasis) and X_train[0].shape[0] == 1: + delta_x = np.subtract.outer( + X.flatten(), + X_train.flatten(), + ) + + return super().__call__( # noqa: WPS503 + delta_x=delta_x, + X_train=X_train, + X=X, + y_train=y_train, + weights=weights, + _cv=_cv, + ) + + # Regression and smoothing for multivariable functions if isinstance(X_train, FDataBasis): + m1 = X_train.coefficients + m2 = X.coefficients if y_train is None: y_train = np.identity(X_train.n_samples) - m1 = X_train.coefficients - m2 = X.coefficients + else: + m1 = X_train + m2 = X - # Subtract previous matrices obtaining a 3D matrix - # The i-th element contains the matrix X_train - X[i] - C = m1 - m2[:, np.newaxis] + if y_train is None: + y_train = np.identity(X_train.shape[0]) + # Subtract previous matrices obtaining a 3D matrix + # The i-th element contains the matrix X_train - X[i] + C = m1 - m2[:, np.newaxis] + + # Inner product matrix only is applicable in regression + if isinstance(X_train, FDataBasis): inner_product_matrix = X_train.basis.inner_product_matrix() # Calculate new coefficients taking into account cross-products @@ -256,27 +280,15 @@ def __call__( # noqa: D102 inner_product_matrix, ) - # Adding a column of ones in the first position of all matrices - dims = (C.shape[0], C.shape[1], 1) - C = np.c_[np.ones(dims), C] + # Adding a column of ones in the first position of all matrices + dims = (C.shape[0], C.shape[1], 1) + C = np.c_[np.ones(dims), C] - return self._solve_least_squares( - delta_x=delta_x, - coefs=C, - y_train=y_train, - ) - - # Smoothing - else: - - return super().__call__( # noqa: WPS503 - delta_x=delta_x, - X_train=X_train, - X=X, - y_train=y_train, - weights=weights, - _cv=_cv, - ) + return self._solve_least_squares( + delta_x=delta_x, + coefs=C, + y_train=y_train, + ) def _solve_least_squares( self, @@ -392,7 +404,7 @@ def _hat_matrix_function_not_normalized( # For each row in the distances matrix, it calculates the furthest # point within the k nearest neighbours vec = np.percentile( - np.abs(delta_x), 
+ delta_x, self.n_neighbors / input_points_len * 100, axis=1, interpolation='lower', diff --git a/skfda/preprocessing/smoothing/_kernel_smoothers.py b/skfda/preprocessing/smoothing/_kernel_smoothers.py index 6dc7d82e3..0805b4a38 100644 --- a/skfda/preprocessing/smoothing/_kernel_smoothers.py +++ b/skfda/preprocessing/smoothing/_kernel_smoothers.py @@ -9,9 +9,10 @@ import numpy as np -from ..._utils._utils import _to_grid_points +from ..._utils._utils import _to_grid_points, _cartesian_product from ...misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix -from ...representation._typing import GridPointsLike, NDArrayFloat +from ...misc.metrics import PairwiseMetric, l2_distance, Metric +from ...representation._typing import GridPointsLike, NDArrayFloat, Vector from ._linear import _LinearSmoother @@ -113,10 +114,12 @@ def __init__( kernel_estimator: Optional[HatMatrix] = None, weights: Optional[NDArrayFloat] = None, output_points: Optional[GridPointsLike] = None, + metric: Metric[Vector] = l2_distance, ): self.kernel_estimator = kernel_estimator self.weights = weights self.output_points = output_points + self.metric = metric self._cv = False # For testing purposes only def _hat_matrix( @@ -125,18 +128,18 @@ def _hat_matrix( output_points: GridPointsLike, ) -> NDArrayFloat: - input_points = _to_grid_points(input_points) - output_points = _to_grid_points(output_points) + input_points = _cartesian_product(_to_grid_points(input_points)) + output_points = _cartesian_product(_to_grid_points(output_points)) if self.kernel_estimator is None: self.kernel_estimator = NadarayaWatsonHatMatrix() - delta_x = np.subtract.outer(output_points[0], input_points[0]) + delta_x = PairwiseMetric(self.metric)(output_points, input_points) return self.kernel_estimator( delta_x=delta_x, weights=self.weights, - X_train=input_points, - X=output_points, + X_train=np.array(input_points), + X=np.array(output_points), _cv=self._cv, ) diff --git a/skfda/preprocessing/smoothing/_linear.py b/skfda/preprocessing/smoothing/_linear.py index f9d787d73..082872aa4 100644 --- a/skfda/preprocessing/smoothing/_linear.py +++ b/skfda/preprocessing/smoothing/_linear.py @@ -114,9 +114,25 @@ def transform( ) ) + dims = [len(e) for e in self.output_points_] + dims += [len(e) for e in self.input_points_] + + hat_matrix = np.reshape( + self.hat_matrix_, + (*dims,) + ) + + data_matrix = np.einsum( + hat_matrix, + [Ellipsis, *range(1, len(self.output_points_) + 1)], + X.data_matrix, + [0, *range(1, len(self.output_points_) + 2)], + [0, Ellipsis, len(self.output_points_) + 1], + ) + # The matrix is cached return X.copy( - data_matrix=self.hat_matrix_ @ X.data_matrix, + data_matrix=data_matrix, grid_points=self.output_points_, ) From a80f779f9590b29d650697945fdc8caf541c0813 Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Mon, 28 Mar 2022 17:40:42 +0200 Subject: [PATCH 02/17] Fix code style --- skfda/misc/hat_matrix.py | 2 +- skfda/preprocessing/smoothing/_kernel_smoothers.py | 4 ++-- skfda/preprocessing/smoothing/_linear.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/skfda/misc/hat_matrix.py b/skfda/misc/hat_matrix.py index 25c086a0d..5a564117f 100644 --- a/skfda/misc/hat_matrix.py +++ b/skfda/misc/hat_matrix.py @@ -15,7 +15,7 @@ from sklearn.base import BaseEstimator, RegressorMixin from ..representation._functional_data import FData -from ..representation._typing import GridPoints, GridPointsLike, NDArrayFloat +from ..representation._typing import GridPointsLike, NDArrayFloat from ..representation.basis import 
FDataBasis from . import kernels diff --git a/skfda/preprocessing/smoothing/_kernel_smoothers.py b/skfda/preprocessing/smoothing/_kernel_smoothers.py index 0805b4a38..1d3f2c262 100644 --- a/skfda/preprocessing/smoothing/_kernel_smoothers.py +++ b/skfda/preprocessing/smoothing/_kernel_smoothers.py @@ -9,9 +9,9 @@ import numpy as np -from ..._utils._utils import _to_grid_points, _cartesian_product +from ..._utils._utils import _cartesian_product, _to_grid_points from ...misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix -from ...misc.metrics import PairwiseMetric, l2_distance, Metric +from ...misc.metrics import Metric, PairwiseMetric, l2_distance from ...representation._typing import GridPointsLike, NDArrayFloat, Vector from ._linear import _LinearSmoother diff --git a/skfda/preprocessing/smoothing/_linear.py b/skfda/preprocessing/smoothing/_linear.py index 082872aa4..4f1ebf626 100644 --- a/skfda/preprocessing/smoothing/_linear.py +++ b/skfda/preprocessing/smoothing/_linear.py @@ -119,7 +119,7 @@ def transform( hat_matrix = np.reshape( self.hat_matrix_, - (*dims,) + dims, ) data_matrix = np.einsum( From 1a58019338a33c84049e35cbdada58c37f3fb43f Mon Sep 17 00:00:00 2001 From: ElenaPetrunina Date: Wed, 30 Mar 2022 13:06:47 +0200 Subject: [PATCH 03/17] Update hat_matrix.py --- skfda/misc/hat_matrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/misc/hat_matrix.py b/skfda/misc/hat_matrix.py index ffb7dc3fe..d280a03ff 100644 --- a/skfda/misc/hat_matrix.py +++ b/skfda/misc/hat_matrix.py @@ -281,7 +281,7 @@ def __call__( # noqa: D102 # Calculate new coefficients taking into account cross-products # if the basis is orthonormal, C would not change C = C @ inner_product_matrix # noqa: WPS350 - + # Adding a column of ones in the first position of all matrices dims = (C.shape[0], C.shape[1], 1) C = np.concatenate((np.ones(dims), C), axis=-1) From ab396f646cb8ff4d03921a9191091cbd8f32f9fa Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 17 Aug 2023 13:15:56 +0200 Subject: [PATCH 04/17] Fix tests. --- skfda/preprocessing/smoothing/_kernel_smoothers.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/skfda/preprocessing/smoothing/_kernel_smoothers.py b/skfda/preprocessing/smoothing/_kernel_smoothers.py index 796541985..dc605fc47 100644 --- a/skfda/preprocessing/smoothing/_kernel_smoothers.py +++ b/skfda/preprocessing/smoothing/_kernel_smoothers.py @@ -11,8 +11,9 @@ from ..._utils._utils import _cartesian_product, _to_grid_points from ...misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix -from ...misc.metrics import Metric, PairwiseMetric, l2_distance +from ...misc.metrics import PairwiseMetric, l2_distance from ...typing._base import GridPointsLike, Vector +from ...typing._metric import Metric from ...typing._numpy import NDArrayFloat from ._linear import _LinearSmoother From ae810e806cf9213342ae563a2816f111497cdec9 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Thu, 17 Aug 2023 16:37:40 +0200 Subject: [PATCH 05/17] Fix Mypy errors. 
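
The annotation is exercised as in the following sketch (illustrative only,
not part of the diff; it relies on the `metric` parameter and the smoother
names introduced earlier in this series):

    import skfda
    from skfda.misc.hat_matrix import NadarayaWatsonHatMatrix
    from skfda.misc.metrics import l2_distance
    from skfda.preprocessing.smoothing import KernelSmoother

    fd = skfda.FDataGrid(
        data_matrix=[[1.0, 2.0, 4.0, 3.0]],
        grid_points=[[0.0, 1.0, 2.0, 3.0]],
    )

    # `metric` is now annotated as Metric[NDArrayFloat]: it is applied to
    # the arrays of points built with _cartesian_product, not to FData.
    smoother = KernelSmoother(
        kernel_estimator=NadarayaWatsonHatMatrix(bandwidth=1.0),
        metric=l2_distance,
    )
    fd_smooth = smoother.fit_transform(fd)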
---
 skfda/preprocessing/smoothing/_kernel_smoothers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/skfda/preprocessing/smoothing/_kernel_smoothers.py b/skfda/preprocessing/smoothing/_kernel_smoothers.py
index dc605fc47..e9b56f44d 100644
--- a/skfda/preprocessing/smoothing/_kernel_smoothers.py
+++ b/skfda/preprocessing/smoothing/_kernel_smoothers.py
@@ -12,7 +12,7 @@
 from ..._utils._utils import _cartesian_product, _to_grid_points
 from ...misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix
 from ...misc.metrics import PairwiseMetric, l2_distance
-from ...typing._base import GridPointsLike, Vector
+from ...typing._base import GridPointsLike
 from ...typing._metric import Metric
 from ...typing._numpy import NDArrayFloat
 from ._linear import _LinearSmoother
@@ -116,7 +116,7 @@ def __init__(
         *,
         weights: Optional[NDArrayFloat] = None,
         output_points: Optional[GridPointsLike] = None,
-        metric: Metric[Vector] = l2_distance,
+        metric: Metric[NDArrayFloat] = l2_distance,
     ):
         self.kernel_estimator = kernel_estimator
         self.weights = weights

From 1644ad0427019b07874173044b371f6fd12ffb6f Mon Sep 17 00:00:00 2001
From: VNMabus
Date: Thu, 21 Sep 2023 09:38:05 +0200
Subject: [PATCH 06/17] Fix docs.

---
 docs/index.rst    | 2 +-
 skfda/__init__.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/index.rst b/docs/index.rst
index 55cbe94dc..b5ee3c933 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -34,7 +34,7 @@ Github you can find more information related to the development of the package.
    :caption: More documentation

    apilist
-   glossary
+   Glossary <glossary>
    contributors

 An exhaustive list of all the contents of the package can be found in the
diff --git a/skfda/__init__.py b/skfda/__init__.py
index 1a8f41021..f9434fa97 100644
--- a/skfda/__init__.py
+++ b/skfda/__init__.py
@@ -30,4 +30,4 @@
     concatenate as concatenate,
 )

-__version__ = "0.8.1"
+__version__ = "0.9.dev1"

From ee676cda04becb33c99907ea0bf7761b499a14b5 Mon Sep 17 00:00:00 2001
From: E105D104U125
Date: Mon, 25 Sep 2023 00:28:39 +0200
Subject: [PATCH 07/17] Update citation and See also in DTMClassifier class

---
 docs/refs.bib                                    | 15 +++++++++++++++
 skfda/ml/classification/_centroid_classifiers.py |  3 ++-
 2 files changed, 17 insertions(+), 1 deletion(-)

diff --git a/docs/refs.bib b/docs/refs.bib
index cb1255044..c0aedacb8 100644
--- a/docs/refs.bib
+++ b/docs/refs.bib
@@ -364,6 +364,21 @@ @article{pini++_2018_hotelling
   keywords = {Functional data,High-dimensional data Hotelling’s,Hilbert space,Nonparametric inference,Permutation test}
 }

+@incollection{pintado+romo_2005_depthbased,
+  title = {Depth-Based Classification for Functional Data},
+  shorttitle = {Data {{Depth}}},
+  booktitle = {Data {{Depth}}: {{Robust Multivariate Analysis}}, {{Computational Geometry}} and {{Applications}}},
+  author = {Pintado, Sara and Romo, Juan},
+  year = {2005},
+  month = nov,
+  series = {{{DIMACS Series}} in {{Discrete Mathematics}} and {{Theoretical Computer Science}}},
+  volume = {72},
+  pages = {103--119},
+  publisher = {{American Mathematical Society}},
+  doi = {10.1090/dimacs/072/08},
+  isbn = {978-0-8218-3596-8}
+}
+
 @inbook{ramsay+silverman_2005_functionala,
   title = {From Functional Data to Smooth Functions},
   booktitle = {Functional Data Analysis},
diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py
index dd19c2f1a..4bac8ad9e 100644
--- a/skfda/ml/classification/_centroid_classifiers.py
+++ b/skfda/ml/classification/_centroid_classifiers.py
@@ -127,7 +127,7 @@
class DTMClassifier(NearestCentroid[Input, Target]): Test samples are classified to the class that minimizes the distance of the observation to the trimmed mean of the group - :footcite:`fraiman+muniz_2001_trimmed`. + :footcite:`pintado+romo_2005_depthbased`. Parameters: proportiontocut: @@ -174,6 +174,7 @@ class DTMClassifier(NearestCentroid[Input, Target]): See also: :class:`~skfda.ml.classification.NearestCentroid` + :class:`~skfda.exploratory.stats.trim_mean` References: .. footbibliography:: From 1bac4963e5861f6c769c496a3d0ff6649673d12a Mon Sep 17 00:00:00 2001 From: pcuestas Date: Wed, 4 Oct 2023 13:52:21 +0200 Subject: [PATCH 08/17] Fix message and condition (n_basis < order) in the initialization of a BSplineBasis. Condition is equivalent to previous one. The message has been corrected to match the condition. --- skfda/representation/basis/_bspline_basis.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/skfda/representation/basis/_bspline_basis.py b/skfda/representation/basis/_bspline_basis.py index 7b4898f96..15059e49c 100644 --- a/skfda/representation/basis/_bspline_basis.py +++ b/skfda/representation/basis/_bspline_basis.py @@ -129,11 +129,10 @@ def __init__( # noqa: WPS238 ) n_basis = len(knots) + order - 2 - if (n_basis - order + 2) < 2: + if n_basis < order: raise ValueError( - f"The number of basis ({n_basis}) minus the " - f"order of the bspline ({order}) should be " - f"greater than 3.", + f"The number of basis ({n_basis}) should not be smaller " + f"than the order of the bspline ({order}).", ) self._order = order From 37536a1658c9389b0d65b55229f6ffed76d62f3b Mon Sep 17 00:00:00 2001 From: pcuestas Date: Wed, 4 Oct 2023 14:10:10 +0200 Subject: [PATCH 09/17] Change `ddof` argument name to `correction` --- skfda/exploratory/stats/_stats.py | 20 +++++++-------- skfda/misc/covariances.py | 30 +++++++++++------------ skfda/misc/scoring.py | 2 +- skfda/ml/clustering/_kmeans.py | 2 +- skfda/representation/_functional_data.py | 12 ++++----- skfda/representation/basis/_fdatabasis.py | 25 ++++++++++--------- skfda/representation/grid.py | 24 +++++++++--------- 7 files changed, 58 insertions(+), 57 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 20d2443e2..835eb93e6 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -41,22 +41,22 @@ def mean( return (X * weight).sum() -def var(X: FData, ddof: int = 1) -> FDataGrid: +def var(X: FData, correction: int = 1) -> FDataGrid: """ Compute the variance of a set of samples in a FData object. Args: X: Object containing all the set of samples whose variance is desired. - ddof: "Delta Degrees of Freedom": the divisor used in the calculation - is `N - ddof`, where `N` represents the number of elements. By - default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the number of + elements. Default: `1`. Returns: Variance of all the samples in the original object, as a :term:`functional data object` with just one sample. """ - return X.var(ddof=ddof) # type: ignore[no-any-return] + return X.var(correction=correction) # type: ignore[no-any-return] def gmean(X: FDataGrid) -> FDataGrid: @@ -76,7 +76,7 @@ def gmean(X: FDataGrid) -> FDataGrid: def cov( X: FData, - ddof: int = 1 + correction: int = 1, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: """ Compute the covariance. 
@@ -86,9 +86,9 @@ def cov(
     Args:
         X: Object containing different samples of a functional variable.
-        ddof: "Delta Degrees of Freedom": the divisor used in the calculation
-            is `N - ddof`, where `N` represents the number of elements. By
-            default `ddof` is 1.
+        correction: degrees of freedom adjustment. The divisor used in the
+            calculation is `N - correction`, where `N` represents the number of
+            elements. Default: `1`.

     Returns:
@@ -96,7 +96,7 @@ def cov(
         callable.

     """
-    return X.cov(ddof=ddof)
+    return X.cov(correction=correction)


 def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat:
diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py
index e74dab770..05d94f21f 100644
--- a/skfda/misc/covariances.py
+++ b/skfda/misc/covariances.py
@@ -768,27 +768,27 @@ class Empirical(Covariance):
     The sample covariance function is defined as

     .. math::
-        K(t, s) = \frac{1}{N-\text{ddof}}\sum_{n=1}^N\left(x_n(t) -
+        K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) -
         \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right)

     where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the
     mean of the samples. :math:`N` is the number of samples,
-    :math:`\text{ddof}` means "Delta Degrees of Freedom" and is such that
-    :math:`N-\text{ddof}` is the divisor used in the calculation of the
-    covariance function.
+    :math:`\text{correction}` is the degrees of freedom adjustment and is such
+    that :math:`N-\text{correction}` is the divisor used in the calculation of
+    the covariance function.

     """

     _latex_formula = (
-        r"K(t, s) = \frac{1}{N-\text{ddof}}\sum_{n=1}^N(x_n(t) - \bar{x}(t))"
-        r"(x_n(s) - \bar{x}(s))"
+        r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N"
+        r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))"
     )

     _parameters_str = [
         ("data", "data"),
     ]

     cov_fdata: FData
-    ddof: int
+    correction: int

     @abc.abstractmethod
     def __init__(self, data: FData) -> None:
@@ -815,17 +815,17 @@ class EmpiricalGrid(Empirical):
     """Sample covariance function for FDataGrid."""

     cov_fdata: FDataGrid
-    ddof: int
+    correction: int

-    def __init__(self, data: FDataGrid, ddof: int = 1) -> None:
+    def __init__(self, data: FDataGrid, correction: int = 1) -> None:
         super().__init__(data=data)

-        self.ddof = ddof
+        self.correction = correction
         self.cov_fdata = data.copy(
             data_matrix=np.cov(
                 data.data_matrix[..., 0],
                 rowvar=False,
-                ddof=ddof,
+                ddof=correction,
             )[np.newaxis, ...],
             grid_points=[
                 data.grid_points[0],
@@ -851,16 +851,16 @@ class EmpiricalBasis(Empirical):

     cov_fdata: FDataBasis
     coeff_matrix: NDArrayFloat
-    ddof: int
+    correction: int

-    def __init__(self, data: FDataBasis, ddof: int = 1) -> None:
+    def __init__(self, data: FDataBasis, correction: int = 1) -> None:
         super().__init__(data=data)

-        self.ddof = ddof
+        self.correction = correction
         self.coeff_matrix = np.cov(
             data.coefficients,
             rowvar=False,
-            ddof=ddof,
+            ddof=correction,
         )
         self.cov_fdata = FDataBasis(
             basis=TensorBasis([data.basis, data.basis]),
diff --git a/skfda/misc/scoring.py b/skfda/misc/scoring.py
index 419d25c0f..7491b92ff 100644
--- a/skfda/misc/scoring.py
+++ b/skfda/misc/scoring.py
@@ -74,7 +74,7 @@ def _var(
     from ..exploratory.stats import mean, var

     if weights is None:
-        return var(x, ddof=0)
+        return var(x, correction=0)

     return mean(  # type: ignore[no-any-return]
         np.power(x - mean(x, weights=weights), 2),
diff --git a/skfda/ml/clustering/_kmeans.py b/skfda/ml/clustering/_kmeans.py
index 9d58e068a..c433eb186 100644
--- a/skfda/ml/clustering/_kmeans.py
+++ b/skfda/ml/clustering/_kmeans.py
@@ -147,7 +147,7 @@ def
_check_clustering(self, fdata: Input) -> Input: return fdata def _tolerance(self, fdata: Input) -> float: - variance = fdata.var(ddof=0) + variance = fdata.var(correction=0) mean_variance = np.mean(variance[0].data_matrix) return float(mean_variance * self.tol) diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 2696e7b25..f62e8ccde 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -826,7 +826,7 @@ def cov( # noqa: WPS451 s_points: NDArrayFloat, t_points: NDArrayFloat, /, - ddof: int = 1, + correction: int = 1, ) -> NDArrayFloat: pass @@ -834,7 +834,7 @@ def cov( # noqa: WPS451 def cov( # noqa: WPS451 self: T, /, - ddof: int = 1, + correction: int = 1, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: pass @@ -844,7 +844,7 @@ def cov( # noqa: WPS320, WPS451 s_points: Optional[NDArrayFloat] = None, t_points: Optional[NDArrayFloat] = None, /, - ddof: int = 1, + correction: int = 1, ) -> Union[ Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat], NDArrayFloat, @@ -864,9 +864,9 @@ def cov( # noqa: WPS320, WPS451 Args: s_points: Points where the covariance function is evaluated. t_points: Points where the covariance function is evaluated. - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number - of elements. By default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `1`. Returns: Covariance function. diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 8289471d4..12608dda3 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -445,7 +445,7 @@ def sum( # noqa: WPS125 def var( self: T, eval_points: Optional[NDArrayFloat] = None, - ddof: int = 1, + correction: int = 1, ) -> T: """Compute the variance of the functional data object. @@ -460,15 +460,16 @@ def var( numpy.linspace with bounds equal to the ones defined in self.domain_range and the number of points the maximum between 501 and 10 times the number of basis. - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number of - elements. By default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `1`. Returns: Variance of the original object. """ - return self.to_grid(eval_points).var(ddof=ddof).to_basis(self.basis) + return self.to_grid( + eval_points).var(correction=correction).to_basis(self.basis) @overload def cov( # noqa: WPS451 @@ -476,7 +477,7 @@ def cov( # noqa: WPS451 s_points: NDArrayFloat, t_points: NDArrayFloat, /, - ddof: int = 1, + correction: int = 1, ) -> NDArrayFloat: pass @@ -484,7 +485,7 @@ def cov( # noqa: WPS451 def cov( # noqa: WPS451 self: T, /, - ddof: int = 1, + correction: int = 1, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: pass @@ -493,7 +494,7 @@ def cov( # noqa: WPS320, WPS451 s_points: Optional[NDArrayFloat] = None, t_points: Optional[NDArrayFloat] = None, /, - ddof: int = 1, + correction: int = 1, ) -> Union[ Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat], NDArrayFloat, @@ -515,9 +516,9 @@ def cov( # noqa: WPS320, WPS451 Args: s_points: Points where the covariance function is evaluated. 
t_points: Points where the covariance function is evaluated. - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number - of elements. By default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `1`. Returns: Covariance function. @@ -525,7 +526,7 @@ def cov( # noqa: WPS320, WPS451 """ # To avoid circular imports from ...misc.covariances import EmpiricalBasis - cov_function = EmpiricalBasis(self, ddof=ddof) + cov_function = EmpiricalBasis(self, correction=correction) if s_points is None or t_points is None: return cov_function return cov_function(s_points, t_points) diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 5801d3a01..f4597c7eb 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -582,13 +582,13 @@ def sum( # noqa: WPS125 sample_names=(None,), ) - def var(self: T, ddof: int = 1) -> T: + def var(self: T, correction: int = 1) -> T: """Compute the variance of a set of samples in a FDataGrid object. Args: - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number of - elements. By default `ddof` is 1. + correction: "Delta Degrees of Freedom": the divisor used in the + calculation is `N - correction`, where `N` represents the number of + elements. By default `correction` is 1. Returns: A FDataGrid object with just one sample representing the @@ -599,7 +599,7 @@ def var(self: T, ddof: int = 1) -> T: data_matrix=np.array([np.var( self.data_matrix, axis=0, - ddof=ddof, + ddof=correction, )]), sample_names=("variance",), ) @@ -610,7 +610,7 @@ def cov( # noqa: WPS451 s_points: NDArrayFloat, t_points: NDArrayFloat, /, - ddof: int = 1, + correction: int = 1, ) -> NDArrayFloat: pass @@ -618,7 +618,7 @@ def cov( # noqa: WPS451 def cov( # noqa: WPS451 self: T, /, - ddof: int = 1, + correction: int = 1, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: pass @@ -627,7 +627,7 @@ def cov( # noqa: WPS320, WPS451 s_points: Optional[NDArrayFloat] = None, t_points: Optional[NDArrayFloat] = None, /, - ddof: int = 1, + correction: int = 1, ) -> Union[ Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat], NDArrayFloat, @@ -643,9 +643,9 @@ def cov( # noqa: WPS320, WPS451 Args: s_points: Grid points where the covariance function is evaluated. t_points: Grid points where the covariance function is evaluated. - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number - of elements. By default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `1`. Returns: Covariance function. 
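
A usage sketch of the interface documented above (illustrative only, not
part of the diff; the data values are made up):

    import numpy as np
    import skfda

    fd = skfda.FDataGrid(
        data_matrix=[[1.0, 2.0, 3.0], [2.0, 3.0, 4.0]],
        grid_points=[[0.0, 1.0, 2.0]],
    )
    t = np.array([0.0, 1.0, 2.0])

    # Without evaluation points, cov returns the covariance as a callable.
    cov_function = fd.cov(correction=1)

    # With evaluation points, it returns the evaluated covariance values.
    cov_values = fd.cov(t, t, correction=1)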
@@ -653,7 +653,7 @@ def cov( # noqa: WPS320, WPS451 """ # To avoid circular imports from ..misc.covariances import EmpiricalGrid - cov_function = EmpiricalGrid(self, ddof=ddof) + cov_function = EmpiricalGrid(self, correction=correction) if s_points is None or t_points is None: return cov_function return cov_function(s_points, t_points) From 025f8c021a705acf56b36ab6c05cce01a13f4b67 Mon Sep 17 00:00:00 2001 From: pcuestas Date: Wed, 4 Oct 2023 14:19:41 +0200 Subject: [PATCH 10/17] Fix trailing whitespace --- skfda/representation/basis/_fdatabasis.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 12608dda3..ae4ef0813 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -461,7 +461,7 @@ def var( self.domain_range and the number of points the maximum between 501 and 10 times the number of basis. correction: degrees of freedom adjustment. The divisor used in the - calculation is `N - correction`, where `N` represents the + calculation is `N - correction`, where `N` represents the number of elements. Default: `1`. Returns: @@ -469,7 +469,8 @@ def var( """ return self.to_grid( - eval_points).var(correction=correction).to_basis(self.basis) + eval_points, + ).var(correction=correction).to_basis(self.basis) @overload def cov( # noqa: WPS451 From d75b60ba49c874fd512b243ffd08ebf958cb327c Mon Sep 17 00:00:00 2001 From: VNMabus Date: Mon, 9 Oct 2023 11:41:52 +0200 Subject: [PATCH 11/17] Change simps for simpson. Extract also average_function_value. --- skfda/_utils/ndfunction/__init__.py | 0 skfda/_utils/ndfunction/_functions.py | 116 ++++++++++++++++++ skfda/exploratory/depth/_depth.py | 2 +- .../outliers/_directional_outlyingness.py | 4 +- skfda/exploratory/stats/_fisher_rao.py | 10 +- skfda/exploratory/stats/_stats.py | 2 +- skfda/misc/_math.py | 2 +- skfda/misc/metrics/_fisher_rao.py | 6 +- skfda/misc/metrics/_lp_norms.py | 2 +- .../ml/regression/_historical_linear_model.py | 2 +- skfda/preprocessing/dim_reduction/_fpca.py | 2 +- .../feature_construction/_functions.py | 73 +++-------- skfda/representation/grid.py | 2 +- 13 files changed, 153 insertions(+), 70 deletions(-) create mode 100644 skfda/_utils/ndfunction/__init__.py create mode 100644 skfda/_utils/ndfunction/_functions.py diff --git a/skfda/_utils/ndfunction/__init__.py b/skfda/_utils/ndfunction/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/skfda/_utils/ndfunction/_functions.py b/skfda/_utils/ndfunction/_functions.py new file mode 100644 index 000000000..f65747085 --- /dev/null +++ b/skfda/_utils/ndfunction/_functions.py @@ -0,0 +1,116 @@ +"""Functions for working with arrays of functions.""" +from __future__ import annotations + +import math +from typing import ( + TYPE_CHECKING, + Any, + Literal, + Protocol, + TypeVar, + runtime_checkable, +) + +from ...misc.validation import validate_domain_range +from .. 
import nquad_vec
+
+if TYPE_CHECKING:
+    from ...typing._numpy import NDArrayFloat
+    from ...representation import FData
+    from ...typing._base import DomainRangeLike
+
+
+UfuncMethod = Literal[
+    "__call__",
+    "reduce",
+    "reduceat",
+    "accumulate",
+    "outer",
+    "inner",
+]
+
+
+class _SupportsArrayUFunc(Protocol):
+    def __array_ufunc__(
+        self,
+        ufunc: Any,
+        method: UfuncMethod,
+        *inputs: Any,
+        **kwargs: Any,
+    ) -> Any:
+        pass
+
+
+T = TypeVar("T", bound=_SupportsArrayUFunc)
+
+
+class _UnaryUfunc(Protocol):
+
+    def __call__(self, __arg: T) -> T:  # noqa: WPS112
+        pass
+
+
+def _average_function_ufunc(
+    data: FData,
+    ufunc: _UnaryUfunc,
+    *,
+    domain: DomainRangeLike | None = None,
+) -> NDArrayFloat:
+
+    if domain is None:
+        domain = data.domain_range
+    else:
+        domain = validate_domain_range(domain)
+
+    lebesgue_measure = math.prod(
+        (
+            (interval[1] - interval[0])
+            for interval in domain
+        ),
+    )
+
+    try:
+        data_eval = ufunc(data)
+    except TypeError:
+
+        def integrand(*args: NDArrayFloat) -> NDArrayFloat:  # noqa: WPS430
+            f1 = data(args)[:, 0, :]
+            return ufunc(f1)
+
+        return nquad_vec(
+            integrand,
+            domain,
+        ) / lebesgue_measure
+
+    else:
+        return data_eval.integrate(domain=domain) / lebesgue_measure
+
+
+def average_function_value(
+    data: FData,
+    *,
+    domain: DomainRangeLike | None = None,
+) -> NDArrayFloat:
+    r"""
+    Calculate the average function value for each function.
+
+    This is the value that, if integrated over the whole domain of each
+    function, has the same integral as the function itself.
+
+    .. math::
+        \bar{x} = \frac{1}{\text{Vol}(\mathcal{T})}\int_{\mathcal{T}} x(t) dt
+
+    Args:
+        data: Functions whose average value we want to calculate.
+        domain: Integration domain. By default, the whole domain is used.
+
+    Returns:
+        ndarray of shape (n_samples, n_dimensions) with the average
+        function values.
+ + See also: + `Entry on Wikipedia + `_ + + """ + return _average_function_ufunc(data, ufunc=lambda x: x, domain=domain) diff --git a/skfda/exploratory/depth/_depth.py b/skfda/exploratory/depth/_depth.py index a0d27c2e5..dcacd8446 100644 --- a/skfda/exploratory/depth/_depth.py +++ b/skfda/exploratory/depth/_depth.py @@ -92,7 +92,7 @@ def transform(self, X: FDataGrid) -> NDArrayFloat: # noqa: D102 integrand = pointwise_depth for d, s in zip(X.domain_range, X.grid_points): - integrand = scipy.integrate.simps( + integrand = scipy.integrate.simpson( integrand, x=s, axis=1, diff --git a/skfda/exploratory/outliers/_directional_outlyingness.py b/skfda/exploratory/outliers/_directional_outlyingness.py index 30e8338d3..675d149c6 100644 --- a/skfda/exploratory/outliers/_directional_outlyingness.py +++ b/skfda/exploratory/outliers/_directional_outlyingness.py @@ -213,7 +213,7 @@ def directional_outlyingness_stats( # noqa: WPS218 ) assert weighted_dir_outlyingness.shape == dir_outlyingness.shape - mean_dir_outlyingness = scipy.integrate.simps( + mean_dir_outlyingness = scipy.integrate.simpson( weighted_dir_outlyingness, fdatagrid.grid_points[0], axis=1, @@ -230,7 +230,7 @@ def directional_outlyingness_stats( # noqa: WPS218 axis=-1, )) weighted_norm = norm * pointwise_weights - variation_dir_outlyingness = scipy.integrate.simps( + variation_dir_outlyingness = scipy.integrate.simpson( weighted_norm, fdatagrid.grid_points[0], axis=1, diff --git a/skfda/exploratory/stats/_fisher_rao.py b/skfda/exploratory/stats/_fisher_rao.py index c897b13fc..5a3ffe6c9 100644 --- a/skfda/exploratory/stats/_fisher_rao.py +++ b/skfda/exploratory/stats/_fisher_rao.py @@ -133,7 +133,7 @@ def _fisher_rao_warping_mean( # Compute shooting vectors for psi_i in psi_data: - inner = scipy.integrate.simps(mu * psi_i, x=eval_points) + inner = scipy.integrate.simpson(mu * psi_i, x=eval_points) inner = max(min(inner, 1), -1) theta = np.arccos(inner) @@ -143,7 +143,7 @@ def _fisher_rao_warping_mean( # Mean of shooting vectors vmean /= warping.n_samples - v_norm = np.sqrt(scipy.integrate.simps(np.square(vmean))) + v_norm = np.sqrt(scipy.integrate.simpson(np.square(vmean))) # Convergence criterion if v_norm < tol: @@ -266,7 +266,7 @@ def fisher_rao_karcher_mean( # Initialize with function closest to the L2 mean with the L2 distance centered = (srsf.T - srsf.mean(axis=0, keepdims=True).T).T - distances = scipy.integrate.simps( + distances = scipy.integrate.simpson( np.square(centered, out=centered), eval_points_normalized, axis=1, @@ -304,14 +304,14 @@ def fisher_rao_karcher_mean( # Convergence criterion mu_norm = np.sqrt( - scipy.integrate.simps( + scipy.integrate.simpson( np.square(mu, out=mu_aux), eval_points_normalized, ), ) mu_diff = np.sqrt( - scipy.integrate.simps( + scipy.integrate.simpson( np.square(mu - mu_1, out=mu_aux), eval_points_normalized, ), diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 835eb93e6..9acaefeb7 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -124,7 +124,7 @@ def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat: integrand = num_functions_above for d, s in zip(X.domain_range, X.grid_points): - integrand = integrate.simps( + integrand = integrate.simpson( integrand, x=s, axis=1, diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 4e02de2f8..94b440504 100644 --- a/skfda/misc/_math.py +++ b/skfda/misc/_math.py @@ -363,7 +363,7 @@ def _inner_product_fdatagrid( # Perform quadrature inside the einsum for i, s in 
enumerate(arg1.grid_points[::-1]): identity = np.eye(len(s)) - weights = scipy.integrate.simps(identity, x=s) + weights = scipy.integrate.simpson(identity, x=s) index = (slice(None),) + (np.newaxis,) * (i + 1) d1 *= weights[index] diff --git a/skfda/misc/metrics/_fisher_rao.py b/skfda/misc/metrics/_fisher_rao.py index 7fd18e4da..bfc4e2f89 100644 --- a/skfda/misc/metrics/_fisher_rao.py +++ b/skfda/misc/metrics/_fisher_rao.py @@ -234,7 +234,7 @@ def fisher_rao_amplitude_distance( penalty = np.sqrt(penalty, out=penalty) penalty -= 1 penalty = np.square(penalty, out=penalty) - penalty = scipy.integrate.simps(penalty, x=eval_points_normalized) + penalty = scipy.integrate.simpson(penalty, x=eval_points_normalized) distance = np.sqrt(distance**2 + lam * penalty) @@ -322,7 +322,7 @@ def fisher_rao_phase_distance( derivative_warping = np.sqrt(derivative_warping, out=derivative_warping) - d = scipy.integrate.simps(derivative_warping, x=eval_points_normalized) + d = scipy.integrate.simpson(derivative_warping, x=eval_points_normalized) d = np.clip(d, -1, 1) return np.arccos(d) # type: ignore[no-any-return] @@ -394,7 +394,7 @@ def _fisher_rao_warping_distance( product = np.multiply(srsf_warping1, srsf_warping2, out=srsf_warping1) - d = scipy.integrate.simps(product, x=warping1.grid_points[0]) + d = scipy.integrate.simpson(product, x=warping1.grid_points[0]) d = np.clip(d, -1, 1) return np.arccos(d) # type: ignore[no-any-return] diff --git a/skfda/misc/metrics/_lp_norms.py b/skfda/misc/metrics/_lp_norms.py index 2a6c2f9a6..e0cd2904f 100644 --- a/skfda/misc/metrics/_lp_norms.py +++ b/skfda/misc/metrics/_lp_norms.py @@ -161,7 +161,7 @@ def __call__(self, vector: Union[NDArrayFloat, FData]) -> NDArrayFloat: # Computes the norm, approximating the integral with Simpson's # rule. 
- res = scipy.integrate.simps( + res = scipy.integrate.simpson( data_matrix[..., 0] ** self.p, x=vector.grid_points, ) ** (1 / self.p) diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index de5dc38bf..21377c77c 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -42,7 +42,7 @@ def _pairwise_fem_inner_product( eval_fd = fd(grid) prod = eval_fem * eval_fd - integral = scipy.integrate.simps(prod, grid, axis=1) + integral = scipy.integrate.simpson(prod, grid, axis=1) return np.sum(integral, axis=-1) # type: ignore[no-any-return] diff --git a/skfda/preprocessing/dim_reduction/_fpca.py b/skfda/preprocessing/dim_reduction/_fpca.py index f5c88a54a..9110f31c0 100644 --- a/skfda/preprocessing/dim_reduction/_fpca.py +++ b/skfda/preprocessing/dim_reduction/_fpca.py @@ -348,7 +348,7 @@ def _fit_grid( if self._weights is None: # grid_points is a list with one array in the 1D case identity = np.eye(len(X.grid_points[0])) - self._weights = scipy.integrate.simps(identity, X.grid_points[0]) + self._weights = scipy.integrate.simpson(identity, X.grid_points[0]) elif callable(self._weights): self._weights = self._weights(X.grid_points[0]) # if its a FDataGrid then we need to reduce the dimension to 1-D diff --git a/skfda/preprocessing/feature_construction/_functions.py b/skfda/preprocessing/feature_construction/_functions.py index 3359a77af..e426afaa4 100644 --- a/skfda/preprocessing/feature_construction/_functions.py +++ b/skfda/preprocessing/feature_construction/_functions.py @@ -3,13 +3,16 @@ from __future__ import annotations import itertools -from typing import Optional, Sequence, Tuple, TypeVar, Union, cast, overload +from typing import Optional, Sequence, Tuple, TypeVar, Union, cast import numpy as np -from typing_extensions import Literal, Protocol, TypeGuard +from typing_extensions import Literal, TypeGuard -from ..._utils import nquad_vec -from ...misc.validation import check_fdata_dimensions, validate_domain_range +from ..._utils.ndfunction._functions import ( + _average_function_ufunc, + _UnaryUfunc, +) +from ...misc.validation import check_fdata_dimensions from ...representation import FData, FDataBasis, FDataGrid from ...typing._base import DomainRangeLike from ...typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt @@ -17,20 +20,6 @@ T = TypeVar("T", bound=Union[NDArrayFloat, FDataGrid]) -class _BasicUfuncProtocol(Protocol): - - @overload - def __call__(self, __arg: FDataGrid) -> FDataGrid: # noqa: WPS112 - pass - - @overload - def __call__(self, __arg: NDArrayFloat) -> NDArrayFloat: # noqa: WPS112 - pass - - def __call__(self, __arg: T) -> T: # noqa: WPS112 - pass - - def _sequence_of_ints(data: Sequence[object]) -> TypeGuard[Sequence[int]]: """Check that every element is an integer.""" return all(isinstance(d, int) for d in data) @@ -332,7 +321,7 @@ def number_crossings( >>> from skfda.preprocessing.feature_construction import ( ... number_crossings, - ... ) + ... 
)
     >>> from scipy.special import jv
     >>> import numpy as np
     >>> x_grid = np.linspace(0, 14, 14)
@@ -521,7 +510,7 @@ def unconditional_moment(

 def unconditional_expected_value(
     data: FData,
-    function: _BasicUfuncProtocol,
+    function: _UnaryUfunc,
     *,
     domain: DomainRangeLike | None = None,
 ) -> NDArrayFloat:
@@ -537,19 +526,19 @@ def unconditional_expected_value(
         f_p(x(t))=\frac{1}{\left( b-a\right)}\int_a^b g
         \left(x_p(t)\right) dt

-        Args:
-            data: FDataGrid or FDataBasis where we want to calculate
-                the expected value.
-            function: function that specifies how the expected value to is
-                calculated. It has to be a function of X(t).
-            domain: Integration domain. By default, the whole domain is used.
+    Args:
+        data: FDataGrid or FDataBasis where we want to calculate
+            the expected value.
+        function: function that specifies how the expected value is
+            calculated. It has to be a function of X(t).
+        domain: Integration domain. By default, the whole domain is used.

-        Returns:
-            ndarray of shape (n_dimensions, n_samples) with the values of the
-            expectations.
+    Returns:
+        ndarray of shape (n_samples, n_dimensions) with the values of the
+        expectations.

     Examples:
-        We will use this funtion to calculate the logarithmic first moment
+        We will use this function to calculate the logarithmic first moment
         of the first 5 samples of the Berkeley Growth dataset.
         We will start by importing it.
@@ -574,26 +563,4 @@ def unconditional_expected_value(
                [ 4.84]])

     """
-    if domain is None:
-        domain = data.domain_range
-    else:
-        domain = validate_domain_range(domain)
-
-    lebesgue_measure = np.prod(
-        [
-            (iterval[1] - iterval[0])
-            for iterval in domain
-        ],
-    )
-
-    if isinstance(data, FDataGrid):
-        return function(data).integrate(domain=domain) / lebesgue_measure
-
-    def integrand(*args: NDArrayFloat) -> NDArrayFloat:  # noqa: WPS430
-        f1 = data(args)[:, 0, :]
-        return function(f1)
-
-    return nquad_vec(
-        integrand,
-        domain,
-    ) / lebesgue_measure
+    return _average_function_ufunc(data, ufunc=function, domain=domain)
diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py
index f4597c7eb..fc820c935 100644
--- a/skfda/representation/grid.py
+++ b/skfda/representation/grid.py
@@ -515,7 +515,7 @@ def integrate(
         integrand = data.data_matrix

         for g in data.grid_points[::-1]:
-            integrand = scipy.integrate.simps(
+            integrand = scipy.integrate.simpson(
                 integrand,
                 x=g,
                 axis=-2,

From cc486a34af34f62348a0e45da2eefac741f1134f Mon Sep 17 00:00:00 2001
From: VNMabus
Date: Mon, 9 Oct 2023 14:31:03 +0200
Subject: [PATCH 12/17] Remove some direct uses of Simpson quadrature.
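
The rewritten call sites integrate through the FDataGrid API instead of
calling Simpson quadrature by hand. A rough sketch of the equivalence
(illustrative only, not part of the diff; the data values are made up):

    import numpy as np
    import scipy.integrate
    import skfda

    fd = skfda.FDataGrid(
        data_matrix=[[1.0, 4.0, 9.0]],
        grid_points=[[0.0, 1.0, 2.0]],
    )

    # Direct quadrature over the data matrix...
    direct = scipy.integrate.simpson(
        fd.data_matrix[..., 0],
        x=fd.grid_points[0],
        axis=-1,
    )

    # ...matches integrating through the FDataGrid method, which uses
    # scipy.integrate.simpson internally.
    via_api = fd.integrate().ravel()

    np.testing.assert_allclose(direct, via_api)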
--- skfda/_utils/ndfunction/__init__.py | 18 +++++ skfda/_utils/ndfunction/_functions.py | 9 +-- skfda/exploratory/depth/_depth.py | 22 ++---- .../outliers/_directional_outlyingness.py | 69 +++++++++++-------- skfda/representation/grid.py | 5 +- 5 files changed, 65 insertions(+), 58 deletions(-) diff --git a/skfda/_utils/ndfunction/__init__.py b/skfda/_utils/ndfunction/__init__.py index e69de29bb..aa0d1fe33 100644 --- a/skfda/_utils/ndfunction/__init__.py +++ b/skfda/_utils/ndfunction/__init__.py @@ -0,0 +1,18 @@ +from typing import TYPE_CHECKING + +import lazy_loader as lazy + +__getattr__, __dir__, __all__ = lazy.attach( + __name__, + submod_attrs={ + "_functions": [ + "average_function_value", + ], + }, +) + +if TYPE_CHECKING: + + from ._functions import ( + average_function_value as average_function_value, + ) diff --git a/skfda/_utils/ndfunction/_functions.py b/skfda/_utils/ndfunction/_functions.py index f65747085..8f7c9b980 100644 --- a/skfda/_utils/ndfunction/_functions.py +++ b/skfda/_utils/ndfunction/_functions.py @@ -2,14 +2,7 @@ from __future__ import annotations import math -from typing import ( - TYPE_CHECKING, - Any, - Literal, - Protocol, - TypeVar, - runtime_checkable, -) +from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar from ...misc.validation import validate_domain_range from .. import nquad_vec diff --git a/skfda/exploratory/depth/_depth.py b/skfda/exploratory/depth/_depth.py index dcacd8446..68e51dee8 100644 --- a/skfda/exploratory/depth/_depth.py +++ b/skfda/exploratory/depth/_depth.py @@ -11,9 +11,9 @@ from typing import TypeVar import numpy as np -import scipy.integrate from ..._utils._sklearn_adapter import BaseEstimator +from ..._utils.ndfunction import average_function_value from ...misc.metrics import l2_distance from ...misc.metrics._utils import _fit_metric from ...representation import FData, FDataGrid @@ -82,25 +82,11 @@ def fit( # noqa: D102 def transform(self, X: FDataGrid) -> NDArrayFloat: # noqa: D102 - pointwise_depth = self.multivariate_depth_.transform(X.data_matrix) - - interval_len = ( - self._domain_range[0][1] - - self._domain_range[0][0] + pointwise_depth = X.copy( + data_matrix=self.multivariate_depth_.transform(X.data_matrix), ) - integrand = pointwise_depth - - for d, s in zip(X.domain_range, X.grid_points): - integrand = scipy.integrate.simpson( - integrand, - x=s, - axis=1, - ) - interval_len = d[1] - d[0] - integrand /= interval_len - - return integrand + return average_function_value(pointwise_depth).ravel() @property def max(self) -> float: diff --git a/skfda/exploratory/outliers/_directional_outlyingness.py b/skfda/exploratory/outliers/_directional_outlyingness.py index 675d149c6..91ac8ee67 100644 --- a/skfda/exploratory/outliers/_directional_outlyingness.py +++ b/skfda/exploratory/outliers/_directional_outlyingness.py @@ -98,7 +98,7 @@ def directional_outlyingness_stats( # noqa: WPS218 Method used to order the data. Defaults to :func:`projection depth `. pointwise_weights (array_like, optional): an array containing the - weights of each point of discretisation where values have been + weights of each point of discretization where values have been recorded. Defaults to the same weight for each of the points: 1/len(interval). 
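
For reference, a minimal call of the function whose docstring is touched
above (illustrative only, not part of the diff; the data values are made
up and the default multivariate depth and pointwise weights are used):

    import skfda
    from skfda.exploratory.outliers import directional_outlyingness_stats

    fd = skfda.FDataGrid(
        data_matrix=[[1.0, 2.0, 3.0], [2.0, 3.0, 4.0], [0.0, 1.0, 5.0]],
        grid_points=[[0.0, 1.0, 2.0]],
    )

    stats = directional_outlyingness_stats(fd)
    # Named tuple defined in this module, with the directional
    # outlyingness and its functional, mean and variation parts.
    print(stats.functional_directional_outlyingness)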
@@ -183,58 +183,67 @@ def directional_outlyingness_stats( # noqa: WPS218 fdatagrid.domain_range[0][1] - fdatagrid.domain_range[0][0] ) + if not isinstance(pointwise_weights, FDataGrid): + pointwise_weights = fdatagrid.copy( + data_matrix=pointwise_weights, + sample_names=(None,), + ) + depth_pointwise = multivariate_depth(fdatagrid.data_matrix) assert depth_pointwise.shape == fdatagrid.data_matrix.shape[:-1] + depth_pointwise_fd = fdatagrid.copy( + data_matrix=depth_pointwise, + ) + # Obtaining the pointwise median sample Z, to calculate # v(t) = {X(t) − Z(t)}/|| X(t) − Z(t) || median_index = np.argmax(depth_pointwise, axis=0) - pointwise_median = fdatagrid.data_matrix[ - median_index, - range(fdatagrid.data_matrix.shape[1]), - ] - assert pointwise_median.shape == fdatagrid.data_matrix.shape[1:] - v = fdatagrid.data_matrix - pointwise_median - assert v.shape == fdatagrid.data_matrix.shape - v_norm = la.norm(v, axis=-1, keepdims=True) + pointwise_median = fdatagrid.copy( + data_matrix=fdatagrid.data_matrix[ + median_index, + range(fdatagrid.data_matrix.shape[1]), + ][np.newaxis], + sample_names=(None,), + ) + + v = fdatagrid - pointwise_median + v_norm = la.norm(v.data_matrix, axis=-1, keepdims=True) # To avoid ZeroDivisionError, the zeros are substituted by ones (the # reference implementation also does this). v_norm[np.where(v_norm == 0)] = 1 - v_unitary = v / v_norm + v.data_matrix /= v_norm - # Calculation directinal outlyingness - dir_outlyingness = (1 / depth_pointwise[..., np.newaxis] - 1) * v_unitary + # Calculation directional outlyingness + dir_outlyingness = ( + 1 / depth_pointwise_fd - 1 + ) * v # Calculation mean directional outlyingness weighted_dir_outlyingness = ( - dir_outlyingness * pointwise_weights[:, np.newaxis] + dir_outlyingness * pointwise_weights ) - assert weighted_dir_outlyingness.shape == dir_outlyingness.shape - mean_dir_outlyingness = scipy.integrate.simpson( - weighted_dir_outlyingness, - fdatagrid.grid_points[0], - axis=1, - ) + mean_dir_outlyingness = weighted_dir_outlyingness.integrate() assert mean_dir_outlyingness.shape == ( fdatagrid.n_samples, fdatagrid.dim_codomain, ) # Calculation variation directional outlyingness - norm = np.square(la.norm( - dir_outlyingness - - mean_dir_outlyingness[:, np.newaxis, :], - axis=-1, - )) - weighted_norm = norm * pointwise_weights - variation_dir_outlyingness = scipy.integrate.simpson( - weighted_norm, - fdatagrid.grid_points[0], - axis=1, + norm = dir_outlyingness.copy( + data_matrix=np.square( + la.norm( + dir_outlyingness.data_matrix + - mean_dir_outlyingness[:, np.newaxis, :], + axis=-1, + ) + ) ) + weighted_norm = norm * pointwise_weights + variation_dir_outlyingness = weighted_norm.integrate().ravel() assert variation_dir_outlyingness.shape == (fdatagrid.n_samples,) functional_dir_outlyingness = ( @@ -244,7 +253,7 @@ def directional_outlyingness_stats( # noqa: WPS218 assert functional_dir_outlyingness.shape == (fdatagrid.n_samples,) return DirectionalOutlyingnessStats( - directional_outlyingness=dir_outlyingness, + directional_outlyingness=dir_outlyingness.data_matrix, functional_directional_outlyingness=functional_dir_outlyingness, mean_directional_outlyingness=mean_dir_outlyingness, variation_directional_outlyingness=variation_dir_outlyingness, diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index fc820c935..9aae66ac8 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -734,8 +734,9 @@ def _get_op_matrix( return other[other_index] raise ValueError( - 
f"Invalid dimensions in operator between FDataGrid and Numpy " - f"array: {other.shape}" + f"Invalid dimensions in operator between " + f"FDataGrid (data_matrix.shape={self.data_matrix.shape}) " + f"and Numpy array (shape={other.shape})", ) elif isinstance(other, FDataGrid): From ebab51dbf57f3f76c0df615d830868425366f425 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 10 Oct 2023 10:21:12 +0200 Subject: [PATCH 13/17] Support Lp norms for surfaces. Remove more Simpson quadrature calls. --- skfda/exploratory/stats/_stats.py | 39 ++++++++++---------------- skfda/misc/metrics/_lp_norms.py | 46 ++++++++++++++++--------------- skfda/tests/test_metrics.py | 9 +++--- 3 files changed, 43 insertions(+), 51 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 9acaefeb7..bc40b518c 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -8,6 +8,8 @@ from scipy import integrate from scipy.stats import rankdata +from skfda._utils.ndfunction import average_function_value + from ...misc.metrics._lp_distances import l2_distance from ...representation import FData, FDataGrid from ...typing._metric import Metric @@ -108,33 +110,20 @@ def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat: with all the other curves of our dataset. """ - interval_len = ( - X.domain_range[0][1] - - X.domain_range[0][0] - ) - - # Array containing at each point the number of curves + # Functions containing at each point the number of curves # are above it. - num_functions_above: NDArrayFloat = rankdata( - -X.data_matrix, - method='max', - axis=0, - ) - 1 - - integrand = num_functions_above - - for d, s in zip(X.domain_range, X.grid_points): - integrand = integrate.simpson( - integrand, - x=s, - axis=1, - ) - interval_len = d[1] - d[0] - integrand /= interval_len - - integrand /= X.n_samples + num_functions_above = X.copy( + data_matrix=rankdata( + -X.data_matrix, + method='max', + axis=0, + ) - 1, + ) - return integrand.flatten() + return ( + average_function_value(num_functions_above) + / num_functions_above.n_samples + ).ravel() def depth_based_median( diff --git a/skfda/misc/metrics/_lp_norms.py b/skfda/misc/metrics/_lp_norms.py index e0cd2904f..0465dc005 100644 --- a/skfda/misc/metrics/_lp_norms.py +++ b/skfda/misc/metrics/_lp_norms.py @@ -7,7 +7,7 @@ import scipy.integrate from typing_extensions import Final -from ...representation import FData, FDataBasis +from ...representation import FData, FDataBasis, FDataGrid from ...typing._metric import Norm from ...typing._numpy import NDArrayFloat @@ -135,20 +135,21 @@ def __call__(self, vector: Union[NDArrayFloat, FData]) -> NDArrayFloat: ) res = np.sqrt(integral[0]).flatten() - else: + elif isinstance(vector, FDataGrid): data_matrix = vector.data_matrix - original_shape = data_matrix.shape - data_matrix = data_matrix.reshape(-1, original_shape[-1]) - data_matrix = (np.linalg.norm( - vector.data_matrix, - ord=vector_norm, - axis=-1, - keepdims=True, - ) if isinstance(vector_norm, (float, int)) - else vector_norm(data_matrix) - ) - data_matrix = data_matrix.reshape(original_shape[:-1] + (1,)) + if isinstance(vector_norm, (float, int)): + data_matrix = np.linalg.norm( + vector.data_matrix, + ord=vector_norm, + axis=-1, + keepdims=True, + ) + else: + original_shape = data_matrix.shape + data_matrix = data_matrix.reshape(-1, original_shape[-1]) + data_matrix = vector_norm(data_matrix) + data_matrix = data_matrix.reshape(original_shape[:-1] + (1,)) if np.isinf(self.p): @@ -157,18 +158,19 @@ def 
__call__(self, vector: Union[NDArrayFloat, FData]) -> NDArrayFloat: axis=tuple(range(1, data_matrix.ndim)), ) - elif vector.dim_domain == 1: + else: + integrand = vector.copy( + data_matrix=data_matrix ** self.p, + coordinate_names=(None,), + ) # Computes the norm, approximating the integral with Simpson's # rule. - res = scipy.integrate.simpson( - data_matrix[..., 0] ** self.p, - x=vector.grid_points, - ) ** (1 / self.p) - - else: - # Needed to perform surface integration - return NotImplemented + res = integrand.integrate().ravel() ** (1 / self.p) + else: + raise NotImplementedError( + f"LpNorm not implemented for type {type(vector)}", + ) if len(res) == 1: return res[0] # type: ignore[no-any-return] diff --git a/skfda/tests/test_metrics.py b/skfda/tests/test_metrics.py index 4e4746901..440806379 100644 --- a/skfda/tests/test_metrics.py +++ b/skfda/tests/test_metrics.py @@ -89,10 +89,11 @@ def test_lp_norm_surface_inf(self) -> None: ) def test_lp_norm_surface(self) -> None: - """Test that integration of surfaces has not been implemented.""" - # Integration of surfaces not implemented, add test case after - # implementation - self.assertEqual(lp_norm(self.fd_surface, p=1), NotImplemented) + """Test the integration of surfaces.""" + np.testing.assert_allclose( + lp_norm(self.fd_surface, p=1), + [0.12566256, 0.12563755, 0.12566133], + ) def test_lp_error_dimensions(self) -> None: """Test error on metric between different kind of objects.""" From 2f43c9121bfd547e83eca04f35af13e05bb62f43 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 10 Oct 2023 11:58:22 +0200 Subject: [PATCH 14/17] Fix Mypy errors in hat matrix. --- skfda/misc/hat_matrix.py | 41 +++++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 19 deletions(-) diff --git a/skfda/misc/hat_matrix.py b/skfda/misc/hat_matrix.py index 6deae4e03..81694bd47 100644 --- a/skfda/misc/hat_matrix.py +++ b/skfda/misc/hat_matrix.py @@ -11,20 +11,21 @@ import abc import math -from typing import Callable, TypeVar, Union, overload +from typing import Callable, Final, TypeVar, Union, overload import numpy as np from .._utils._sklearn_adapter import BaseEstimator from ..representation._functional_data import FData from ..representation.basis import FDataBasis -from ..typing._base import GridPointsLike from ..typing._numpy import NDArrayFloat from . 
import kernels -Input = TypeVar("Input", bound=Union[FData, GridPointsLike]) +Input = TypeVar("Input", bound=Union[FData, NDArrayFloat]) Prediction = TypeVar("Prediction", bound=Union[NDArrayFloat, FData]) +DEFAULT_BANDWIDTH_PERCENTILE: Final = 15 + class HatMatrix( BaseEstimator, @@ -52,8 +53,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: Input | None = None, - X: Input | None = None, + X_train: Input, + X: Input, y_train: None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -65,8 +66,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: Input | None = None, - X: Input | None = None, + X_train: Input, + X: Input, y_train: Prediction | None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -77,8 +78,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: Input | None = None, - X: Input | None = None, + X_train: Input, + X: Input, y_train: NDArrayFloat | FData | None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -186,7 +187,7 @@ def _hat_matrix_function_not_normalized( ) -> NDArrayFloat: bandwidth = ( - np.percentile(delta_x, 15) + float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE)) if self.bandwidth is None else self.bandwidth ) @@ -278,8 +279,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: FData | NDArrayFloat | None = None, - X: FData | NDArrayFloat | None = None, + X_train: FData | NDArrayFloat, + X: FData | NDArrayFloat, y_train: None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -291,8 +292,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: FData | GridPointsLike | None = None, - X: FData | GridPointsLike | None = None, + X_train: FData | NDArrayFloat, + X: FData | NDArrayFloat, y_train: Prediction | None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -303,19 +304,19 @@ def __call__( # noqa: D102 self, *, delta_x: NDArrayFloat, - X_train: FData | GridPointsLike | None = None, - X: FData | GridPointsLike | None = None, + X_train: FData | NDArrayFloat, + X: FData | NDArrayFloat, y_train: NDArrayFloat | FData | None = None, weights: NDArrayFloat | None = None, _cv: bool = False, ) -> NDArrayFloat | FData: bandwidth = ( - np.percentile(delta_x, 15) + float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE)) if self.bandwidth is None else self.bandwidth ) - + # Smoothing for functions of one variable if not isinstance(X_train, FDataBasis) and X_train[0].shape[0] == 1: delta_x = np.subtract.outer( @@ -323,6 +324,8 @@ def __call__( # noqa: D102 X_train.flatten(), ) + assert y_train is None + return super().__call__( # noqa: WPS503 delta_x=delta_x, X_train=X_train, @@ -341,7 +344,7 @@ def __call__( # noqa: D102 and isinstance(X, FDataBasis) ): raise ValueError("Only FDataBasis is supported for now.") - + m1 = X_train.coefficients m2 = X.coefficients From e6ce50550b3857e4d6d11a89f7fe2e0a4c85ac81 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Tue, 10 Oct 2023 12:46:41 +0200 Subject: [PATCH 15/17] Add test. --- skfda/preprocessing/smoothing/_linear.py | 1 + skfda/tests/test_smoothing.py | 34 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/skfda/preprocessing/smoothing/_linear.py b/skfda/preprocessing/smoothing/_linear.py index 99368b1cf..09c6fadcc 100644 --- a/skfda/preprocessing/smoothing/_linear.py +++ b/skfda/preprocessing/smoothing/_linear.py @@ -28,6 +28,7 @@ class _LinearSmoother( ``hat_matrix`` to define the smoothing or 'hat' matrix. 
""" + input_points_: GridPoints output_points_: GridPoints diff --git a/skfda/tests/test_smoothing.py b/skfda/tests/test_smoothing.py index a76825640..315d2b3a5 100644 --- a/skfda/tests/test_smoothing.py +++ b/skfda/tests/test_smoothing.py @@ -4,6 +4,7 @@ import numpy as np import sklearn +from sklearn.datasets import load_digits from typing_extensions import Literal import skfda @@ -173,6 +174,39 @@ def test_knn(self) -> None: np.testing.assert_allclose(hat_matrix, hat_matrix_r) +class TestSurfaceSmoother(unittest.TestCase): + """Test that smoothing works on surfaces.""" + + def test_knn(self) -> None: + """Check 2D smoothing using digits dataset.""" + X, y = load_digits(return_X_y=True) + X = X.reshape(-1, 8, 8) + + fd = skfda.FDataGrid(X) + + ks = KernelSmoother( + kernel_estimator=KNeighborsHatMatrix(n_neighbors=1)) + fd_trans = ks.fit_transform(fd) + + np.testing.assert_allclose(fd_trans.data_matrix, fd.data_matrix) + + ks = KernelSmoother( + kernel_estimator=KNeighborsHatMatrix(n_neighbors=3)) + fd_trans = ks.fit_transform(fd) + + res = [ + [0, 1.25, 7.75, 10.5, 8.25, 6.25, 1.5, 0], + [0, 3.2, 9.6, 10.6, 9.8, 8.4, 5.6, 1.25], + [0.75, 4.4, 9, 6.4, 4.6, 8.4, 6.4, 2], + [1, 4.8, 7.8, 2.8, 1.6, 7.2, 6.4, 2], + [1.25, 4.2, 7.2, 1.6, 2, 7.4, 6.4, 2], + [1, 4.4, 7.4, 3.4, 4.6, 8.2, 5.4, 1.75], + [0.5, 4, 7.6, 8.4, 7.6, 6.8, 3.8, 0], + [0, 2, 8.25, 8.5, 8.25, 5.5, 0, 0], + ] + np.testing.assert_allclose(fd_trans.data_matrix[0, ..., 0], res) + + class TestBasisSmoother(unittest.TestCase): """Test Basis Smoother.""" From 7b0c0b7b134cac3e0cd30ddf2d459dcc8cc06689 Mon Sep 17 00:00:00 2001 From: VNMabus Date: Wed, 11 Oct 2023 12:49:19 +0200 Subject: [PATCH 16/17] Reduce test taking too long. --- skfda/tests/test_recursive_maxima_hunting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/skfda/tests/test_recursive_maxima_hunting.py b/skfda/tests/test_recursive_maxima_hunting.py index 9a435b8f9..389e095ac 100644 --- a/skfda/tests/test_recursive_maxima_hunting.py +++ b/skfda/tests/test_recursive_maxima_hunting.py @@ -82,7 +82,7 @@ def test_fit_exponential(self) -> None: join. """ - n_samples = 10000 + n_samples = 1000 n_features = 100 def mean_1( # noqa: WPS430 From 93fdf3d0c7efa6d94cfbdc74b3c45cdfab4fbebe Mon Sep 17 00:00:00 2001 From: pcuestas Date: Thu, 12 Oct 2023 10:15:57 +0200 Subject: [PATCH 17/17] Set `correction=0` by default (and explicitly set the correction value where it was not set, to avoid changing the previous behavior of the modules) --- skfda/exploratory/stats/_stats.py | 8 ++++---- skfda/inference/anova/_anova_oneway.py | 2 ++ skfda/inference/hotelling/_hotelling.py | 2 ++ skfda/misc/covariances.py | 4 ++-- skfda/representation/_functional_data.py | 8 ++++---- skfda/representation/basis/_fdatabasis.py | 12 ++++++------ skfda/representation/grid.py | 16 ++++++++-------- 7 files changed, 28 insertions(+), 24 deletions(-) diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index bc40b518c..8d5b2478c 100644 --- a/skfda/exploratory/stats/_stats.py +++ b/skfda/exploratory/stats/_stats.py @@ -43,7 +43,7 @@ def mean( return (X * weight).sum() -def var(X: FData, correction: int = 1) -> FDataGrid: +def var(X: FData, correction: int = 0) -> FDataGrid: """ Compute the variance of a set of samples in a FData object. @@ -51,7 +51,7 @@ def var(X: FData, correction: int = 1) -> FDataGrid: X: Object containing all the set of samples whose variance is desired. correction: degrees of freedom adjustment. 
The divisor used in the calculation is `N - correction`, where `N` represents the number of - elements. Default: `1`. + elements. Default: `0`. Returns: Variance of all the samples in the original object, as a @@ -78,7 +78,7 @@ def gmean(X: FDataGrid) -> FDataGrid: def cov( X: FData, - correction: int = 1, + correction: int = 0, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: """ Compute the covariance. @@ -90,7 +90,7 @@ def cov( X: Object containing different samples of a functional variable. correction: degrees of freedom adjustment. The divisor used in the calculation is `N - correction`, where `N` represents the number of - elements. Default: `1`. + elements. Default: `0`. Returns: diff --git a/skfda/inference/anova/_anova_oneway.py b/skfda/inference/anova/_anova_oneway.py index feeb18a17..9b4752c20 100644 --- a/skfda/inference/anova/_anova_oneway.py +++ b/skfda/inference/anova/_anova_oneway.py @@ -220,6 +220,7 @@ def _anova_bootstrap( cov_est = concatenate(fd_grouped).cov( grid_points, grid_points, + correction=1, ) k_est = [cov_est] * len(fd_grouped) else: @@ -228,6 +229,7 @@ def _anova_bootstrap( fdg.cov( grid_points, grid_points, + correction=1, ) for fdg in fd_grouped ] diff --git a/skfda/inference/hotelling/_hotelling.py b/skfda/inference/hotelling/_hotelling.py index 9f07e4944..f0aba205e 100644 --- a/skfda/inference/hotelling/_hotelling.py +++ b/skfda/inference/hotelling/_hotelling.py @@ -103,10 +103,12 @@ def hotelling_t2( k1 = fd1.cov( fd1.grid_points[0], fd1.grid_points[0], + correction=1, ) k2 = fd2.cov( fd2.grid_points[0], fd2.grid_points[0], + correction=1, ) m = m.reshape((-1, 1)) # Reshaping the mean for a proper matrix product diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index 05d94f21f..43635eb02 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -817,7 +817,7 @@ class EmpiricalGrid(Empirical): cov_fdata: FDataGrid correction: int - def __init__(self, data: FDataGrid, correction: int = 1) -> None: + def __init__(self, data: FDataGrid, correction: int = 0) -> None: super().__init__(data=data) self.correction = correction @@ -853,7 +853,7 @@ class EmpiricalBasis(Empirical): coeff_matrix: NDArrayFloat correction: int - def __init__(self, data: FDataBasis, correction: int = 1) -> None: + def __init__(self, data: FDataBasis, correction: int = 0) -> None: super().__init__(data=data) self.correction = correction diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index f62e8ccde..ee8813bbc 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -826,7 +826,7 @@ def cov( # noqa: WPS451 s_points: NDArrayFloat, t_points: NDArrayFloat, /, - correction: int = 1, + correction: int = 0, ) -> NDArrayFloat: pass @@ -834,7 +834,7 @@ def cov( # noqa: WPS451 def cov( # noqa: WPS451 self: T, /, - correction: int = 1, + correction: int = 0, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: pass @@ -844,7 +844,7 @@ def cov( # noqa: WPS320, WPS451 s_points: Optional[NDArrayFloat] = None, t_points: Optional[NDArrayFloat] = None, /, - correction: int = 1, + correction: int = 0, ) -> Union[ Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat], NDArrayFloat, @@ -866,7 +866,7 @@ def cov( # noqa: WPS320, WPS451 t_points: Points where the covariance function is evaluated. correction: degrees of freedom adjustment. The divisor used in the calculation is `N - correction`, where `N` represents the - number of elements. Default: `1`. 
+ number of elements. Default: `0`. Returns: Covariance function. diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index ae4ef0813..945dda05d 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -445,7 +445,7 @@ def sum( # noqa: WPS125 def var( self: T, eval_points: Optional[NDArrayFloat] = None, - correction: int = 1, + correction: int = 0, ) -> T: """Compute the variance of the functional data object. @@ -462,7 +462,7 @@ def var( between 501 and 10 times the number of basis. correction: degrees of freedom adjustment. The divisor used in the calculation is `N - correction`, where `N` represents the - number of elements. Default: `1`. + number of elements. Default: `0`. Returns: Variance of the original object. @@ -478,7 +478,7 @@ def cov( # noqa: WPS451 s_points: NDArrayFloat, t_points: NDArrayFloat, /, - correction: int = 1, + correction: int = 0, ) -> NDArrayFloat: pass @@ -486,7 +486,7 @@ def cov( # noqa: WPS451 def cov( # noqa: WPS451 self: T, /, - correction: int = 1, + correction: int = 0, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: pass @@ -495,7 +495,7 @@ def cov( # noqa: WPS320, WPS451 s_points: Optional[NDArrayFloat] = None, t_points: Optional[NDArrayFloat] = None, /, - correction: int = 1, + correction: int = 0, ) -> Union[ Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat], NDArrayFloat, @@ -519,7 +519,7 @@ def cov( # noqa: WPS320, WPS451 t_points: Points where the covariance function is evaluated. correction: degrees of freedom adjustment. The divisor used in the calculation is `N - correction`, where `N` represents the - number of elements. Default: `1`. + number of elements. Default: `0`. Returns: Covariance function. diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 9aae66ac8..bdd01c3e4 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -582,13 +582,13 @@ def sum( # noqa: WPS125 sample_names=(None,), ) - def var(self: T, correction: int = 1) -> T: + def var(self: T, correction: int = 0) -> T: """Compute the variance of a set of samples in a FDataGrid object. Args: - correction: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - correction`, where `N` represents the number of - elements. By default `correction` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `0`. Returns: A FDataGrid object with just one sample representing the @@ -610,7 +610,7 @@ def cov( # noqa: WPS451 s_points: NDArrayFloat, t_points: NDArrayFloat, /, - correction: int = 1, + correction: int = 0, ) -> NDArrayFloat: pass @@ -618,7 +618,7 @@ def cov( # noqa: WPS451 def cov( # noqa: WPS451 self: T, /, - correction: int = 1, + correction: int = 0, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: pass @@ -627,7 +627,7 @@ def cov( # noqa: WPS320, WPS451 s_points: Optional[NDArrayFloat] = None, t_points: Optional[NDArrayFloat] = None, /, - correction: int = 1, + correction: int = 0, ) -> Union[ Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat], NDArrayFloat, @@ -645,7 +645,7 @@ def cov( # noqa: WPS320, WPS451 t_points: Grid points where the covariance function is evaluated. correction: degrees of freedom adjustment. The divisor used in the calculation is `N - correction`, where `N` represents the - number of elements. Default: `1`. + number of elements. Default: `0`. 
Returns: Covariance function.
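
A few usage sketches of the behavior introduced in this series follow. None of
these snippets belong to the patches themselves; the data, seeds, and
tolerances are illustrative, and only API shown in the diffs above is assumed.

First, the Lp norm of surfaces enabled by the LpNorm change: the norm is now
computed by integrating over the whole (here two-dimensional) domain. For
f(x, y) = x * y on the unit square the L1 norm is 1/4, which Simpson's rule
reproduces almost exactly on a regular grid:

    import numpy as np
    import skfda
    from skfda.misc.metrics import lp_norm

    # One surface sampled on a regular 50x50 grid over [0, 1] x [0, 1].
    grid = np.linspace(0, 1, 50)
    x, y = np.meshgrid(grid, grid, indexing="ij")
    fd_surface = skfda.FDataGrid(
        data_matrix=[x * y],
        grid_points=[grid, grid],
    )

    # Double integral of |f| over the unit square: 1/4.
    np.testing.assert_allclose(lp_norm(fd_surface, p=1), 0.25, rtol=1e-4)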
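Second, kernel smoothing of surfaces, as exercised by the new
TestSurfaceSmoother (random surfaces stand in for the digits dataset here).
The k-nearest-neighbours hat matrix makes the identity case easy to check;
with the Nadaraya-Watson variant and no bandwidth, the patched code would
instead fall back to the 15th percentile of the distances
(DEFAULT_BANDWIDTH_PERCENTILE):

    import numpy as np
    import skfda
    from skfda.misc.hat_matrix import KNeighborsHatMatrix
    from skfda.preprocessing.smoothing import KernelSmoother

    # Three random surfaces on an 8x8 grid, mimicking the digits shape.
    rng = np.random.default_rng(0)
    fd = skfda.FDataGrid(rng.normal(size=(3, 8, 8)))

    # A single neighbour gives an identity hat matrix, so the smoother
    # returns its input unchanged (the new test asserts exactly this).
    identity = KernelSmoother(
        kernel_estimator=KNeighborsHatMatrix(n_neighbors=1),
    )
    np.testing.assert_allclose(
        identity.fit_transform(fd).data_matrix,
        fd.data_matrix,
    )

    # Three neighbours average each grid point with its nearest
    # neighbours in the plane; the grid shape is preserved.
    smoother = KernelSmoother(
        kernel_estimator=KNeighborsHatMatrix(n_neighbors=3),
    )
    fd_smooth = smoother.fit_transform(fd)
    assert fd_smooth.data_matrix.shape == fd.data_matrix.shape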
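Finally, the new `correction` default from PATCH 17/17. Variance and
covariance now use the biased estimator (divisor `N`) unless a degrees-of-
freedom adjustment is requested, so callers relying on the old divisor
`N - 1` must pass `correction=1` explicitly, as the inference modules above
now do. A hand-checkable two-sample example:

    import numpy as np
    import skfda
    from skfda.exploratory.stats import var

    # Two samples with pointwise values {1, 3} and {2, 4}.
    fd = skfda.FDataGrid([[1.0, 2.0], [3.0, 4.0]])

    # Default correction=0: divisor N = 2, pointwise variance 1.
    np.testing.assert_allclose(var(fd).data_matrix.ravel(), [1.0, 1.0])

    # correction=1 restores the unbiased estimator (divisor N - 1 = 1).
    np.testing.assert_allclose(
        var(fd, correction=1).data_matrix.ravel(),
        [2.0, 2.0],
    )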