diff --git a/docs/index.rst b/docs/index.rst index 55cbe94dc..b5ee3c933 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -34,7 +34,7 @@ Github you can find more information related to the development of the package. :caption: More documentation apilist - glossary + Glossary contributors An exhaustive list of all the contents of the package can be found in the diff --git a/docs/refs.bib b/docs/refs.bib index cb1255044..c0aedacb8 100644 --- a/docs/refs.bib +++ b/docs/refs.bib @@ -364,6 +364,21 @@ @article{pini++_2018_hotelling keywords = {Functional data,High-dimensional data Hotelling’s,Hilbert space,Nonparametric inference,Permutation test} } +@incollection{pintado+romo_2005_depthbased, + title = {Depth-Based Classification for Functional Data}, + shorttitle = {Data {{Depth}}}, + booktitle = {Data {{Depth}}: {{Robust Multivariate Analysis}}, {{Computational Geometry}} and {{Applications}}}, + author = {Pintado, Sara and Romo, Juan}, + year = {2005}, + month = nov, + series = {{{DIMACS Series}} in {{Discrete Mathematics}} and {{Theoretical Computer Science}}}, + volume = {72}, + pages = {103--119}, + publisher = {{American Mathematical Society}}, + doi = {10.1090/dimacs/072/08}, + isbn = {978-0-8218-3596-8} +} + @inbook{ramsay+silverman_2005_functionala, title = {From Functional Data to Smooth Functions}, booktitle = {Functional Data Analysis}, diff --git a/skfda/__init__.py b/skfda/__init__.py index 1a8f41021..f9434fa97 100644 --- a/skfda/__init__.py +++ b/skfda/__init__.py @@ -30,4 +30,4 @@ concatenate as concatenate, ) -__version__ = "0.8.1" +__version__ = "0.9.dev1" diff --git a/skfda/_utils/ndfunction/__init__.py b/skfda/_utils/ndfunction/__init__.py new file mode 100644 index 000000000..aa0d1fe33 --- /dev/null +++ b/skfda/_utils/ndfunction/__init__.py @@ -0,0 +1,18 @@ +from typing import TYPE_CHECKING + +import lazy_loader as lazy + +__getattr__, __dir__, __all__ = lazy.attach( + __name__, + submod_attrs={ + "_functions": [ + "average_function_value", + ], + }, +) + +if TYPE_CHECKING: + + from ._functions import ( + average_function_value as average_function_value, + ) diff --git a/skfda/_utils/ndfunction/_functions.py b/skfda/_utils/ndfunction/_functions.py new file mode 100644 index 000000000..8f7c9b980 --- /dev/null +++ b/skfda/_utils/ndfunction/_functions.py @@ -0,0 +1,109 @@ +"""Functions for working with arrays of functions.""" +from __future__ import annotations + +import math +from typing import TYPE_CHECKING, Any, Literal, Protocol, TypeVar + +from ...misc.validation import validate_domain_range +from .. 
import nquad_vec + +if TYPE_CHECKING: + from ...typing._numpy import NDArrayFloat + from ...representation import FData + from ...typing._base import DomainRangeLike + + +UfuncMethod = Literal[ + "__call__", + "reduce", + "reduceat", + "accumulate", + "outer", + "inner", +] + + +class _SupportsArrayUFunc(Protocol): + def __array_ufunc__( + self, + ufunc: Any, + method: UfuncMethod, + *inputs: Any, + **kwargs: Any, + ) -> Any: + pass + + +T = TypeVar("T", bound=_SupportsArrayUFunc) + + +class _UnaryUfunc(Protocol): + + def __call__(self, __arg: T) -> T: # noqa: WPS112 + pass + + +def _average_function_ufunc( + data: FData, + ufunc: _UnaryUfunc, + *, + domain: DomainRangeLike | None = None, +) -> NDArrayFloat: + + if domain is None: + domain = data.domain_range + else: + domain = validate_domain_range(domain) + + lebesgue_measure = math.prod( + ( + (iterval[1] - iterval[0]) + for iterval in domain + ), + ) + + try: + data_eval = ufunc(data) + except TypeError: + + def integrand(*args: NDArrayFloat) -> NDArrayFloat: # noqa: WPS430 + f1 = data(args)[:, 0, :] + return ufunc(f1) + + return nquad_vec( + integrand, + domain, + ) / lebesgue_measure + + else: + return data_eval.integrate(domain=domain) / lebesgue_measure + + +def average_function_value( + data: FData, + *, + domain: DomainRangeLike | None = None, +) -> NDArrayFloat: + r""" + Calculate the average function value for each function. + + This is the value that, if integrated over the whole domain of each + function, has the same integral as the function itself. + + .. math:: + \bar{x} = \frac{1}{\text{Vol}(\mathcal{T})}\int_{\mathcal{T}} x(t) dt + + Args: + data: Functions where we want to calculate the expected value. + domain: Integration domain. By default, the whole domain is used. + + Returns: + ndarray of shape (n_dimensions, n_samples) with the values of the + expectations. 
+ + See also: + `Entry on Wikipedia + `_ + + """ + return _average_function_ufunc(data, ufunc=lambda x: x, domain=domain) diff --git a/skfda/exploratory/depth/_depth.py b/skfda/exploratory/depth/_depth.py index a0d27c2e5..68e51dee8 100644 --- a/skfda/exploratory/depth/_depth.py +++ b/skfda/exploratory/depth/_depth.py @@ -11,9 +11,9 @@ from typing import TypeVar import numpy as np -import scipy.integrate from ..._utils._sklearn_adapter import BaseEstimator +from ..._utils.ndfunction import average_function_value from ...misc.metrics import l2_distance from ...misc.metrics._utils import _fit_metric from ...representation import FData, FDataGrid @@ -82,25 +82,11 @@ def fit( # noqa: D102 def transform(self, X: FDataGrid) -> NDArrayFloat: # noqa: D102 - pointwise_depth = self.multivariate_depth_.transform(X.data_matrix) - - interval_len = ( - self._domain_range[0][1] - - self._domain_range[0][0] + pointwise_depth = X.copy( + data_matrix=self.multivariate_depth_.transform(X.data_matrix), ) - integrand = pointwise_depth - - for d, s in zip(X.domain_range, X.grid_points): - integrand = scipy.integrate.simps( - integrand, - x=s, - axis=1, - ) - interval_len = d[1] - d[0] - integrand /= interval_len - - return integrand + return average_function_value(pointwise_depth).ravel() @property def max(self) -> float: diff --git a/skfda/exploratory/outliers/_directional_outlyingness.py b/skfda/exploratory/outliers/_directional_outlyingness.py index 30e8338d3..91ac8ee67 100644 --- a/skfda/exploratory/outliers/_directional_outlyingness.py +++ b/skfda/exploratory/outliers/_directional_outlyingness.py @@ -98,7 +98,7 @@ def directional_outlyingness_stats( # noqa: WPS218 Method used to order the data. Defaults to :func:`projection depth `. pointwise_weights (array_like, optional): an array containing the - weights of each point of discretisation where values have been + weights of each point of discretization where values have been recorded. Defaults to the same weight for each of the points: 1/len(interval). @@ -183,58 +183,67 @@ def directional_outlyingness_stats( # noqa: WPS218 fdatagrid.domain_range[0][1] - fdatagrid.domain_range[0][0] ) + if not isinstance(pointwise_weights, FDataGrid): + pointwise_weights = fdatagrid.copy( + data_matrix=pointwise_weights, + sample_names=(None,), + ) + depth_pointwise = multivariate_depth(fdatagrid.data_matrix) assert depth_pointwise.shape == fdatagrid.data_matrix.shape[:-1] + depth_pointwise_fd = fdatagrid.copy( + data_matrix=depth_pointwise, + ) + # Obtaining the pointwise median sample Z, to calculate # v(t) = {X(t) − Z(t)}/|| X(t) − Z(t) || median_index = np.argmax(depth_pointwise, axis=0) - pointwise_median = fdatagrid.data_matrix[ - median_index, - range(fdatagrid.data_matrix.shape[1]), - ] - assert pointwise_median.shape == fdatagrid.data_matrix.shape[1:] - v = fdatagrid.data_matrix - pointwise_median - assert v.shape == fdatagrid.data_matrix.shape - v_norm = la.norm(v, axis=-1, keepdims=True) + pointwise_median = fdatagrid.copy( + data_matrix=fdatagrid.data_matrix[ + median_index, + range(fdatagrid.data_matrix.shape[1]), + ][np.newaxis], + sample_names=(None,), + ) + + v = fdatagrid - pointwise_median + v_norm = la.norm(v.data_matrix, axis=-1, keepdims=True) # To avoid ZeroDivisionError, the zeros are substituted by ones (the # reference implementation also does this). 
v_norm[np.where(v_norm == 0)] = 1 - v_unitary = v / v_norm + v.data_matrix /= v_norm - # Calculation directinal outlyingness - dir_outlyingness = (1 / depth_pointwise[..., np.newaxis] - 1) * v_unitary + # Calculation directional outlyingness + dir_outlyingness = ( + 1 / depth_pointwise_fd - 1 + ) * v # Calculation mean directional outlyingness weighted_dir_outlyingness = ( - dir_outlyingness * pointwise_weights[:, np.newaxis] + dir_outlyingness * pointwise_weights ) - assert weighted_dir_outlyingness.shape == dir_outlyingness.shape - mean_dir_outlyingness = scipy.integrate.simps( - weighted_dir_outlyingness, - fdatagrid.grid_points[0], - axis=1, - ) + mean_dir_outlyingness = weighted_dir_outlyingness.integrate() assert mean_dir_outlyingness.shape == ( fdatagrid.n_samples, fdatagrid.dim_codomain, ) # Calculation variation directional outlyingness - norm = np.square(la.norm( - dir_outlyingness - - mean_dir_outlyingness[:, np.newaxis, :], - axis=-1, - )) - weighted_norm = norm * pointwise_weights - variation_dir_outlyingness = scipy.integrate.simps( - weighted_norm, - fdatagrid.grid_points[0], - axis=1, + norm = dir_outlyingness.copy( + data_matrix=np.square( + la.norm( + dir_outlyingness.data_matrix + - mean_dir_outlyingness[:, np.newaxis, :], + axis=-1, + ) + ) ) + weighted_norm = norm * pointwise_weights + variation_dir_outlyingness = weighted_norm.integrate().ravel() assert variation_dir_outlyingness.shape == (fdatagrid.n_samples,) functional_dir_outlyingness = ( @@ -244,7 +253,7 @@ def directional_outlyingness_stats( # noqa: WPS218 assert functional_dir_outlyingness.shape == (fdatagrid.n_samples,) return DirectionalOutlyingnessStats( - directional_outlyingness=dir_outlyingness, + directional_outlyingness=dir_outlyingness.data_matrix, functional_directional_outlyingness=functional_dir_outlyingness, mean_directional_outlyingness=mean_dir_outlyingness, variation_directional_outlyingness=variation_dir_outlyingness, diff --git a/skfda/exploratory/stats/_fisher_rao.py b/skfda/exploratory/stats/_fisher_rao.py index c897b13fc..5a3ffe6c9 100644 --- a/skfda/exploratory/stats/_fisher_rao.py +++ b/skfda/exploratory/stats/_fisher_rao.py @@ -133,7 +133,7 @@ def _fisher_rao_warping_mean( # Compute shooting vectors for psi_i in psi_data: - inner = scipy.integrate.simps(mu * psi_i, x=eval_points) + inner = scipy.integrate.simpson(mu * psi_i, x=eval_points) inner = max(min(inner, 1), -1) theta = np.arccos(inner) @@ -143,7 +143,7 @@ def _fisher_rao_warping_mean( # Mean of shooting vectors vmean /= warping.n_samples - v_norm = np.sqrt(scipy.integrate.simps(np.square(vmean))) + v_norm = np.sqrt(scipy.integrate.simpson(np.square(vmean))) # Convergence criterion if v_norm < tol: @@ -266,7 +266,7 @@ def fisher_rao_karcher_mean( # Initialize with function closest to the L2 mean with the L2 distance centered = (srsf.T - srsf.mean(axis=0, keepdims=True).T).T - distances = scipy.integrate.simps( + distances = scipy.integrate.simpson( np.square(centered, out=centered), eval_points_normalized, axis=1, @@ -304,14 +304,14 @@ def fisher_rao_karcher_mean( # Convergence criterion mu_norm = np.sqrt( - scipy.integrate.simps( + scipy.integrate.simpson( np.square(mu, out=mu_aux), eval_points_normalized, ), ) mu_diff = np.sqrt( - scipy.integrate.simps( + scipy.integrate.simpson( np.square(mu - mu_1, out=mu_aux), eval_points_normalized, ), diff --git a/skfda/exploratory/stats/_stats.py b/skfda/exploratory/stats/_stats.py index 903261590..009261497 100644 --- a/skfda/exploratory/stats/_stats.py +++ 
b/skfda/exploratory/stats/_stats.py @@ -9,6 +9,8 @@ from scipy import integrate from scipy.stats import rankdata +from skfda._utils.ndfunction import average_function_value + from ...misc.metrics._lp_distances import l2_distance from ...representation import FData, FDataBasis, FDataGrid from ...typing._metric import Metric @@ -42,22 +44,22 @@ def mean( return (X * weight).sum() -def var(X: FData, ddof: int = 1) -> FDataGrid: +def var(X: FData, correction: int = 0) -> FDataGrid: """ Compute the variance of a set of samples in a FData object. Args: X: Object containing all the set of samples whose variance is desired. - ddof: "Delta Degrees of Freedom": the divisor used in the calculation - is `N - ddof`, where `N` represents the number of elements. By - default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the number of + elements. Default: `0`. Returns: Variance of all the samples in the original object, as a :term:`functional data object` with just one sample. """ - return X.var(ddof=ddof) # type: ignore[no-any-return] + return X.var(correction=correction) # type: ignore[no-any-return] def gmean(X: FDataGrid) -> FDataGrid: @@ -77,7 +79,7 @@ def gmean(X: FDataGrid) -> FDataGrid: def cov( X: FData, - ddof: int = 1, + correction: int = 0, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: """ Compute the covariance. @@ -87,9 +89,9 @@ def cov( Args: X: Object containing different samples of a functional variable. - ddof: "Delta Degrees of Freedom": the divisor used in the calculation - is `N - ddof`, where `N` represents the number of elements. By - default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the number of + elements. Default: `0`. Returns: @@ -97,7 +99,7 @@ def cov( callable. """ - return X.cov(ddof=ddof) + return X.cov(correction=correction) @functools.singledispatch @@ -165,33 +167,20 @@ def modified_epigraph_index(X: FDataGrid) -> NDArrayFloat: with all the other curves of our dataset. """ - interval_len = ( - X.domain_range[0][1] - - X.domain_range[0][0] - ) - - # Array containing at each point the number of curves + # Functions containing at each point the number of curves # are above it. 
- num_functions_above: NDArrayFloat = rankdata( - -X.data_matrix, - method='max', - axis=0, - ) - 1 - - integrand = num_functions_above - - for d, s in zip(X.domain_range, X.grid_points): - integrand = integrate.simps( - integrand, - x=s, - axis=1, - ) - interval_len = d[1] - d[0] - integrand /= interval_len - - integrand /= X.n_samples + num_functions_above = X.copy( + data_matrix=rankdata( + -X.data_matrix, + method='max', + axis=0, + ) - 1, + ) - return integrand.flatten() + return ( + average_function_value(num_functions_above) + / num_functions_above.n_samples + ).ravel() def depth_based_median( diff --git a/skfda/inference/anova/_anova_oneway.py b/skfda/inference/anova/_anova_oneway.py index feeb18a17..9b4752c20 100644 --- a/skfda/inference/anova/_anova_oneway.py +++ b/skfda/inference/anova/_anova_oneway.py @@ -220,6 +220,7 @@ def _anova_bootstrap( cov_est = concatenate(fd_grouped).cov( grid_points, grid_points, + correction=1, ) k_est = [cov_est] * len(fd_grouped) else: @@ -228,6 +229,7 @@ def _anova_bootstrap( fdg.cov( grid_points, grid_points, + correction=1, ) for fdg in fd_grouped ] diff --git a/skfda/inference/hotelling/_hotelling.py b/skfda/inference/hotelling/_hotelling.py index 9f07e4944..f0aba205e 100644 --- a/skfda/inference/hotelling/_hotelling.py +++ b/skfda/inference/hotelling/_hotelling.py @@ -103,10 +103,12 @@ def hotelling_t2( k1 = fd1.cov( fd1.grid_points[0], fd1.grid_points[0], + correction=1, ) k2 = fd2.cov( fd2.grid_points[0], fd2.grid_points[0], + correction=1, ) m = m.reshape((-1, 1)) # Reshaping the mean for a proper matrix product diff --git a/skfda/misc/_math.py b/skfda/misc/_math.py index 4e02de2f8..94b440504 100644 --- a/skfda/misc/_math.py +++ b/skfda/misc/_math.py @@ -363,7 +363,7 @@ def _inner_product_fdatagrid( # Perform quadrature inside the einsum for i, s in enumerate(arg1.grid_points[::-1]): identity = np.eye(len(s)) - weights = scipy.integrate.simps(identity, x=s) + weights = scipy.integrate.simpson(identity, x=s) index = (slice(None),) + (np.newaxis,) * (i + 1) d1 *= weights[index] diff --git a/skfda/misc/covariances.py b/skfda/misc/covariances.py index e74dab770..43635eb02 100644 --- a/skfda/misc/covariances.py +++ b/skfda/misc/covariances.py @@ -768,27 +768,27 @@ class Empirical(Covariance): The sample covariance function is defined as . math:: - K(t, s) = \frac{1}{N-\text{ddof}}\sum_{n=1}^N\left(x_n(t) - + K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N\left(x_n(t) - \bar{x}(t)\right) \left(x_n(s) - \bar{x}(s)\right) where :math:`x_n(t)` is the n-th sample and :math:`\bar{x}(t)` is the mean of the samples. :math:`N` is the number of samples, - :math:`\text{ddof}` means "Delta Degrees of Freedom" and is such that - :math:`N-\text{ddof}` is the divisor used in the calculation of the - covariance function. + :math:`\text{correction}` is the degrees of freedom adjustment and is such + that :math:`N-\text{correction}` is the divisor used in the calculation of + the covariance function. 
""" _latex_formula = ( - r"K(t, s) = \frac{1}{N-\text{ddof}}\sum_{n=1}^N(x_n(t) - \bar{x}(t))" - r"(x_n(s) - \bar{x}(s))" + r"K(t, s) = \frac{1}{N-\text{correction}}\sum_{n=1}^N" + r"(x_n(t) - \bar{x}(t))(x_n(s) - \bar{x}(s))" ) _parameters_str = [ ("data", "data"), ] cov_fdata: FData - ddof: int + correction: int @abc.abstractmethod def __init__(self, data: FData) -> None: @@ -815,17 +815,17 @@ class EmpiricalGrid(Empirical): """Sample covariance function for FDataGrid.""" cov_fdata: FDataGrid - ddof: int + correction: int - def __init__(self, data: FDataGrid, ddof: int = 1) -> None: + def __init__(self, data: FDataGrid, correction: int = 0) -> None: super().__init__(data=data) - self.ddof = ddof + self.correction = correction self.cov_fdata = data.copy( data_matrix=np.cov( data.data_matrix[..., 0], rowvar=False, - ddof=ddof, + ddof=correction, )[np.newaxis, ...], grid_points=[ data.grid_points[0], @@ -851,16 +851,16 @@ class EmpiricalBasis(Empirical): cov_fdata: FDataBasis coeff_matrix: NDArrayFloat - ddof: int + correction: int - def __init__(self, data: FDataBasis, ddof: int = 1) -> None: + def __init__(self, data: FDataBasis, correction: int = 0) -> None: super().__init__(data=data) - self.ddof = ddof + self.correction = correction self.coeff_matrix = np.cov( data.coefficients, rowvar=False, - ddof=ddof, + ddof=correction, ) self.cov_fdata = FDataBasis( basis=TensorBasis([data.basis, data.basis]), diff --git a/skfda/misc/hat_matrix.py b/skfda/misc/hat_matrix.py index fc94adc6b..81694bd47 100644 --- a/skfda/misc/hat_matrix.py +++ b/skfda/misc/hat_matrix.py @@ -11,20 +11,21 @@ import abc import math -from typing import Callable, TypeVar, Union, overload +from typing import Callable, Final, TypeVar, Union, overload import numpy as np from .._utils._sklearn_adapter import BaseEstimator from ..representation._functional_data import FData from ..representation.basis import FDataBasis -from ..typing._base import GridPointsLike from ..typing._numpy import NDArrayFloat from . 
import kernels -Input = TypeVar("Input", bound=Union[FData, GridPointsLike]) +Input = TypeVar("Input", bound=Union[FData, NDArrayFloat]) Prediction = TypeVar("Prediction", bound=Union[NDArrayFloat, FData]) +DEFAULT_BANDWIDTH_PERCENTILE: Final = 15 + class HatMatrix( BaseEstimator, @@ -52,8 +53,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: Input | None = None, - X: Input | None = None, + X_train: Input, + X: Input, y_train: None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -65,8 +66,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: Input | None = None, - X: Input | None = None, + X_train: Input, + X: Input, y_train: Prediction | None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -77,8 +78,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: Input | None = None, - X: Input | None = None, + X_train: Input, + X: Input, y_train: NDArrayFloat | FData | None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -186,7 +187,7 @@ def _hat_matrix_function_not_normalized( ) -> NDArrayFloat: bandwidth = ( - np.percentile(np.abs(delta_x), 15) + float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE)) if self.bandwidth is None else self.bandwidth ) @@ -278,8 +279,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: FData | GridPointsLike | None = None, - X: FData | GridPointsLike | None = None, + X_train: FData | NDArrayFloat, + X: FData | NDArrayFloat, y_train: None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -291,8 +292,8 @@ def __call__( self, *, delta_x: NDArrayFloat, - X_train: FData | GridPointsLike | None = None, - X: FData | GridPointsLike | None = None, + X_train: FData | NDArrayFloat, + X: FData | NDArrayFloat, y_train: Prediction | None = None, weights: NDArrayFloat | None = None, _cv: bool = False, @@ -303,19 +304,37 @@ def __call__( # noqa: D102 self, *, delta_x: NDArrayFloat, - X_train: FData | GridPointsLike | None = None, - X: FData | GridPointsLike | None = None, + X_train: FData | NDArrayFloat, + X: FData | NDArrayFloat, y_train: NDArrayFloat | FData | None = None, weights: NDArrayFloat | None = None, _cv: bool = False, ) -> NDArrayFloat | FData: bandwidth = ( - np.percentile(np.abs(delta_x), 15) + float(np.percentile(delta_x, DEFAULT_BANDWIDTH_PERCENTILE)) if self.bandwidth is None else self.bandwidth ) + # Smoothing for functions of one variable + if not isinstance(X_train, FDataBasis) and X_train[0].shape[0] == 1: + delta_x = np.subtract.outer( + X.flatten(), + X_train.flatten(), + ) + + assert y_train is None + + return super().__call__( # noqa: WPS503 + delta_x=delta_x, + X_train=X_train, + X=X, + y_train=y_train, + weights=weights, + _cv=_cv, + ) + # Regression if isinstance(X_train, FData): assert isinstance(X, FData) @@ -326,44 +345,41 @@ def __call__( # noqa: D102 ): raise ValueError("Only FDataBasis is supported for now.") + m1 = X_train.coefficients + m2 = X.coefficients + if y_train is None: y_train = np.identity(X_train.n_samples) - m1 = X_train.coefficients - m2 = X.coefficients + else: + m1 = X_train + m2 = X + + if y_train is None: + y_train = np.identity(X_train.shape[0]) - # Subtract previous matrices obtaining a 3D matrix - # The i-th element contains the matrix X_train - X[i] - C = m1 - m2[:, np.newaxis] + # Subtract previous matrices obtaining a 3D matrix + # The i-th element contains the matrix X_train - X[i] + C = m1 - m2[:, np.newaxis] + # Inner product matrix only is applicable in regression + if isinstance(X_train, FDataBasis): 
inner_product_matrix = X_train.basis.inner_product_matrix() # Calculate new coefficients taking into account cross-products # if the basis is orthonormal, C would not change C = C @ inner_product_matrix # noqa: WPS350 - # Adding a column of ones in the first position of all matrices - dims = (C.shape[0], C.shape[1], 1) - C = np.concatenate((np.ones(dims), C), axis=-1) - - return self._solve_least_squares( - delta_x=delta_x, - coefs=C, - y_train=y_train, - bandwidth=bandwidth, - ) - - # Smoothing - else: + # Adding a column of ones in the first position of all matrices + dims = (C.shape[0], C.shape[1], 1) + C = np.concatenate((np.ones(dims), C), axis=-1) - return super().__call__( # type: ignore[misc, type-var] # noqa: WPS503 - delta_x=delta_x, - X_train=X_train, - X=X, - y_train=y_train, # type: ignore[arg-type] - weights=weights, - _cv=_cv, - ) + return self._solve_least_squares( + delta_x=delta_x, + coefs=C, + y_train=y_train, + bandwidth=bandwidth, + ) def _solve_least_squares( self, @@ -485,7 +501,7 @@ def _hat_matrix_function_not_normalized( # For each row in the distances matrix, it calculates the furthest # point within the k nearest neighbours vec = np.sort( - np.abs(delta_x), + delta_x, axis=1, )[:, n_neighbors - 1] + tol diff --git a/skfda/misc/metrics/_fisher_rao.py b/skfda/misc/metrics/_fisher_rao.py index 7fd18e4da..bfc4e2f89 100644 --- a/skfda/misc/metrics/_fisher_rao.py +++ b/skfda/misc/metrics/_fisher_rao.py @@ -234,7 +234,7 @@ def fisher_rao_amplitude_distance( penalty = np.sqrt(penalty, out=penalty) penalty -= 1 penalty = np.square(penalty, out=penalty) - penalty = scipy.integrate.simps(penalty, x=eval_points_normalized) + penalty = scipy.integrate.simpson(penalty, x=eval_points_normalized) distance = np.sqrt(distance**2 + lam * penalty) @@ -322,7 +322,7 @@ def fisher_rao_phase_distance( derivative_warping = np.sqrt(derivative_warping, out=derivative_warping) - d = scipy.integrate.simps(derivative_warping, x=eval_points_normalized) + d = scipy.integrate.simpson(derivative_warping, x=eval_points_normalized) d = np.clip(d, -1, 1) return np.arccos(d) # type: ignore[no-any-return] @@ -394,7 +394,7 @@ def _fisher_rao_warping_distance( product = np.multiply(srsf_warping1, srsf_warping2, out=srsf_warping1) - d = scipy.integrate.simps(product, x=warping1.grid_points[0]) + d = scipy.integrate.simpson(product, x=warping1.grid_points[0]) d = np.clip(d, -1, 1) return np.arccos(d) # type: ignore[no-any-return] diff --git a/skfda/misc/metrics/_lp_norms.py b/skfda/misc/metrics/_lp_norms.py index 2a6c2f9a6..0465dc005 100644 --- a/skfda/misc/metrics/_lp_norms.py +++ b/skfda/misc/metrics/_lp_norms.py @@ -7,7 +7,7 @@ import scipy.integrate from typing_extensions import Final -from ...representation import FData, FDataBasis +from ...representation import FData, FDataBasis, FDataGrid from ...typing._metric import Norm from ...typing._numpy import NDArrayFloat @@ -135,20 +135,21 @@ def __call__(self, vector: Union[NDArrayFloat, FData]) -> NDArrayFloat: ) res = np.sqrt(integral[0]).flatten() - else: + elif isinstance(vector, FDataGrid): data_matrix = vector.data_matrix - original_shape = data_matrix.shape - data_matrix = data_matrix.reshape(-1, original_shape[-1]) - data_matrix = (np.linalg.norm( - vector.data_matrix, - ord=vector_norm, - axis=-1, - keepdims=True, - ) if isinstance(vector_norm, (float, int)) - else vector_norm(data_matrix) - ) - data_matrix = data_matrix.reshape(original_shape[:-1] + (1,)) + if isinstance(vector_norm, (float, int)): + data_matrix = np.linalg.norm( + 
vector.data_matrix, + ord=vector_norm, + axis=-1, + keepdims=True, + ) + else: + original_shape = data_matrix.shape + data_matrix = data_matrix.reshape(-1, original_shape[-1]) + data_matrix = vector_norm(data_matrix) + data_matrix = data_matrix.reshape(original_shape[:-1] + (1,)) if np.isinf(self.p): @@ -157,18 +158,19 @@ def __call__(self, vector: Union[NDArrayFloat, FData]) -> NDArrayFloat: axis=tuple(range(1, data_matrix.ndim)), ) - elif vector.dim_domain == 1: + else: + integrand = vector.copy( + data_matrix=data_matrix ** self.p, + coordinate_names=(None,), + ) # Computes the norm, approximating the integral with Simpson's # rule. - res = scipy.integrate.simps( - data_matrix[..., 0] ** self.p, - x=vector.grid_points, - ) ** (1 / self.p) - - else: - # Needed to perform surface integration - return NotImplemented + res = integrand.integrate().ravel() ** (1 / self.p) + else: + raise NotImplementedError( + f"LpNorm not implemented for type {type(vector)}", + ) if len(res) == 1: return res[0] # type: ignore[no-any-return] diff --git a/skfda/misc/scoring.py b/skfda/misc/scoring.py index 419d25c0f..7491b92ff 100644 --- a/skfda/misc/scoring.py +++ b/skfda/misc/scoring.py @@ -74,7 +74,7 @@ def _var( from ..exploratory.stats import mean, var if weights is None: - return var(x, ddof=0) + return var(x, correction=0) return mean( # type: ignore[no-any-return] np.power(x - mean(x, weights=weights), 2), diff --git a/skfda/ml/classification/_centroid_classifiers.py b/skfda/ml/classification/_centroid_classifiers.py index dd19c2f1a..4bac8ad9e 100644 --- a/skfda/ml/classification/_centroid_classifiers.py +++ b/skfda/ml/classification/_centroid_classifiers.py @@ -127,7 +127,7 @@ class DTMClassifier(NearestCentroid[Input, Target]): Test samples are classified to the class that minimizes the distance of the observation to the trimmed mean of the group - :footcite:`fraiman+muniz_2001_trimmed`. + :footcite:`pintado+romo_2005_depthbased`. Parameters: proportiontocut: @@ -174,6 +174,7 @@ class DTMClassifier(NearestCentroid[Input, Target]): See also: :class:`~skfda.ml.classification.NearestCentroid` + :class:`~skfda.exploratory.stats.trim_mean` References: .. 
footbibliography:: diff --git a/skfda/ml/clustering/_kmeans.py b/skfda/ml/clustering/_kmeans.py index 9d58e068a..c433eb186 100644 --- a/skfda/ml/clustering/_kmeans.py +++ b/skfda/ml/clustering/_kmeans.py @@ -147,7 +147,7 @@ def _check_clustering(self, fdata: Input) -> Input: return fdata def _tolerance(self, fdata: Input) -> float: - variance = fdata.var(ddof=0) + variance = fdata.var(correction=0) mean_variance = np.mean(variance[0].data_matrix) return float(mean_variance * self.tol) diff --git a/skfda/ml/regression/_historical_linear_model.py b/skfda/ml/regression/_historical_linear_model.py index de5dc38bf..21377c77c 100644 --- a/skfda/ml/regression/_historical_linear_model.py +++ b/skfda/ml/regression/_historical_linear_model.py @@ -42,7 +42,7 @@ def _pairwise_fem_inner_product( eval_fd = fd(grid) prod = eval_fem * eval_fd - integral = scipy.integrate.simps(prod, grid, axis=1) + integral = scipy.integrate.simpson(prod, grid, axis=1) return np.sum(integral, axis=-1) # type: ignore[no-any-return] diff --git a/skfda/preprocessing/dim_reduction/_fpca.py b/skfda/preprocessing/dim_reduction/_fpca.py index f5c88a54a..9110f31c0 100644 --- a/skfda/preprocessing/dim_reduction/_fpca.py +++ b/skfda/preprocessing/dim_reduction/_fpca.py @@ -348,7 +348,7 @@ def _fit_grid( if self._weights is None: # grid_points is a list with one array in the 1D case identity = np.eye(len(X.grid_points[0])) - self._weights = scipy.integrate.simps(identity, X.grid_points[0]) + self._weights = scipy.integrate.simpson(identity, X.grid_points[0]) elif callable(self._weights): self._weights = self._weights(X.grid_points[0]) # if its a FDataGrid then we need to reduce the dimension to 1-D diff --git a/skfda/preprocessing/feature_construction/_functions.py b/skfda/preprocessing/feature_construction/_functions.py index 3359a77af..e426afaa4 100644 --- a/skfda/preprocessing/feature_construction/_functions.py +++ b/skfda/preprocessing/feature_construction/_functions.py @@ -3,13 +3,16 @@ from __future__ import annotations import itertools -from typing import Optional, Sequence, Tuple, TypeVar, Union, cast, overload +from typing import Optional, Sequence, Tuple, TypeVar, Union, cast import numpy as np -from typing_extensions import Literal, Protocol, TypeGuard +from typing_extensions import Literal, TypeGuard -from ..._utils import nquad_vec -from ...misc.validation import check_fdata_dimensions, validate_domain_range +from ..._utils.ndfunction._functions import ( + _average_function_ufunc, + _UnaryUfunc, +) +from ...misc.validation import check_fdata_dimensions from ...representation import FData, FDataBasis, FDataGrid from ...typing._base import DomainRangeLike from ...typing._numpy import ArrayLike, NDArrayBool, NDArrayFloat, NDArrayInt @@ -17,20 +20,6 @@ T = TypeVar("T", bound=Union[NDArrayFloat, FDataGrid]) -class _BasicUfuncProtocol(Protocol): - - @overload - def __call__(self, __arg: FDataGrid) -> FDataGrid: # noqa: WPS112 - pass - - @overload - def __call__(self, __arg: NDArrayFloat) -> NDArrayFloat: # noqa: WPS112 - pass - - def __call__(self, __arg: T) -> T: # noqa: WPS112 - pass - - def _sequence_of_ints(data: Sequence[object]) -> TypeGuard[Sequence[int]]: """Check that every element is an integer.""" return all(isinstance(d, int) for d in data) @@ -332,7 +321,7 @@ def number_crossings( >>> from skfda.preprocessing.feature_construction import ( ... number_crossings, - ... ) + ... 
) >>> from scipy.special import jv >>> import numpy as np >>> x_grid = np.linspace(0, 14, 14) @@ -521,7 +510,7 @@ def unconditional_moment( def unconditional_expected_value( data: FData, - function: _BasicUfuncProtocol, + function: _UnaryUfunc, *, domain: DomainRangeLike | None = None, ) -> NDArrayFloat: @@ -537,19 +526,19 @@ def unconditional_expected_value( f_p(x(t))=\frac{1}{\left( b-a\right)}\int_a^b g \left(x_p(t)\right) dt - Args: - data: FDataGrid or FDataBasis where we want to calculate - the expected value. - function: function that specifies how the expected value to is - calculated. It has to be a function of X(t). - domain: Integration domain. By default, the whole domain is used. + Args: + data: FDataGrid or FDataBasis where we want to calculate + the expected value. + function: function that specifies how the expected value to is + calculated. It has to be a function of X(t). + domain: Integration domain. By default, the whole domain is used. - Returns: - ndarray of shape (n_dimensions, n_samples) with the values of the - expectations. + Returns: + ndarray of shape (n_dimensions, n_samples) with the values of the + expectations. Examples: - We will use this funtion to calculate the logarithmic first moment + We will use this function to calculate the logarithmic first moment of the first 5 samples of the Berkeley Growth dataset. We will start by importing it. @@ -574,26 +563,4 @@ def unconditional_expected_value( [ 4.84]]) """ - if domain is None: - domain = data.domain_range - else: - domain = validate_domain_range(domain) - - lebesgue_measure = np.prod( - [ - (iterval[1] - iterval[0]) - for iterval in domain - ], - ) - - if isinstance(data, FDataGrid): - return function(data).integrate(domain=domain) / lebesgue_measure - - def integrand(*args: NDArrayFloat) -> NDArrayFloat: # noqa: WPS430 - f1 = data(args)[:, 0, :] - return function(f1) - - return nquad_vec( - integrand, - domain, - ) / lebesgue_measure + return _average_function_ufunc(data, ufunc=function, domain=domain) diff --git a/skfda/preprocessing/smoothing/_kernel_smoothers.py b/skfda/preprocessing/smoothing/_kernel_smoothers.py index c07489235..e9b56f44d 100644 --- a/skfda/preprocessing/smoothing/_kernel_smoothers.py +++ b/skfda/preprocessing/smoothing/_kernel_smoothers.py @@ -9,9 +9,11 @@ import numpy as np -from ..._utils._utils import _to_grid_points +from ..._utils._utils import _cartesian_product, _to_grid_points from ...misc.hat_matrix import HatMatrix, NadarayaWatsonHatMatrix +from ...misc.metrics import PairwiseMetric, l2_distance from ...typing._base import GridPointsLike +from ...typing._metric import Metric from ...typing._numpy import NDArrayFloat from ._linear import _LinearSmoother @@ -114,10 +116,12 @@ def __init__( *, weights: Optional[NDArrayFloat] = None, output_points: Optional[GridPointsLike] = None, + metric: Metric[NDArrayFloat] = l2_distance, ): self.kernel_estimator = kernel_estimator self.weights = weights self.output_points = output_points + self.metric = metric self._cv = False # For testing purposes only def _hat_matrix( @@ -126,18 +130,18 @@ def _hat_matrix( output_points: GridPointsLike, ) -> NDArrayFloat: - input_points = _to_grid_points(input_points) - output_points = _to_grid_points(output_points) + input_points = _cartesian_product(_to_grid_points(input_points)) + output_points = _cartesian_product(_to_grid_points(output_points)) if self.kernel_estimator is None: self.kernel_estimator = NadarayaWatsonHatMatrix() - delta_x = np.subtract.outer(output_points[0], input_points[0]) + 
delta_x = PairwiseMetric(self.metric)(output_points, input_points) return self.kernel_estimator( delta_x=delta_x, weights=self.weights, - X_train=input_points, - X=output_points, + X_train=np.array(input_points), + X=np.array(output_points), _cv=self._cv, ) diff --git a/skfda/preprocessing/smoothing/_linear.py b/skfda/preprocessing/smoothing/_linear.py index 8759704c9..09c6fadcc 100644 --- a/skfda/preprocessing/smoothing/_linear.py +++ b/skfda/preprocessing/smoothing/_linear.py @@ -28,6 +28,7 @@ class _LinearSmoother( ``hat_matrix`` to define the smoothing or 'hat' matrix. """ + input_points_: GridPoints output_points_: GridPoints @@ -116,9 +117,25 @@ def transform( ) ) + dims = [len(e) for e in self.output_points_] + dims += [len(e) for e in self.input_points_] + + hat_matrix = np.reshape( + self.hat_matrix_, + dims, + ) + + data_matrix = np.einsum( + hat_matrix, + [Ellipsis, *range(1, len(self.output_points_) + 1)], + X.data_matrix, + [0, *range(1, len(self.output_points_) + 2)], + [0, Ellipsis, len(self.output_points_) + 1], + ) + # The matrix is cached return X.copy( - data_matrix=self.hat_matrix_ @ X.data_matrix, + data_matrix=data_matrix, grid_points=self.output_points_, ) diff --git a/skfda/representation/_functional_data.py b/skfda/representation/_functional_data.py index 2696e7b25..ee8813bbc 100644 --- a/skfda/representation/_functional_data.py +++ b/skfda/representation/_functional_data.py @@ -826,7 +826,7 @@ def cov( # noqa: WPS451 s_points: NDArrayFloat, t_points: NDArrayFloat, /, - ddof: int = 1, + correction: int = 0, ) -> NDArrayFloat: pass @@ -834,7 +834,7 @@ def cov( # noqa: WPS451 def cov( # noqa: WPS451 self: T, /, - ddof: int = 1, + correction: int = 0, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: pass @@ -844,7 +844,7 @@ def cov( # noqa: WPS320, WPS451 s_points: Optional[NDArrayFloat] = None, t_points: Optional[NDArrayFloat] = None, /, - ddof: int = 1, + correction: int = 0, ) -> Union[ Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat], NDArrayFloat, @@ -864,9 +864,9 @@ def cov( # noqa: WPS320, WPS451 Args: s_points: Points where the covariance function is evaluated. t_points: Points where the covariance function is evaluated. - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number - of elements. By default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `0`. Returns: Covariance function. 
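Note on the einsum in _LinearSmoother.transform above: reshaping the cached hat matrix to the per-axis grid sizes and contracting over the input-grid axes generalizes the old `hat_matrix_ @ data_matrix` product to domains of any dimension. A minimal NumPy sketch with toy sizes (not part of the patch) that checks the contraction against the flat matrix product on a raveled 2-D grid:

import numpy as np

n_samples, i1, i2 = 2, 3, 4                    # samples and input grid sizes (toy values)
o1, o2 = 3, 4                                  # output grid sizes

rng = np.random.default_rng(0)
hat = rng.random((o1 * o2, i1 * i2))           # flat hat matrix, as cached
data = rng.random((n_samples, i1, i2, 1))      # FDataGrid-style data_matrix

# Contraction used for a two-dimensional domain: sum over the input grid axes.
hat_nd = hat.reshape(o1, o2, i1, i2)
smoothed = np.einsum(
    hat_nd, [Ellipsis, 1, 2],
    data, [0, 1, 2, 3],
    [0, Ellipsis, 3],
)

# Same result as a plain matrix product on the raveled grid.
expected = (hat @ data.reshape(n_samples, i1 * i2, 1)).reshape(n_samples, o1, o2, 1)
np.testing.assert_allclose(smoothed, expected)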
diff --git a/skfda/representation/basis/_bspline_basis.py b/skfda/representation/basis/_bspline_basis.py index 7b4898f96..15059e49c 100644 --- a/skfda/representation/basis/_bspline_basis.py +++ b/skfda/representation/basis/_bspline_basis.py @@ -129,11 +129,10 @@ def __init__( # noqa: WPS238 ) n_basis = len(knots) + order - 2 - if (n_basis - order + 2) < 2: + if n_basis < order: raise ValueError( - f"The number of basis ({n_basis}) minus the " - f"order of the bspline ({order}) should be " - f"greater than 3.", + f"The number of basis ({n_basis}) should not be smaller " + f"than the order of the bspline ({order}).", ) self._order = order diff --git a/skfda/representation/basis/_fdatabasis.py b/skfda/representation/basis/_fdatabasis.py index 8289471d4..945dda05d 100644 --- a/skfda/representation/basis/_fdatabasis.py +++ b/skfda/representation/basis/_fdatabasis.py @@ -445,7 +445,7 @@ def sum( # noqa: WPS125 def var( self: T, eval_points: Optional[NDArrayFloat] = None, - ddof: int = 1, + correction: int = 0, ) -> T: """Compute the variance of the functional data object. @@ -460,15 +460,17 @@ def var( numpy.linspace with bounds equal to the ones defined in self.domain_range and the number of points the maximum between 501 and 10 times the number of basis. - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number of - elements. By default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `0`. Returns: Variance of the original object. """ - return self.to_grid(eval_points).var(ddof=ddof).to_basis(self.basis) + return self.to_grid( + eval_points, + ).var(correction=correction).to_basis(self.basis) @overload def cov( # noqa: WPS451 @@ -476,7 +478,7 @@ def cov( # noqa: WPS451 s_points: NDArrayFloat, t_points: NDArrayFloat, /, - ddof: int = 1, + correction: int = 0, ) -> NDArrayFloat: pass @@ -484,7 +486,7 @@ def cov( # noqa: WPS451 def cov( # noqa: WPS451 self: T, /, - ddof: int = 1, + correction: int = 0, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: pass @@ -493,7 +495,7 @@ def cov( # noqa: WPS320, WPS451 s_points: Optional[NDArrayFloat] = None, t_points: Optional[NDArrayFloat] = None, /, - ddof: int = 1, + correction: int = 0, ) -> Union[ Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat], NDArrayFloat, @@ -515,9 +517,9 @@ def cov( # noqa: WPS320, WPS451 Args: s_points: Points where the covariance function is evaluated. t_points: Points where the covariance function is evaluated. - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number - of elements. By default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `0`. Returns: Covariance function. 
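Note on the ddof to correction rename in FDataBasis.var and cov (and in the other modules touched by this patch): the value is still forwarded to NumPy's ddof, only the parameter name and the default divisor change. A two-line NumPy sketch of the new default (correction=0, divisor N) versus the old one (ddof=1, divisor N - 1):

import numpy as np

x = np.array([1.0, 2.0, 3.0, 4.0])
np.var(x, ddof=1)  # 1.666..., old default divisor N - 1
np.var(x, ddof=0)  # 1.25, new default divisor N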
@@ -525,7 +527,7 @@ def cov( # noqa: WPS320, WPS451 """ # To avoid circular imports from ...misc.covariances import EmpiricalBasis - cov_function = EmpiricalBasis(self, ddof=ddof) + cov_function = EmpiricalBasis(self, correction=correction) if s_points is None or t_points is None: return cov_function return cov_function(s_points, t_points) diff --git a/skfda/representation/grid.py b/skfda/representation/grid.py index 5801d3a01..bdd01c3e4 100644 --- a/skfda/representation/grid.py +++ b/skfda/representation/grid.py @@ -515,7 +515,7 @@ def integrate( integrand = data.data_matrix for g in data.grid_points[::-1]: - integrand = scipy.integrate.simps( + integrand = scipy.integrate.simpson( integrand, x=g, axis=-2, @@ -582,13 +582,13 @@ def sum( # noqa: WPS125 sample_names=(None,), ) - def var(self: T, ddof: int = 1) -> T: + def var(self: T, correction: int = 0) -> T: """Compute the variance of a set of samples in a FDataGrid object. Args: - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number of - elements. By default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `0`. Returns: A FDataGrid object with just one sample representing the @@ -599,7 +599,7 @@ def var(self: T, ddof: int = 1) -> T: data_matrix=np.array([np.var( self.data_matrix, axis=0, - ddof=ddof, + ddof=correction, )]), sample_names=("variance",), ) @@ -610,7 +610,7 @@ def cov( # noqa: WPS451 s_points: NDArrayFloat, t_points: NDArrayFloat, /, - ddof: int = 1, + correction: int = 0, ) -> NDArrayFloat: pass @@ -618,7 +618,7 @@ def cov( # noqa: WPS451 def cov( # noqa: WPS451 self: T, /, - ddof: int = 1, + correction: int = 0, ) -> Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat]: pass @@ -627,7 +627,7 @@ def cov( # noqa: WPS320, WPS451 s_points: Optional[NDArrayFloat] = None, t_points: Optional[NDArrayFloat] = None, /, - ddof: int = 1, + correction: int = 0, ) -> Union[ Callable[[NDArrayFloat, NDArrayFloat], NDArrayFloat], NDArrayFloat, @@ -643,9 +643,9 @@ def cov( # noqa: WPS320, WPS451 Args: s_points: Grid points where the covariance function is evaluated. t_points: Grid points where the covariance function is evaluated. - ddof: "Delta Degrees of Freedom": the divisor used in the - calculation is `N - ddof`, where `N` represents the number - of elements. By default `ddof` is 1. + correction: degrees of freedom adjustment. The divisor used in the + calculation is `N - correction`, where `N` represents the + number of elements. Default: `0`. Returns: Covariance function. 
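Note on FDataGrid.integrate above: the loop applies Simpson's rule once per domain axis, innermost grid axis first, so surfaces and higher-dimensional domains are handled by iterated quadrature. A minimal SciPy sketch (toy surface, not part of the patch) whose exact integral is 1/4:

import numpy as np
from scipy.integrate import simpson

s = np.linspace(0, 1, 11)
t = np.linspace(0, 1, 11)
# data_matrix layout: (n_samples, n_s, n_t, dim_codomain)
data_matrix = np.outer(s, t)[np.newaxis, ..., np.newaxis]

integrand = data_matrix
for g in [s, t][::-1]:
    # axis=-2 is always the innermost remaining grid axis
    integrand = simpson(integrand, x=g, axis=-2)

print(integrand)  # approximately [[0.25]], the integral of s * t over [0, 1] x [0, 1]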
@@ -653,7 +653,7 @@ def cov( # noqa: WPS320, WPS451 """ # To avoid circular imports from ..misc.covariances import EmpiricalGrid - cov_function = EmpiricalGrid(self, ddof=ddof) + cov_function = EmpiricalGrid(self, correction=correction) if s_points is None or t_points is None: return cov_function return cov_function(s_points, t_points) @@ -734,8 +734,9 @@ def _get_op_matrix( return other[other_index] raise ValueError( - f"Invalid dimensions in operator between FDataGrid and Numpy " - f"array: {other.shape}" + f"Invalid dimensions in operator between " + f"FDataGrid (data_matrix.shape={self.data_matrix.shape}) " + f"and Numpy array (shape={other.shape})", ) elif isinstance(other, FDataGrid): diff --git a/skfda/tests/test_metrics.py b/skfda/tests/test_metrics.py index 4e4746901..440806379 100644 --- a/skfda/tests/test_metrics.py +++ b/skfda/tests/test_metrics.py @@ -89,10 +89,11 @@ def test_lp_norm_surface_inf(self) -> None: ) def test_lp_norm_surface(self) -> None: - """Test that integration of surfaces has not been implemented.""" - # Integration of surfaces not implemented, add test case after - # implementation - self.assertEqual(lp_norm(self.fd_surface, p=1), NotImplemented) + """Test the integration of surfaces.""" + np.testing.assert_allclose( + lp_norm(self.fd_surface, p=1), + [0.12566256, 0.12563755, 0.12566133], + ) def test_lp_error_dimensions(self) -> None: """Test error on metric between different kind of objects.""" diff --git a/skfda/tests/test_recursive_maxima_hunting.py b/skfda/tests/test_recursive_maxima_hunting.py index 9a435b8f9..389e095ac 100644 --- a/skfda/tests/test_recursive_maxima_hunting.py +++ b/skfda/tests/test_recursive_maxima_hunting.py @@ -82,7 +82,7 @@ def test_fit_exponential(self) -> None: join. """ - n_samples = 10000 + n_samples = 1000 n_features = 100 def mean_1( # noqa: WPS430 diff --git a/skfda/tests/test_smoothing.py b/skfda/tests/test_smoothing.py index a76825640..315d2b3a5 100644 --- a/skfda/tests/test_smoothing.py +++ b/skfda/tests/test_smoothing.py @@ -4,6 +4,7 @@ import numpy as np import sklearn +from sklearn.datasets import load_digits from typing_extensions import Literal import skfda @@ -173,6 +174,39 @@ def test_knn(self) -> None: np.testing.assert_allclose(hat_matrix, hat_matrix_r) +class TestSurfaceSmoother(unittest.TestCase): + """Test that smoothing works on surfaces.""" + + def test_knn(self) -> None: + """Check 2D smoothing using digits dataset.""" + X, y = load_digits(return_X_y=True) + X = X.reshape(-1, 8, 8) + + fd = skfda.FDataGrid(X) + + ks = KernelSmoother( + kernel_estimator=KNeighborsHatMatrix(n_neighbors=1)) + fd_trans = ks.fit_transform(fd) + + np.testing.assert_allclose(fd_trans.data_matrix, fd.data_matrix) + + ks = KernelSmoother( + kernel_estimator=KNeighborsHatMatrix(n_neighbors=3)) + fd_trans = ks.fit_transform(fd) + + res = [ + [0, 1.25, 7.75, 10.5, 8.25, 6.25, 1.5, 0], + [0, 3.2, 9.6, 10.6, 9.8, 8.4, 5.6, 1.25], + [0.75, 4.4, 9, 6.4, 4.6, 8.4, 6.4, 2], + [1, 4.8, 7.8, 2.8, 1.6, 7.2, 6.4, 2], + [1.25, 4.2, 7.2, 1.6, 2, 7.4, 6.4, 2], + [1, 4.4, 7.4, 3.4, 4.6, 8.2, 5.4, 1.75], + [0.5, 4, 7.6, 8.4, 7.6, 6.8, 3.8, 0], + [0, 2, 8.25, 8.5, 8.25, 5.5, 0, 0], + ] + np.testing.assert_allclose(fd_trans.data_matrix[0, ..., 0], res) + + class TestBasisSmoother(unittest.TestCase): """Test Basis Smoother."""
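Note on test_lp_norm_surface above: the new FDataGrid branch of LpNorm takes the pointwise codomain norm, raises it to p, integrates over every domain axis and takes the p-th root. A plain NumPy/SciPy sketch of that computation on a toy surface with scalar codomain (it illustrates the procedure only and does not reproduce the test fixture):

import numpy as np
from scipy.integrate import simpson

s = np.linspace(0, 1, 11)
t = np.linspace(0, 1, 11)
values = np.abs(np.outer(s, t))  # pointwise norm of the scalar surface x(s, t) = s * t

p = 1
integrand = values ** p
for g in [s, t][::-1]:
    integrand = simpson(integrand, x=g, axis=-1)

print(integrand ** (1 / p))  # approximately 0.25, the L1 norm over [0, 1] x [0, 1]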