diff --git a/treeple/ensemble/_honest_forest.py b/treeple/ensemble/_honest_forest.py index 0650d9a31..e9e2885bb 100644 --- a/treeple/ensemble/_honest_forest.py +++ b/treeple/ensemble/_honest_forest.py @@ -728,10 +728,16 @@ def oob_samples_(self): return oob_samples def __sklearn_tags__(self): - # XXX: nans should be supportable in HRF tags = super().__sklearn_tags__() - tags.classifier_tags.multi_output = False + # XXX: nans should be supportable in HRF tags.input_tags.allow_nan = False + + try: + # sklearn >= 1.6 tags were revamped + tags.target_tags.multi_output = False + except AttributeError: + tags.classifier_tags.multi_output = False + return tags def decision_path(self, X): diff --git a/treeple/experimental/tests/test_sdf.py b/treeple/experimental/tests/test_sdf.py index 64c7e45a8..55ca5485d 100644 --- a/treeple/experimental/tests/test_sdf.py +++ b/treeple/experimental/tests/test_sdf.py @@ -115,6 +115,7 @@ def test_sklearn_compatible_estimator(estimator, check): # XXX: can include this "generalization" in the future if it's useful if check.func.__name__ in [ "check_class_weight_classifiers", + "check_sample_weight_equivalence", ]: pytest.skip() check(estimator) diff --git a/treeple/neighbors.py b/treeple/neighbors.py index b16e732f9..c16aa3f9e 100644 --- a/treeple/neighbors.py +++ b/treeple/neighbors.py @@ -11,7 +11,7 @@ from treeple.tree._neighbors import _compute_distance_matrix, compute_forest_similarity_matrix -class NearestNeighborsMetaEstimator(BaseEstimator, MetaEstimatorMixin): +class NearestNeighborsMetaEstimator(MetaEstimatorMixin, BaseEstimator): """Meta-estimator for nearest neighbors. Uses a decision-tree, or forest model to compute distances between samples diff --git a/treeple/stats/permuteforest.py b/treeple/stats/permuteforest.py index e5c2c3f6f..909245041 100644 --- a/treeple/stats/permuteforest.py +++ b/treeple/stats/permuteforest.py @@ -195,7 +195,7 @@ class PermutationHonestForestClassifier(HonestForestClassifier): **tree_estimator_params : dict Parameters to pass to the underlying base tree estimators. These must be parameters for ``tree_estimator``. - + Attributes ---------- estimator : treeple.tree.HonestTreeClassifier diff --git a/treeple/stats/utils.py b/treeple/stats/utils.py index bd7001879..43c5cd4da 100644 --- a/treeple/stats/utils.py +++ b/treeple/stats/utils.py @@ -5,7 +5,7 @@ import numpy as np import scipy.sparse as sp -from joblib import Parallel, delayed +from joblib import Parallel, delayed, parallel_config from numpy.typing import ArrayLike from scipy.stats import entropy from sklearn.ensemble._forest import _generate_unsampled_indices, _get_n_samples_bootstrap @@ -234,18 +234,19 @@ def _compute_null_distribution_coleman( # generate the random seeds for the parallel jobs ss = np.random.SeedSequence(seed) - out = Parallel(n_jobs=n_jobs)( - delayed(_parallel_build_null_forests)( - y_pred_ind_arr, - n_estimators, - all_y_pred, - y_test, - seed, - metric, - **metric_kwargs, + with parallel_config("multiprocessing"): + out = Parallel(n_jobs=n_jobs)( + delayed(_parallel_build_null_forests)( + y_pred_ind_arr, + n_estimators, + all_y_pred, + y_test, + seed, + metric, + **metric_kwargs, + ) + for i, seed in zip(range(n_repeats), ss.spawn(n_repeats)) ) - for i, seed in zip(range(n_repeats), ss.spawn(n_repeats)) - ) for idx, (first_half_metric, second_half_metric) in enumerate(out): metric_star[idx] = first_half_metric @@ -512,20 +513,21 @@ def _compute_null_distribution_coleman_sparse( # generate the random seeds for the parallel jobs ss = np.random.SeedSequence(seed) - out = Parallel(n_jobs=n_jobs)( - delayed(_parallel_build_null_forests_sparse)( - np.arange(n_trees), - oob_predictions, - oob_indicators, - y_test, - n_outputs, - seed, - True, - metric, - **metric_kwargs, + with parallel_config("multiprocessing"): + out = Parallel(n_jobs=n_jobs)( + delayed(_parallel_build_null_forests_sparse)( + np.arange(n_trees), + oob_predictions, + oob_indicators, + y_test, + n_outputs, + seed, + True, + metric, + **metric_kwargs, + ) + for _, seed in zip(range(n_repeats), ss.spawn(n_repeats)) ) - for _, seed in zip(range(n_repeats), ss.spawn(n_repeats)) - ) metric_star = np.zeros((n_repeats,)) metric_star_pi = np.zeros((n_repeats,)) diff --git a/treeple/tests/test_honest_forest.py b/treeple/tests/test_honest_forest.py index 7a6ee4568..2fd2f241c 100644 --- a/treeple/tests/test_honest_forest.py +++ b/treeple/tests/test_honest_forest.py @@ -310,6 +310,7 @@ def test_sklearn_compatible_estimator(estimator, check): # for fitting the tree's splits if check.func.__name__ in [ "check_class_weight_classifiers", + "check_sample_weight_equivalence", # TODO: this is an error. Somehow a segfault is raised when fit is called first and # then partial_fit "check_fit_score_takes_y", diff --git a/treeple/tests/test_multiview_forest.py b/treeple/tests/test_multiview_forest.py index dff44a863..88c583a07 100644 --- a/treeple/tests/test_multiview_forest.py +++ b/treeple/tests/test_multiview_forest.py @@ -18,6 +18,10 @@ ] ) def test_sklearn_compatible_estimator(estimator, check): + if check.func.__name__ in [ + "check_sample_weight_equivalence", + ]: + pytest.skip() check(estimator) diff --git a/treeple/tests/test_supervised_forest.py b/treeple/tests/test_supervised_forest.py index ba5b29577..17a07a8d4 100644 --- a/treeple/tests/test_supervised_forest.py +++ b/treeple/tests/test_supervised_forest.py @@ -196,11 +196,14 @@ def test_sklearn_compatible_estimator(estimator, check): if isinstance( estimator, ( + ExtraObliqueRandomForestRegressor, + ObliqueRandomForestRegressor, + PatchObliqueRandomForestRegressor, ExtraObliqueRandomForestClassifier, ObliqueRandomForestClassifier, PatchObliqueRandomForestClassifier, ), - ) and check.func.__name__ in ["check_fit_score_takes_y"]: + ) and check.func.__name__ in ["check_sample_weight_equivalence", "check_fit_score_takes_y"]: pytest.skip() check(estimator) diff --git a/treeple/tests/test_unsupervised_forest.py b/treeple/tests/test_unsupervised_forest.py index 96c6f3b17..49a27c110 100644 --- a/treeple/tests/test_unsupervised_forest.py +++ b/treeple/tests/test_unsupervised_forest.py @@ -35,6 +35,7 @@ def test_sklearn_compatible_estimator(estimator, check): "check_methods_subset_invariance", # # sample weights do not necessarily imply a sample is not used in clustering "check_sample_weights_invariance", + "check_sample_weight_equivalence", # # sample order is not preserved in predict "check_methods_sample_order_invariance", ]: diff --git a/treeple/tree/tests/test_honest_tree.py b/treeple/tree/tests/test_honest_tree.py index bdc714e55..5e1bb554b 100644 --- a/treeple/tree/tests/test_honest_tree.py +++ b/treeple/tree/tests/test_honest_tree.py @@ -175,6 +175,7 @@ def test_sklearn_compatible_estimator(estimator, check): "check_class_weight_classifiers", "check_classifier_multioutput", "check_do_not_raise_errors_in_init_or_set_params", + "check_sample_weight_equivalence", ]: pytest.skip() check(estimator) diff --git a/treeple/tree/tests/test_unsupervised_tree.py b/treeple/tree/tests/test_unsupervised_tree.py index 84e7d3d62..2af99d191 100644 --- a/treeple/tree/tests/test_unsupervised_tree.py +++ b/treeple/tree/tests/test_unsupervised_tree.py @@ -110,6 +110,7 @@ def test_sklearn_compatible_transformer(estimator, check): "check_sample_weights_invariance", # sample order is not preserved in predict "check_methods_sample_order_invariance", + "check_sample_weight_equivalence", ]: pytest.skip() check(estimator)