neurodata · goraj · Nov 11, 2024 · Nov 11, 2024 · Nov 12, 2024 · Nov 12, 2024
diff --git a/treeple/ensemble/_honest_forest.py b/treeple/ensemble/_honest_forest.py
@@ -728,10 +728,16 @@
         return oob_samples
 
     def __sklearn_tags__(self):
-        # XXX: nans should be supportable in HRF
         tags = super().__sklearn_tags__()
-        tags.classifier_tags.multi_output = False
+        # XXX: NaNs should be supportable in HRF, but are not allowed for now
         tags.input_tags.allow_nan = False
+
+        try:
+            # sklearn >= 1.6 revamped the tags API: multi_output is set on target_tags
+            tags.target_tags.multi_output = False
+        except AttributeError:
+            tags.classifier_tags.multi_output = False
+
         return tags
 
     def decision_path(self, X):

diff --git a/treeple/experimental/tests/test_sdf.py b/treeple/experimental/tests/test_sdf.py
@@ -115,6 +115,7 @@ def test_sklearn_compatible_estimator(estimator, check):
     # XXX: can include this "generalization" in the future if it's useful
     if check.func.__name__ in [
         "check_class_weight_classifiers",
+        "check_sample_weight_equivalence",
     ]:
         pytest.skip()
     check(estimator)
diff --git a/treeple/neighbors.py b/treeple/neighbors.py
@@ -11,7 +11,7 @@
 from treeple.tree._neighbors import _compute_distance_matrix, compute_forest_similarity_matrix
 
 
-class NearestNeighborsMetaEstimator(BaseEstimator, MetaEstimatorMixin):
+class NearestNeighborsMetaEstimator(MetaEstimatorMixin, BaseEstimator):
     """Meta-estimator for nearest neighbors.
 
     Uses a decision-tree, or forest model to compute distances between samples

diff --git a/treeple/stats/permuteforest.py b/treeple/stats/permuteforest.py
@@ -195,7 +195,7 @@ class PermutationHonestForestClassifier(HonestForestClassifier):
     **tree_estimator_params : dict
         Parameters to pass to the underlying base tree estimators.
         These must be parameters for ``tree_estimator``.
-        
+
     Attributes
     ----------
     estimator : treeple.tree.HonestTreeClassifier

diff --git a/treeple/stats/utils.py b/treeple/stats/utils.py
@@ -5,7 +5,7 @@
 
 import numpy as np
 import scipy.sparse as sp
-from joblib import Parallel, delayed
+from joblib import Parallel, delayed, parallel_config
 from numpy.typing import ArrayLike
 from scipy.stats import entropy
 from sklearn.ensemble._forest import _generate_unsampled_indices, _get_n_samples_bootstrap
@@ -234,18 +234,19 @@ def _compute_null_distribution_coleman(
 
     # generate the random seeds for the parallel jobs
     ss = np.random.SeedSequence(seed)
-    out = Parallel(n_jobs=n_jobs)(
-        delayed(_parallel_build_null_forests)(
-            y_pred_ind_arr,
-            n_estimators,
-            all_y_pred,
-            y_test,
-            seed,
-            metric,
-            **metric_kwargs,
+    with parallel_config("multiprocessing"):
+        out = Parallel(n_jobs=n_jobs)(
+            delayed(_parallel_build_null_forests)(
+                y_pred_ind_arr,
+                n_estimators,
+                all_y_pred,
+                y_test,
+                seed,
+                metric,
+                **metric_kwargs,
+            )
+            for i, seed in zip(range(n_repeats), ss.spawn(n_repeats))
         )
-        for i, seed in zip(range(n_repeats), ss.spawn(n_repeats))
-    )
 
     for idx, (first_half_metric, second_half_metric) in enumerate(out):
         metric_star[idx] = first_half_metric
@@ -512,20 +513,21 @@ def _compute_null_distribution_coleman_sparse(
 
     # generate the random seeds for the parallel jobs
     ss = np.random.SeedSequence(seed)
-    out = Parallel(n_jobs=n_jobs)(
-        delayed(_parallel_build_null_forests_sparse)(
-            np.arange(n_trees),
-            oob_predictions,
-            oob_indicators,
-            y_test,
-            n_outputs,
-            seed,
-            True,
-            metric,
-            **metric_kwargs,
+    with parallel_config("multiprocessing"):
+        out = Parallel(n_jobs=n_jobs)(
+            delayed(_parallel_build_null_forests_sparse)(
+                np.arange(n_trees),
+                oob_predictions,
+                oob_indicators,
+                y_test,
+                n_outputs,
+                seed,
+                True,
+                metric,
+                **metric_kwargs,
+            )
+            for _, seed in zip(range(n_repeats), ss.spawn(n_repeats))
         )
-        for _, seed in zip(range(n_repeats), ss.spawn(n_repeats))
-    )
 
     metric_star = np.zeros((n_repeats,))
     metric_star_pi = np.zeros((n_repeats,))

diff --git a/treeple/tests/test_honest_forest.py b/treeple/tests/test_honest_forest.py
@@ -310,6 +310,7 @@ def test_sklearn_compatible_estimator(estimator, check):
     #  for fitting the tree's splits
     if check.func.__name__ in [
         "check_class_weight_classifiers",
+        "check_sample_weight_equivalence",
         # TODO: this is an error. Somehow a segfault is raised when fit is called first and
         # then partial_fit
         "check_fit_score_takes_y",

diff --git a/treeple/tests/test_multiview_forest.py b/treeple/tests/test_multiview_forest.py
@@ -18,6 +18,10 @@
     ]
 )
 def test_sklearn_compatible_estimator(estimator, check):
+    if check.func.__name__ in [
+        "check_sample_weight_equivalence",
+    ]:
+        pytest.skip()
     check(estimator)
 
 

diff --git a/treeple/tests/test_supervised_forest.py b/treeple/tests/test_supervised_forest.py
@@ -196,11 +196,14 @@ def test_sklearn_compatible_estimator(estimator, check):
     if isinstance(
         estimator,
         (
+            ExtraObliqueRandomForestRegressor,
+            ObliqueRandomForestRegressor,
+            PatchObliqueRandomForestRegressor,
             ExtraObliqueRandomForestClassifier,
             ObliqueRandomForestClassifier,
             PatchObliqueRandomForestClassifier,
         ),
-    ) and check.func.__name__ in ["check_fit_score_takes_y"]:
+    ) and check.func.__name__ in ["check_sample_weight_equivalence", "check_fit_score_takes_y"]:
         pytest.skip()
     check(estimator)
 

diff --git a/treeple/tests/test_unsupervised_forest.py b/treeple/tests/test_unsupervised_forest.py
@@ -35,6 +35,7 @@ def test_sklearn_compatible_estimator(estimator, check):
         "check_methods_subset_invariance",
         # # sample weights do not necessarily imply a sample is not used in clustering
         "check_sample_weights_invariance",
+        "check_sample_weight_equivalence",
         # # sample order is not preserved in predict
         "check_methods_sample_order_invariance",
     ]:

diff --git a/treeple/tree/tests/test_honest_tree.py b/treeple/tree/tests/test_honest_tree.py
@@ -175,6 +175,7 @@ def test_sklearn_compatible_estimator(estimator, check):
         "check_class_weight_classifiers",
         "check_classifier_multioutput",
         "check_do_not_raise_errors_in_init_or_set_params",
+        "check_sample_weight_equivalence",
     ]:
         pytest.skip()
     check(estimator)

diff --git a/treeple/tree/tests/test_unsupervised_tree.py b/treeple/tree/tests/test_unsupervised_tree.py
@@ -110,6 +110,7 @@ def test_sklearn_compatible_transformer(estimator, check):
         "check_sample_weights_invariance",
         # sample order is not preserved in predict
         "check_methods_sample_order_invariance",
+        "check_sample_weight_equivalence",
     ]:
         pytest.skip()
     check(estimator)