Improve dependency versioning and CI #141

Merged · 16 commits · Jan 21, 2025
15 changes: 15 additions & 0 deletions .github/dependabot.yml
@@ -0,0 +1,15 @@
version: 2
updates:
  # Python dependencies
  - package-ecosystem: "pip"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 3

  # GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "weekly"
    open-pull-requests-limit: 3
123 changes: 123 additions & 0 deletions .github/workflows/pull_request.yml
@@ -0,0 +1,123 @@
name: In pull request
on:
  pull_request:
    branches:
      - main

jobs:
  check_python_linting:
    name: Ruff Linting & Formatting
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: chartboost/ruff-action@v1
        with:
          src: "./src ./tests"
          version: 0.8.6
      - uses: chartboost/ruff-action@v1
        with:
          src: "./src ./tests"
          version: 0.8.6
          args: 'format --check'

  test_compatibility:
    name: Test Package Compatibility
    strategy:
      fail-fast: false
      matrix:
        include:
          - os: ubuntu-latest
            python-version: "3.9"
            dependency-set: minimum
          - os: macos-13  # macos-latest doesn't work with python 3.10
            # https://github.com/actions/setup-python/issues/855
            python-version: "3.9"
            dependency-set: minimum
          - os: ubuntu-latest
            python-version: "3.12"
            dependency-set: maximum
          - os: macos-latest
            python-version: "3.12"
            dependency-set: maximum
    runs-on: ${{ matrix.os }}

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}
          architecture: x64

      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true

      - name: Generate requirements file for minimum dependencies
        if: matrix.dependency-set == 'minimum'
        run: |
          python << EOF
          import re

          with open('pyproject.toml', 'r') as f:
              content = f.read()

          # Find dependencies section using regex
          deps_match = re.search(r'dependencies\s*=\s*\[(.*?)\]', content, re.DOTALL)
          if deps_match:
              deps = [d.strip(' "\'') for d in deps_match.group(1).strip().split('\n') if d.strip()]
              min_reqs = []
              for dep in deps:
                  match = re.match(r'([^>=<\s]+)\s*>=\s*([^,\s"\']+)', dep)
                  if match:
                      package, min_ver = match.groups()
                      min_reqs.append(f"{package}=={min_ver}")

              with open('requirements.txt', 'w') as f:
                  f.write('\n'.join(min_reqs))
          EOF

      - name: Generate requirements file for maximum dependencies
        if: matrix.dependency-set == 'maximum'
        run: |
          python << EOF
          import re

          with open('pyproject.toml', 'r') as f:
              content = f.read()

          # Find dependencies section using regex
          deps_match = re.search(r'dependencies\s*=\s*\[(.*?)\]', content, re.DOTALL)
          if deps_match:
              deps = [d.strip(' "\'') for d in deps_match.group(1).strip().split('\n') if d.strip()]
              max_reqs = []
              for dep in deps:
                  # Check for a maximum version constraint
                  max_version_match = re.search(r'([^>=<\s]+).*?<\s*([^,\s"\']+)', dep)
                  if max_version_match:
                      # If there's a max version, keep it as an exclusive upper bound
                      package, max_ver = max_version_match.groups()
                      max_reqs.append(f"{package}<{max_ver}")
                  else:
                      # If no max version, just use the package name
                      package = re.match(r'([^>=<\s]+)', dep).group(1)
                      max_reqs.append(package)

              with open('requirements.txt', 'w') as f:
                  f.write('\n'.join(max_reqs))
          EOF

      - name: Install dependencies
        run: |
          uv pip install --system --no-deps .
          uv pip install --system pytest
          uv pip install --system -r requirements.txt

      - name: Initialize submodules
        run: git submodule update --init --recursive

      - name: Run Tests
        run: |
          pytest tests/
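For reference, here is a minimal sketch of what the two generation scripts above extract from a single pin (example input assumed; the regexes are the same ones used in the workflow):

import re

dep = 'torch>=2.1,<3'  # hypothetical pin in the pyproject.toml style below

# Minimum set: pin the package to its declared lower bound.
lo = re.match(r'([^>=<\s]+)\s*>=\s*([^,\s"\']+)', dep)
assert f"{lo.group(1)}=={lo.group(2)}" == "torch==2.1"

# Maximum set: keep the exclusive upper bound when one exists;
# a pin without "<" falls through to the bare package name.
hi = re.search(r'([^>=<\s]+).*?<\s*([^,\s"\']+)', dep)
assert f"{hi.group(1)}<{hi.group(2)}" == "torch<3"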
16 changes: 8 additions & 8 deletions pyproject.toml
@@ -6,15 +6,15 @@ build-backend = "setuptools.build_meta"
name = "tabpfn"
version = "2.0.3"
dependencies = [
"torch>=2.1",
"scikit-learn>=1.2.0",
"typing_extensions",
"scipy",
"pandas",
"einops",
"huggingface-hub",
"torch>=2.1,<3",
"scikit-learn>=1.2.0,<1.7",
"typing_extensions>=4.4.0",
"scipy>=1.7.3,<2",
"pandas>=1.4.0,<3",
"einops>=0.2.0,<0.9",
"huggingface-hub>=0.0.1,<1",
]
requires-python = ">=3.9,<3.12"
requires-python = ">=3.9,<3.13"
authors = [
{ name = "Noah Hollmann", email = "[email protected]" },
{ name = "Samuel Müller", email = "[email protected]" },
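For illustration, applying the two generation scripts in the workflow above to these new pins would produce roughly the following requirements.txt contents (derived by hand from the regexes; these files are generated in CI, not committed):

Minimum dependency set:

torch==2.1
scikit-learn==1.2.0
typing_extensions==4.4.0
scipy==1.7.3
pandas==1.4.0
einops==0.2.0
huggingface-hub==0.0.1

Maximum dependency set:

torch<3
scikit-learn<1.7
typing_extensions
scipy<2
pandas<3
einops<0.9
huggingface-hub<1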
8 changes: 4 additions & 4 deletions src/tabpfn/classifier.py
@@ -181,9 +181,9 @@ def __init__( # noqa: PLR0913
Whether to balance the probabilities based on the class distribution
in the training data. This can help to improve predictive performance
when the classes are highly imbalanced and the metric of interest is
insensitive to class imbalance (e.g., balanced accuracy, balanced log loss,
roc-auc macro ovo, etc.). This is only applied when predicting during a
post-processing step.
insensitive to class imbalance (e.g., balanced accuracy, balanced log
loss, roc-auc macro ovo, etc.). This is only applied when predicting
during a post-processing step.

average_before_softmax:
Only used if `n_estimators > 1`. Whether to average the predictions of
@@ -443,7 +443,7 @@ def fit(self, X: XType, y: YType) -> Self:
"classes supported by TabPFN. Consider using a strategy to reduce "
"the number of classes. For code see "
"https://github.com/PriorLabs/tabpfn-extensions/blob/main/src/"
"tabpfn_extensions/many_class/many_class_classifier.py"
"tabpfn_extensions/many_class/many_class_classifier.py",
)

# Will convert specified categorical indices to category dtype, as well
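For intuition about the balance_probabilities behavior documented above: a common way to implement this kind of post-hoc balancing is to divide each predicted probability by the empirical class frequency and renormalize. A minimal sketch of that general technique (an illustrative assumption, not necessarily TabPFN's exact post-processing):

import numpy as np

def balance_proba(proba: np.ndarray, class_counts: np.ndarray) -> np.ndarray:
    """Downweight each class by its training frequency, then renormalize rows."""
    freq = class_counts / class_counts.sum()  # empirical class distribution
    balanced = proba / freq  # broadcasts (n_samples, n_classes) / (n_classes,)
    return balanced / balanced.sum(axis=1, keepdims=True)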
2 changes: 1 addition & 1 deletion src/tabpfn/model/loading.py
@@ -72,7 +72,7 @@ def get_regressor_v2(cls) -> ModelSource:
"tabpfn-v2-regressor-09gpqh39.ckpt",
"tabpfn-v2-regressor-2noar4o2.ckpt",
"tabpfn-v2-regressor-5wof9ojf.ckpt",
"tabpfn-v2-regressor-wyl4o83o.ckpt"
"tabpfn-v2-regressor-wyl4o83o.ckpt",
]
return cls(
repo_id="Prior-Labs/TabPFN-v2-reg",
3 changes: 1 addition & 2 deletions src/tabpfn/utils.py
@@ -17,8 +17,7 @@
import torch
from sklearn.base import check_array, is_classifier
from sklearn.compose import ColumnTransformer, make_column_selector
from sklearn.preprocessing import OrdinalEncoder, FunctionTransformer

from sklearn.preprocessing import FunctionTransformer, OrdinalEncoder
from sklearn.utils.multiclass import check_classification_targets
from torch import nn

52 changes: 33 additions & 19 deletions tests/test_classifier_interface.py
@@ -8,9 +8,9 @@
import sklearn.datasets
import torch
from sklearn.base import check_is_fitted
from sklearn.utils.estimator_checks import parametrize_with_checks
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils.estimator_checks import parametrize_with_checks

from tabpfn import TabPFNClassifier

@@ -96,6 +96,7 @@ def test_fit(
    predictions = model.predict(X)
    assert predictions.shape == (X.shape[0],), "Predictions shape is incorrect!"


# TODO(eddiebergman): Should probably run a larger suite with different configurations
@parametrize_with_checks(
[TabPFNClassifier(inference_config={"USE_SKLEARN_16_DECIMAL_PRECISION": True})],
@@ -112,46 +112,59 @@ def test_sklearn_compatible_estimator(

    check(estimator)


def test_balanced_probabilities(X_y: tuple[np.ndarray, np.ndarray]) -> None:
    """Test that balance_probabilities=True works correctly."""
    X, y = X_y

    model = TabPFNClassifier(
        balance_probabilities=True,
    )

    model.fit(X, y)
    probabilities = model.predict_proba(X)

    # Check that probabilities sum to 1 for each prediction
    assert np.allclose(probabilities.sum(axis=1), 1.0)

    # Check that the mean probability for each class is roughly equal
    mean_probs = probabilities.mean(axis=0)
    expected_mean = 1.0 / len(np.unique(y))
    assert np.allclose(mean_probs, expected_mean, rtol=0.1), \
        "Class probabilities are not properly balanced"
    assert np.allclose(
        mean_probs,
        expected_mean,
        rtol=0.1,
    ), "Class probabilities are not properly balanced"


def test_classifier_in_pipeline(X_y: tuple[np.ndarray, np.ndarray]) -> None:
    """Test that TabPFNClassifier works correctly within a sklearn pipeline."""
    X, y = X_y

    # Create a simple preprocessing pipeline
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('classifier', TabPFNClassifier(
            n_estimators=2  # Fewer estimators for faster testing
        ))
    ])

    pipeline = Pipeline(
        [
            ("scaler", StandardScaler()),
            (
                "classifier",
                TabPFNClassifier(
                    n_estimators=2,  # Fewer estimators for faster testing
                ),
            ),
        ],
    )

    pipeline.fit(X, y)
    probabilities = pipeline.predict_proba(X)

    # Check that probabilities sum to 1 for each prediction
    assert np.allclose(probabilities.sum(axis=1), 1.0)

    # Check that the mean probability for each class is roughly equal
    mean_probs = probabilities.mean(axis=0)
    expected_mean = 1.0 / len(np.unique(y))
    assert np.allclose(mean_probs, expected_mean, rtol=0.1), \
        "Class probabilities are not properly balanced in pipeline"
    assert np.allclose(
        mean_probs,
        expected_mean,
        rtol=0.1,
    ), "Class probabilities are not properly balanced in pipeline"
48 changes: 29 additions & 19 deletions tests/test_regressor_interface.py
@@ -8,9 +8,9 @@
import sklearn.datasets
import torch
from sklearn.base import check_is_fitted
from sklearn.utils.estimator_checks import parametrize_with_checks
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.utils.estimator_checks import parametrize_with_checks

from tabpfn import TabPFNRegressor

@@ -110,38 +110,48 @@ def test_sklearn_compatible_estimator(
"check_methods_sample_order_invariance",
):
estimator.inference_precision = torch.float64

if check.func.__name__ == "check_methods_sample_order_invariance": # type: ignore
pytest.xfail("We're not at 1e-7 difference yet")
check(estimator)


def test_regressor_in_pipeline(X_y: tuple[np.ndarray, np.ndarray]) -> None:
    """Test that TabPFNRegressor works correctly within a sklearn pipeline."""
    X, y = X_y

    # Create a simple preprocessing pipeline
    pipeline = Pipeline([
        ('scaler', StandardScaler()),
        ('regressor', TabPFNRegressor(
            n_estimators=2  # Fewer estimators for faster testing
        ))
    ])

    pipeline = Pipeline(
        [
            ("scaler", StandardScaler()),
            (
                "regressor",
                TabPFNRegressor(
                    n_estimators=2,  # Fewer estimators for faster testing
                ),
            ),
        ],
    )

    pipeline.fit(X, y)
    predictions = pipeline.predict(X)

    # Check predictions shape
    assert predictions.shape == (X.shape[0],), "Predictions shape is incorrect"

    # Test different prediction modes through the pipeline
    predictions_median = pipeline.predict(X, output_type="median")
    assert predictions_median.shape == (X.shape[0],), "Median predictions shape is incorrect"

    assert predictions_median.shape == (
        X.shape[0],
    ), "Median predictions shape is incorrect"

    predictions_mode = pipeline.predict(X, output_type="mode")
    assert predictions_mode.shape == (X.shape[0],), "Mode predictions shape is incorrect"

    assert predictions_mode.shape == (
        X.shape[0],
    ), "Mode predictions shape is incorrect"

    quantiles = pipeline.predict(X, output_type="quantiles", quantiles=[0.1, 0.9])
    assert isinstance(quantiles, list)
    assert len(quantiles) == 2
    assert quantiles[0].shape == (X.shape[0],), "Quantile predictions shape is incorrect"


    assert quantiles[0].shape == (
        X.shape[0],
    ), "Quantile predictions shape is incorrect"