diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..9160f3c
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,20 @@
+# See https://pre-commit.com for more information
+# See https://pre-commit.com/hooks.html for more hooks
+repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v4.6.0
+  hooks:
+  - id: trailing-whitespace
+  - id: end-of-file-fixer
+  - id: check-yaml
+  - id: check-added-large-files
+- repo: https://github.com/pycqa/isort
+  rev: 5.13.2
+  hooks:
+  - id: isort
+    # args is a hook-level key; pre-commit ignores it on the repo mapping
+    args: ["--profile", "black", "--filter-files"]
+- repo: https://github.com/psf/black
+  rev: 24.4.2
+  hooks:
+  - id: black
diff --git a/development_guide.md b/development_guide.md
index 2cfb5d9..855e3d3 100644
--- a/development_guide.md
+++ b/development_guide.md
@@ -9,4 +9,4 @@ python -m venv venv
 source venv/bin/activate
 # Install the dependencies
 make env-install
-```
\ No newline at end of file
+```
diff --git a/pyproject.toml b/pyproject.toml
index 2922d97..e832cc6 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,6 +26,7 @@ dev = [
     "polars",
     "pandas",
     "numpy",
+    "pre-commit",
 ]
 doc = ["mkdocs", "mkdocs-material", "mkdocstrings[python]", "mkdocs-jupyter"]
 build = ["build", "twine"]
@@ -56,5 +57,5 @@ exclude = '''
 )/
 '''
 
-[tool.flake8]
-max-line-length = 88
+[tool.isort]
+profile = "black"
diff --git a/sklearo/encoding/__init__.py b/sklearo/encoding/__init__.py
index 266528e..af92a85 100644
--- a/sklearo/encoding/__init__.py
+++ b/sklearo/encoding/__init__.py
@@ -1,3 +1,3 @@
 from .woe import WOEEncoder
 
-__all__ = ["WOEEncoder"]
\ No newline at end of file
+__all__ = ["WOEEncoder"]
diff --git a/sklearo/encoding/woe.py b/sklearo/encoding/woe.py
index 2e0fcd9..8c3e5ea 100644
--- a/sklearo/encoding/woe.py
+++ b/sklearo/encoding/woe.py
@@ -1,14 +1,14 @@
-import narwhals as nw
-from narwhals.typing import IntoFrameT, IntoSeriesT
-import warnings
 import math
-from typing import Sequence, Literal, Any
-
+import warnings
 from collections import defaultdict
+from typing import Any, Literal, Sequence
 
+import narwhals as nw
+from narwhals.typing import IntoFrameT, IntoSeriesT
 from pydantic import validate_call
+
 from sklearo.utils import select_columns
-from sklearo.validation import check_X_y, check_if_fitted
+from sklearo.validation import check_if_fitted, check_X_y
 
 
 class WOEEncoder:
@@ -355,8 +355,10 @@ def transform(self, X: IntoFrameT) -> IntoFrameT:
         for column, mapping in self.encoding_map_.items():
             uniques = X[column].unique()
             unseen_cats = uniques.filter(
-                (~uniques.is_in(next(iter(mapping.values())).keys())
-                & ~uniques.is_null())
+                (
+                    ~uniques.is_in(next(iter(mapping.values())).keys())
+                    & ~uniques.is_null()
+                )
             ).to_list()
             if unseen_cats:
                 unseen_per_col[column] = unseen_cats
diff --git a/sklearo/utils.py b/sklearo/utils.py
index 466f57b..070cd8d 100644
--- a/sklearo/utils.py
+++ b/sklearo/utils.py
@@ -1,10 +1,9 @@
+import inspect
+import re
 from typing import Sequence
-import re
 
 import narwhals as nw
 from narwhals.typing import IntoFrameT
-import inspect
-
 
 
 def select_columns_by_regex_pattern(df: IntoFrameT, pattern: str):
@@ -29,4 +28,4 @@ def select_columns(df: IntoFrameT, columns: Sequence[nw.typing.DTypes | str] | s
     elif isinstance(columns[0], str):
         yield from columns
     else:
-        raise ValueError("Invalid columns type")
\ No newline at end of file
+        raise ValueError("Invalid columns type")
diff --git a/tests/encoding/test_woe.py b/tests/encoding/test_woe.py
index 2dfc1df..bdb4811 100644
--- a/tests/encoding/test_woe.py
+++ b/tests/encoding/test_woe.py
@@ -1,7 +1,7 @@
+import numpy as np
 import pandas as pd
 import polars as pl
 import pytest
-import numpy as np
 
 from sklearo.encoding.woe import WOEEncoder
 
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 84729f5..20c1864 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,14 +1,14 @@
+import narwhals as nw
 import pandas as pd
 import polars as pl
 import pytest
-import narwhals as nw
+
 from sklearo.utils import (
+    select_columns,
     select_columns_by_regex_pattern,
     select_columns_by_types,
-    select_columns,
 )
 
-
 select_columns_by_regex_pattern = nw.narwhalify(select_columns_by_regex_pattern)
 select_columns_by_types = nw.narwhalify(select_columns_by_types)
 select_columns = nw.narwhalify(select_columns)
@@ -72,8 +72,7 @@ def test_select_columns_empty_tuple(self, sample_data, DataFrame):
         selected_columns = list(select_columns(df, ()))
         assert selected_columns == []
 
-
     def test_select_columns_invalid_type(self, sample_data, DataFrame):
         df = DataFrame(sample_data)
         with pytest.raises(ValueError, match="Invalid columns type"):
-            list(select_columns(df, [1, 2]))
\ No newline at end of file
+            list(select_columns(df, [1, 2]))