diff --git a/docs/source/configuration.rst b/docs/source/configuration.rst new file mode 100644 index 000000000..7da2ac0bd --- /dev/null +++ b/docs/source/configuration.rst @@ -0,0 +1,17 @@ +.. currentmodule:: pandera + +.. _configuration: + +Configuration +=============== + +*New in version 0.17.3* +``pandera`` provides a global config `~pandera.config.PanderaConfig`. + +This configuration can also be set using environment variables. For instance: +``` +export PANDERA_VALIDATION_ENABLED=False +export PANDERA_VALIDATION_DEPTH=DATA_ONLY +``` + +Runtime data validation incurs a performance overhead. To mitigate this, you have the option to disable validation globally. This can be achieved by setting the environment variable `PANDERA_VALIDATION_ENABLED=False`. When validation is disabled, any `validate` call will return the input object unchanged, without validating it. diff --git a/docs/source/index.rst b/docs/source/index.rst index d093aa8ce..19e10c65e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -369,6 +369,7 @@ page or reach out to the maintainers and pandera community on data_format_conversion supported_libraries integrations + configuration .. 
toctree:: :maxdepth: 6 diff --git a/pandera/api/pandas/array.py b/pandera/api/pandas/array.py index f5de7ea3b..f9e86d503 100644 --- a/pandera/api/pandas/array.py +++ b/pandera/api/pandas/array.py @@ -3,7 +3,6 @@ import copy import warnings from typing import Any, List, Optional, TypeVar, Union, cast - import pandas as pd from pandera import errors @@ -12,6 +11,7 @@ from pandera.api.checks import Check from pandera.api.hypotheses import Hypothesis from pandera.api.pandas.types import CheckList, PandasDtypeInputTypes, is_field +from pandera.config import CONFIG from pandera.dtypes import DataType, UniqueSettings from pandera.engines import pandas_engine, PYDANTIC_V2 @@ -426,6 +426,9 @@ def validate( # type: ignore [override] dtype: float64 """ + if not CONFIG.validation_enabled: + return check_obj + if self._is_inferred: warnings.warn( f"This {type(self)} is an inferred schema that hasn't been " diff --git a/pandera/api/pandas/container.py b/pandera/api/pandas/container.py index fd1fe2fe2..17b7d46ca 100644 --- a/pandera/api/pandas/container.py +++ b/pandera/api/pandas/container.py @@ -11,6 +11,7 @@ import pandas as pd from pandera import errors +from pandera.config import CONFIG from pandera import strategies as st from pandera.api.base.schema import BaseSchema, inferred_schema_guard from pandera.api.checks import Check @@ -345,6 +346,9 @@ def validate( 4 0.80 dog 5 0.76 dog """ + if not CONFIG.validation_enabled: + return check_obj + # NOTE: Move this into its own schema-backend variant. This is where # the benefits of separating the schema spec from the backend # implementation comes in. 
diff --git a/tests/core/test_pandas_config.py b/tests/core/test_pandas_config.py new file mode 100644 index 000000000..f1c542379 --- /dev/null +++ b/tests/core/test_pandas_config.py @@ -0,0 +1,71 @@ +"""This module is to test the behaviour change based on defined config in pandera""" +# pylint:disable=import-outside-toplevel,abstract-method,redefined-outer-name + + +import pandas as pd +import pytest + +import pandera as pa +from pandera import DataFrameModel, DataFrameSchema, SeriesSchema +from pandera.config import CONFIG, ValidationDepth + + +@pytest.fixture() +def disable_validation(): + """Fixture to disable validation and clean up after the test is finished""" + CONFIG.validation_enabled = False + yield "resource" + CONFIG.validation_enabled = True + + +class TestPandasDataFrameConfig: + """Class to test all the different configs types""" + + sample_data = pd.DataFrame( + (("Bread", 9), ("Cutter", 15)), columns=["product", "price_val"] + ) + # pylint: disable=unused-argument + def test_disable_validation(self, disable_validation): + """Validate that the input object is returned as-is when validation is disabled""" + + pandera_schema = DataFrameSchema( + { + "product": pa.Column( + str, pa.Check(lambda s: s.str.startswith("B")) + ), + "price_val": pa.Column(int), + } + ) + + class TestSchema(DataFrameModel): + """Test Schema class""" + + product: str = pa.Field(str_startswith="B") + price_val: int = pa.Field() + + expected = { + "validation_enabled": False, + "validation_depth": ValidationDepth.SCHEMA_AND_DATA, + } + + assert CONFIG.dict() == expected + assert pandera_schema.validate(self.sample_data) is self.sample_data + assert TestSchema.validate(self.sample_data) is self.sample_data + + +class TestPandasSeriesConfig: + """Class to test all the different configs types""" + + sample_data = pd.Series([1, 1, 2, 2, 3, 3]) + # pylint: disable=unused-argument + def test_disable_validation(self, disable_validation): + """Validate that the input object is returned 
as-is when validation is disabled""" + expected = { + "validation_enabled": False, + "validation_depth": ValidationDepth.SCHEMA_AND_DATA, + } + pandera_schema = SeriesSchema( + int, pa.Check(lambda s: s.value_counts() == 2, element_wise=False) + ) + assert CONFIG.dict() == expected + assert pandera_schema.validate(self.sample_data) is self.sample_data