Skip to content

Commit

Permalink
Add support for PANDERA_VALIDATION_ENABLED for pandas and Configura…
Browse files Browse the repository at this point in the history
…tion docs (#1354)

* Add PANDERA_VALIDATION_ENABLED to pandas container

Signed-off-by: Nok Lam Chan <[email protected]>
Signed-off-by: Nok <[email protected]>

* Placeholder

Signed-off-by: Nok Lam Chan <[email protected]>
Signed-off-by: Nok <[email protected]>

* Add support for PANDERA_VALIDATION_ENABLED for pandas

Signed-off-by: Nok <[email protected]>

* Please linting & DCO

Signed-off-by: Nok <[email protected]>

---------

Signed-off-by: Nok Lam Chan <[email protected]>
Signed-off-by: Nok <[email protected]>
  • Loading branch information
noklam authored Dec 4, 2023
1 parent a318e48 commit fbdc6a0
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 1 deletion.
17 changes: 17 additions & 0 deletions docs/source/configuration.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
.. currentmodule:: pandera

.. _configuration:

Configuration
===============

*New in version 0.17.3*

``pandera`` provides a global config `~pandera.config.PanderaConfig`.

This configuration can also be set using environment variables. For instance:

.. code-block:: bash

    export PANDERA_VALIDATION_ENABLED=False
    export PANDERA_VALIDATION_DEPTH=DATA_ONLY

Runtime data validation incurs a performance overhead. To mitigate this, you can disable validation globally by setting the environment variable ``PANDERA_VALIDATION_ENABLED=False``. When validation is disabled, any ``validate`` call skips all checks and returns the input object unchanged.
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,7 @@ page or reach out to the maintainers and pandera community on
data_format_conversion
supported_libraries
integrations
configuration

.. toctree::
:maxdepth: 6
Expand Down
5 changes: 4 additions & 1 deletion pandera/api/pandas/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import copy
import warnings
from typing import Any, List, Optional, TypeVar, Union, cast

import pandas as pd

from pandera import errors
Expand All @@ -12,6 +11,7 @@
from pandera.api.checks import Check
from pandera.api.hypotheses import Hypothesis
from pandera.api.pandas.types import CheckList, PandasDtypeInputTypes, is_field
from pandera.config import CONFIG
from pandera.dtypes import DataType, UniqueSettings
from pandera.engines import pandas_engine, PYDANTIC_V2

Expand Down Expand Up @@ -426,6 +426,9 @@ def validate( # type: ignore [override]
dtype: float64
"""
if not CONFIG.validation_enabled:
return check_obj

if self._is_inferred:
warnings.warn(
f"This {type(self)} is an inferred schema that hasn't been "
Expand Down
4 changes: 4 additions & 0 deletions pandera/api/pandas/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pandas as pd

from pandera import errors
from pandera.config import CONFIG
from pandera import strategies as st
from pandera.api.base.schema import BaseSchema, inferred_schema_guard
from pandera.api.checks import Check
Expand Down Expand Up @@ -345,6 +346,9 @@ def validate(
4 0.80 dog
5 0.76 dog
"""
if not CONFIG.validation_enabled:
return check_obj

# NOTE: Move this into its own schema-backend variant. This is where
# the benefits of separating the schema spec from the backend
# implementation comes in.
Expand Down
71 changes: 71 additions & 0 deletions tests/core/test_pandas_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
"""This module is to test the behaviour change based on defined config in pandera"""
# pylint:disable=import-outside-toplevel,abstract-method,redefined-outer-name


import pandas as pd
import pytest

import pandera as pa
from pandera import DataFrameModel, DataFrameSchema, SeriesSchema
from pandera.config import CONFIG, ValidationDepth


@pytest.fixture()
def disable_validation():
    """Disable pandera validation for one test, then restore the prior setting.

    The value of ``CONFIG.validation_enabled`` found at setup is remembered
    and written back at teardown. Unconditionally resetting it to ``True``
    would silently re-enable validation for later tests when the run started
    with validation already disabled (for example through the
    ``PANDERA_VALIDATION_ENABLED`` environment variable).

    Yields:
        The placeholder string ``"resource"``; tests request this fixture
        only for its side effect on ``CONFIG``.
    """
    previous_setting = CONFIG.validation_enabled
    CONFIG.validation_enabled = False
    yield "resource"
    # Put back whatever was configured before the test, not a fixed value.
    CONFIG.validation_enabled = previous_setting


class TestPandasDataFrameConfig:
    """Checks how DataFrame schemas react to the global pandera config."""

    sample_data = pd.DataFrame(
        (("Bread", 9), ("Cutter", 15)), columns=["product", "price_val"]
    )

    # pylint: disable=unused-argument
    def test_disable_validation(self, disable_validation):
        """With validation disabled, ``validate`` returns the input object itself."""

        class TestSchema(DataFrameModel):
            """Class-based counterpart of the object-based schema below."""

            product: str = pa.Field(str_startswith="B")
            price_val: int = pa.Field()

        column_specs = {
            "product": pa.Column(
                str, pa.Check(lambda s: s.str.startswith("B"))
            ),
            "price_val": pa.Column(int),
        }
        object_schema = DataFrameSchema(column_specs)

        # The fixture must have flipped only the enabled flag.
        assert CONFIG.dict() == {
            "validation_enabled": False,
            "validation_depth": ValidationDepth.SCHEMA_AND_DATA,
        }

        # "Cutter" does not satisfy the startswith("B") check, so getting the
        # identical object back shows that no check was applied.
        frame = self.sample_data
        assert object_schema.validate(frame) is frame
        assert TestSchema.validate(frame) is frame


class TestPandasSeriesConfig:
    """Checks how Series schemas react to the global pandera config."""

    sample_data = pd.Series([1, 1, 2, 2, 3, 3])

    # pylint: disable=unused-argument
    def test_disable_validation(self, disable_validation):
        """With validation disabled, ``validate`` returns the input object itself."""
        pair_count_check = pa.Check(
            lambda s: s.value_counts() == 2, element_wise=False
        )
        series_schema = SeriesSchema(int, pair_count_check)

        # The fixture must have flipped only the enabled flag.
        assert CONFIG.dict() == {
            "validation_enabled": False,
            "validation_depth": ValidationDepth.SCHEMA_AND_DATA,
        }

        # Identity, not equality: the call must hand back the same Series.
        series = self.sample_data
        assert series_schema.validate(series) is series

0 comments on commit fbdc6a0

Please sign in to comment.