From f78bc0752d9f1600debe598e8035c70178201762 Mon Sep 17 00:00:00 2001 From: Jarek-Rolski Date: Mon, 6 Jan 2025 11:48:45 +0000 Subject: [PATCH 1/3] add json_normalize to pandas read formats --- pandera/typing/formats.py | 4 ++++ pandera/typing/pandas.py | 1 + 2 files changed, 5 insertions(+) diff --git a/pandera/typing/formats.py b/pandera/typing/formats.py index 1585263df..0f88d5089 100644 --- a/pandera/typing/formats.py +++ b/pandera/typing/formats.py @@ -42,6 +42,9 @@ class Formats(Enum): #: python pickle file format pickle = "pickle" + #: JSON-like records (dict or list of dicts) flattened via pandas.json_normalize + json_normalize = "json_normalize" + Format = Union[ Literal[Formats.csv], @@ -50,4 +53,5 @@ class Formats(Enum): Literal[Formats.feather], Literal[Formats.parquet], Literal[Formats.pickle], + Literal[Formats.json_normalize], ] diff --git a/pandera/typing/pandas.py b/pandera/typing/pandas.py index 9c2a0b7c3..8e26962e7 100644 --- a/pandera/typing/pandas.py +++ b/pandera/typing/pandas.py @@ -122,6 +122,7 @@ def from_format(cls, obj: Any, config) -> pd.DataFrame: Formats.feather: pd.read_feather, Formats.parquet: pd.read_parquet, Formats.pickle: pd.read_pickle, + Formats.json_normalize: pd.json_normalize, }[Formats(config.from_format)] return reader(obj, **(config.from_format_kwargs or {})) # type: ignore From a0497636ee3a2e4476b3cd2c2f14aa04ca40ae68 Mon Sep 17 00:00:00 2001 From: Jarek-Rolski Date: Tue, 14 Jan 2025 15:30:53 +0000 Subject: [PATCH 2/3] add unit test --- tests/core/test_from_to_format_conversions.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/tests/core/test_from_to_format_conversions.py b/tests/core/test_from_to_format_conversions.py index 5b3062dfa..64413733f 100644 --- a/tests/core/test_from_to_format_conversions.py +++ b/tests/core/test_from_to_format_conversions.py @@ -64,6 +64,10 @@ class Config: from_format = pd.read_pickle +class InSchemaJsonNormalize(InSchema): + class Config: + from_format = "json_normalize" + class OutSchema(InSchema): float_col: pa.typing.Series[float] 
@@ -194,6 +198,7 @@ def _needs_pyarrow(schema) -> bool: [InSchemaParquet, lambda df, x: df.to_parquet(x), io.BytesIO], [InSchemaPickle, lambda df, x: df.to_pickle(x), io.BytesIO], [InSchemaPickleCallable, lambda df, x: df.to_pickle(x), io.BytesIO], + [InSchemaJsonNormalize, lambda df: df.to_json(), io.StringIO], ], ) def test_from_format(schema, to_fn, buf_cls): From 55bcbfa4bed846214c187c65d1d412e39ed56c2e Mon Sep 17 00:00:00 2001 From: Jarek-Rolski Date: Tue, 14 Jan 2025 16:00:01 +0000 Subject: [PATCH 3/3] format code with black/fix unit test --- tests/core/test_from_to_format_conversions.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/core/test_from_to_format_conversions.py b/tests/core/test_from_to_format_conversions.py index 64413733f..80dc055cf 100644 --- a/tests/core/test_from_to_format_conversions.py +++ b/tests/core/test_from_to_format_conversions.py @@ -68,6 +68,7 @@ class InSchemaJsonNormalize(InSchema): class Config: from_format = "json_normalize" + class OutSchema(InSchema): float_col: pa.typing.Series[float] @@ -198,7 +199,7 @@ def _needs_pyarrow(schema) -> bool: [InSchemaParquet, lambda df, x: df.to_parquet(x), io.BytesIO], [InSchemaPickle, lambda df, x: df.to_pickle(x), io.BytesIO], [InSchemaPickleCallable, lambda df, x: df.to_pickle(x), io.BytesIO], - [InSchemaJsonNormalize, lambda df: df.to_json(), io.StringIO], + [InSchemaJsonNormalize, lambda df: df.to_dict(orient="records"), None], ], ) def test_from_format(schema, to_fn, buf_cls):