Skip to content

Commit

Permalink
TST(string dtype): Resolve some HDF5 xfails (#60615)
Browse files: browse the repository at this point in the history
* TST(string dtype): Resolve HDF5 xfails

* More xfails

* Cleanup
  • Loading branch information
rhshadrach authored Dec 29, 2024
1 parent 82f4354 commit 2edc7c9
Show file tree
Hide file tree
Showing 4 changed files with 36 additions and 17 deletions.
2 changes: 2 additions & 0 deletions pandas/io/pytables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5297,6 +5297,8 @@ def _dtype_to_kind(dtype_str: str) -> str:
kind = "integer"
elif dtype_str == "object":
kind = "object"
elif dtype_str == "str":
kind = "str"
else:
raise ValueError(f"cannot interpret dtype of [{dtype_str}]")

Expand Down
45 changes: 34 additions & 11 deletions pandas/tests/io/pytables/test_file_handling.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,12 +37,11 @@

pytestmark = [
pytest.mark.single_cpu,
pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False),
]


@pytest.mark.parametrize("mode", ["r", "r+", "a", "w"])
def test_mode(setup_path, tmp_path, mode):
def test_mode(setup_path, tmp_path, mode, using_infer_string):
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
columns=Index(list("ABCD"), dtype=object),
Expand Down Expand Up @@ -91,10 +90,12 @@ def test_mode(setup_path, tmp_path, mode):
read_hdf(path, "df", mode=mode)
else:
result = read_hdf(path, "df", mode=mode)
if using_infer_string:
df.columns = df.columns.astype("str")
tm.assert_frame_equal(result, df)


def test_default_mode(tmp_path, setup_path):
def test_default_mode(tmp_path, setup_path, using_infer_string):
# read_hdf uses default mode
df = DataFrame(
np.random.default_rng(2).standard_normal((10, 4)),
Expand All @@ -104,7 +105,10 @@ def test_default_mode(tmp_path, setup_path):
path = tmp_path / setup_path
df.to_hdf(path, key="df", mode="w")
result = read_hdf(path, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)


def test_reopen_handle(tmp_path, setup_path):
Expand Down Expand Up @@ -163,7 +167,7 @@ def test_reopen_handle(tmp_path, setup_path):
assert not store.is_open


def test_open_args(setup_path):
def test_open_args(setup_path, using_infer_string):
with tm.ensure_clean(setup_path) as path:
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
Expand All @@ -178,8 +182,13 @@ def test_open_args(setup_path):
store["df"] = df
store.append("df2", df)

tm.assert_frame_equal(store["df"], df)
tm.assert_frame_equal(store["df2"], df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")

tm.assert_frame_equal(store["df"], expected)
tm.assert_frame_equal(store["df2"], expected)

store.close()

Expand All @@ -194,7 +203,7 @@ def test_flush(setup_path):
store.flush(fsync=True)


def test_complibs_default_settings(tmp_path, setup_path):
def test_complibs_default_settings(tmp_path, setup_path, using_infer_string):
# GH15943
df = DataFrame(
1.1 * np.arange(120).reshape((30, 4)),
Expand All @@ -207,7 +216,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df", complevel=9)
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)

with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
Expand All @@ -218,7 +231,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df", complib="zlib")
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)

with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
Expand All @@ -229,7 +246,11 @@ def test_complibs_default_settings(tmp_path, setup_path):
tmpfile = tmp_path / setup_path
df.to_hdf(tmpfile, key="df")
result = read_hdf(tmpfile, "df")
tm.assert_frame_equal(result, df)
expected = df.copy()
if using_infer_string:
expected.index = expected.index.astype("str")
expected.columns = expected.columns.astype("str")
tm.assert_frame_equal(result, expected)

with tables.open_file(tmpfile, mode="r") as h5file:
for node in h5file.walk_nodes(where="/df", classname="Leaf"):
Expand Down Expand Up @@ -308,6 +329,7 @@ def test_complibs(tmp_path, lvl, lib, request):
assert node.filters.complib == lib


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.skipif(
not is_platform_little_endian(), reason="reason platform is not little endian"
)
Expand All @@ -325,6 +347,7 @@ def test_encoding(setup_path):
tm.assert_frame_equal(result, expected)


@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)", strict=False)
@pytest.mark.parametrize(
"val",
[
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/io/pytables/test_subclass.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas import (
DataFrame,
Series,
Expand All @@ -19,7 +17,6 @@

class TestHDFStoreSubclass:
# GH 33748
@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string)")
def test_supported_for_subclass_dataframe(self, tmp_path):
data = {"a": [1, 2], "b": [3, 4]}
sdf = tm.SubclassedDataFrame(data, dtype=np.intp)
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/io/test_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
import numpy as np
import pytest

from pandas._config import using_string_dtype

from pandas.compat import (
WASM,
is_platform_windows,
Expand Down Expand Up @@ -365,7 +363,6 @@ def test_write_fspath_all(self, writer_name, writer_kwargs, module):
expected = f_path.read()
assert result == expected

@pytest.mark.xfail(using_string_dtype(), reason="TODO(infer_string) hdf support")
def test_write_fspath_hdf5(self):
# Same test as write_fspath_all, except HDF5 files aren't
# necessarily byte-for-byte identical for a given dataframe, so we'll
Expand Down

0 comments on commit 2edc7c9

Please sign in to comment.