Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

clib.conversion._to_numpy: Add tests for pandas.Series with datetime dtypes #3670

Merged
merged 12 commits into from
Jan 9, 2025
10 changes: 10 additions & 0 deletions pygmt/clib/conversion.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,16 @@
numpy_dtype = np.float64
data = data.to_numpy(na_value=np.nan)

# Deal with timezone-aware datetime dtypes.
if isinstance(dtype, pd.DatetimeTZDtype): # pandas.DatetimeTZDtype
numpy_dtype = getattr(dtype, "base", None)
elif isinstance(dtype, pd.ArrowDtype) and hasattr(dtype.pyarrow_dtype, "tz"):
# pd.ArrowDtype[pa.Timestamp]
numpy_dtype = getattr(dtype, "numpy_dtype", None)
if Version(pd.__version__) < Version("2.1"):
# In pandas 2.0, dtype.numpy_dtype is dtype("O").
numpy_dtype = np.dtype(f"M8[{dtype.pyarrow_dtype.unit}]") # type: ignore[assignment, attr-defined]

Check warning on line 203 in pygmt/clib/conversion.py

View check run for this annotation

Codecov / codecov/patch

pygmt/clib/conversion.py#L203

Added line #L203 was not covered by tests
Comment on lines +202 to +204
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a TODO here to remove this once we drop support for pandas 2.0? Should be after 2025-08-29 according to https://scientific-python.org/specs/spec-0000/#support-window


array = np.ascontiguousarray(data, dtype=numpy_dtype)

# Check if a np.object_ array can be converted to np.str_.
Expand Down
106 changes: 106 additions & 0 deletions pygmt/tests/test_clib_to_numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,112 @@ def test_to_numpy_pandas_date(dtype, expected_dtype):
)


# Marker for test cases that are expected to fail on pandas older than 2.1 because of
# the upstream pandas bug referenced in the reason string.
pandas_old_version = pytest.mark.xfail(
    Version(pd.__version__) < Version("2.1"),
    reason="pandas 2.0 bug reported in https://github.com/pandas-dev/pandas/issues/52705",
)


@pytest.mark.parametrize(
    ("dtype", "expected_dtype"),
    [
        # NumPy datetime64 dtypes. Only the units 's'/'ms'/'us'/'ns' are supported.
        pytest.param("datetime64[s]", "datetime64[s]", id="datetime64[s]"),
        pytest.param("datetime64[ms]", "datetime64[ms]", id="datetime64[ms]"),
        pytest.param("datetime64[us]", "datetime64[us]", id="datetime64[us]"),
        pytest.param("datetime64[ns]", "datetime64[ns]", id="datetime64[ns]"),
        # pandas.DatetimeTZDtype (a time zone is mandatory) can be spelled either as
        #   1. pandas.DatetimeTZDtype(unit, tz), or
        #   2. the string alias "datetime64[unit, tz]".
        pytest.param(
            "datetime64[s, UTC]",
            "datetime64[s]",
            id="datetime64[s, tz=UTC]",
            marks=pandas_old_version,
        ),
        pytest.param(
            "datetime64[s, America/New_York]",
            "datetime64[s]",
            id="datetime64[s, tz=America/New_York]",
            marks=pandas_old_version,
        ),
        pytest.param(
            "datetime64[s, +07:30]",
            "datetime64[s]",
            id="datetime64[s, +07:30]",
            marks=pandas_old_version,
        ),
        # PyArrow timestamp dtypes (a time zone is optional) can be spelled either as
        #   1. pd.ArrowDtype(pyarrow.Timestamp(unit, tz=tz)), or
        #   2. the string alias "timestamp[unit, tz][pyarrow]".
        pytest.param(
            "timestamp[s][pyarrow]",
            "datetime64[s]",
            id="timestamp[s][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
        pytest.param(
            "timestamp[ms][pyarrow]",
            "datetime64[ms]",
            id="timestamp[ms][pyarrow]",
            marks=[skip_if_no(package="pyarrow"), pandas_old_version],
        ),
        pytest.param(
            "timestamp[us][pyarrow]",
            "datetime64[us]",
            id="timestamp[us][pyarrow]",
            marks=[skip_if_no(package="pyarrow"), pandas_old_version],
        ),
        pytest.param(
            "timestamp[ns][pyarrow]",
            "datetime64[ns]",
            id="timestamp[ns][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
        pytest.param(
            "timestamp[s, UTC][pyarrow]",
            "datetime64[s]",
            id="timestamp[s, UTC][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
        pytest.param(
            "timestamp[s, America/New_York][pyarrow]",
            "datetime64[s]",
            id="timestamp[s, America/New_York][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
        pytest.param(
            "timestamp[s, +08:00][pyarrow]",
            "datetime64[s]",
            id="timestamp[s, +08:00][pyarrow]",
            marks=skip_if_no(package="pyarrow"),
        ),
    ],
)
def test_to_numpy_pandas_datetime(dtype, expected_dtype):
    """
    Check _to_numpy on a pandas.Series that carries a datetime dtype.

    The result must be a datetime64 array of the expected unit. For a timezone-aware
    dtype the reference values are the series converted to UTC with the time zone
    information stripped.
    """
    timestamps = [
        pd.Timestamp("2024-01-02T03:04:05"),
        pd.Timestamp("2024-01-02T03:04:06"),
    ]
    series = pd.Series(timestamps, dtype=dtype)

    result = _to_numpy(series)
    _check_result(result, np.datetime64)
    assert result.dtype == expected_dtype

    # Heuristic: only the "unit, tz" spellings of the dtype contain a comma, so a
    # comma is taken to mean that the dtype is timezone-aware.
    is_tz_aware = "," in str(dtype)
    if is_tz_aware:
        # Bring timezone-aware values to UTC before comparing.
        is_old_pandas = Version(pd.__version__) < Version("2.1")
        if is_old_pandas and dtype.startswith("timestamp"):
            # pandas 2.0 lacks dt.tz_convert for pyarrow timestamps, so go through
            # pd.to_datetime instead.
            series = pd.to_datetime(series, utc=True)
        else:
            series = series.dt.tz_convert("UTC")

    # Drop the time zone information while keeping the wall-clock values.
    expected = np.array(series.dt.tz_localize(tz=None), dtype=expected_dtype)
    npt.assert_array_equal(result, expected)


########################################################################################
# Test the _to_numpy function with PyArrow arrays.
#
Expand Down
Loading