fix: return pd.Timestamp or pd.Series[datetime64] for date.to_pandas()
mfatihaktas committed Jun 10, 2024
1 parent df0e656 commit 440d7a8
Showing 14 changed files with 87 additions and 57 deletions.
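
In short, date results are no longer downcast to Python `datetime.date` on the way out of `.to_pandas()` / `.execute()`: scalar dates come back as `pd.Timestamp` and date columns as `datetime64` Series. A minimal sketch of the intended behavior after this change (not part of the diff; assumes ibis' default backend and a hypothetical in-memory table):

```python
import pandas as pd
import ibis

# Scalar date expressions now execute to pandas Timestamps ...
scalar = ibis.date("2023-01-02").to_pandas()
assert isinstance(scalar, pd.Timestamp)

# ... and date columns execute to datetime64 Series instead of
# object Series of datetime.date values.
t = ibis.memtable({"ts": pd.to_datetime(["2023-01-02 03:04:05"])})
col = t.ts.date().to_pandas()
assert pd.api.types.is_datetime64_any_dtype(col)
```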
1 change: 1 addition & 0 deletions compose.yaml
@@ -341,6 +341,7 @@ services:
- druid

oracle:
platform: linux/amd64
image: gvenzl/oracle-free:23.4-slim
environment:
ORACLE_PASSWORD: ibis
8 changes: 8 additions & 0 deletions docs/contribute/02_workflow.qmd
@@ -251,3 +251,11 @@ you are going only up).
```bash
$ colima delete
```

### `x86_64` or `amd64` based containers

To run containers built for `x86_64` / `amd64`, the architecture needs to be set in two places:
1. Add `platform: linux/amd64` to the service definition in `compose.yaml`.
2. Set the `--arch` flag when starting the VM: `colima start --arch x86_64`.

For instance, this is necessary for the `oracle` service in `compose.yaml`; otherwise, the container fails shortly after starting.
2 changes: 1 addition & 1 deletion ibis/backends/dask/convert.py
@@ -74,7 +74,7 @@ def convert_Date(cls, s, dtype, pandas_type):
else:
s = dd.to_datetime(s)

return s.dt.normalize()
return s

@classmethod
def convert_String(cls, s, dtype, pandas_type):
13 changes: 6 additions & 7 deletions ibis/backends/dask/tests/test_cast.py
@@ -146,13 +146,12 @@ def test_timestamp_with_timezone_is_inferred_correctly(t):
def test_cast_date(t, df, column):
expr = t[column].cast("date")
result = expr.execute()
expected = (
df[column]
.dt.normalize()
.map(lambda x: x.date())
.compute()
.rename(expr.get_name())
)

expected = df[column].compute().rename(expr.get_name())
if expected.dt.tz:
expected = expected.dt.tz_convert("UTC")
expected = expected.dt.tz_localize(None).astype(result.dtype)

tm.assert_series_equal(result, expected, check_index=False)


10 changes: 7 additions & 3 deletions ibis/backends/dask/tests/test_temporal.py
@@ -66,8 +66,12 @@ def test_timestamp_functions(con, case_func, expected_func):
def test_cast_datetime_strings_to_date(t, df, column):
expr = t[column].cast("date")
result = expr.execute()

df_computed = df.compute()
expected = pd.to_datetime(df_computed[column]).map(lambda x: x.date())
expected = pd.to_datetime(df_computed[column])
if expected.dt.tz:
expected = expected.dt.tz_convert("UTC")
expected = expected.dt.tz_localize(None).astype(result.dtype)

tm.assert_series_equal(
result.reset_index(drop=True).rename("tmp"),
@@ -114,10 +118,10 @@ def test_cast_integer_to_date(t, pandas_df):
expr = t.plain_int64.cast("date")
result = expr.execute()
expected = pd.Series(
pd.to_datetime(pandas_df.plain_int64.values, unit="D").date,
pd.to_datetime(pandas_df.plain_int64.values, unit="D"),
index=pandas_df.index,
name="plain_int64",
)
).astype(result.dtype)
tm.assert_series_equal(result, expected, check_names=False)


8 changes: 4 additions & 4 deletions ibis/backends/oracle/converter.py
@@ -1,19 +1,19 @@
from __future__ import annotations

import datetime
import pandas as pd

from ibis.formats.pandas import PandasData


class OraclePandasData(PandasData):
@classmethod
def convert_Timestamp_element(cls, dtype):
return datetime.datetime.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Time_element(cls, dtype):
return datetime.time.fromisoformat
return pd.Timestamp.fromisoformat
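
The Oracle element converters above (and the Snowflake ones further down) now parse ISO strings with `pd.Timestamp.fromisoformat` rather than the stdlib `datetime` constructors, so element-wise results stay in pandas types. A rough illustration with made-up values, not taken from the diff:

```python
import datetime
import pandas as pd

# Old converters produced stdlib objects for dates ...
assert datetime.date.fromisoformat("2023-01-02") == datetime.date(2023, 1, 2)

# ... the new converters produce pandas Timestamps instead.
ts = pd.Timestamp.fromisoformat("2023-01-02")
assert isinstance(ts, pd.Timestamp)
assert ts == pd.Timestamp("2023-01-02 00:00:00")
```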
2 changes: 1 addition & 1 deletion ibis/backends/pandas/tests/test_cast.py
@@ -163,7 +163,7 @@ def test_timestamp_with_timezone_is_inferred_correctly(t, df):
def test_cast_date(t, df, column):
expr = t[column].cast("date")
result = expr.execute()
expected = df[column].dt.normalize().dt.tz_localize(None).dt.date
expected = df[column].dt.normalize().dt.tz_localize(None).astype(result.dtype)
tm.assert_series_equal(result, expected)


7 changes: 5 additions & 2 deletions ibis/backends/pandas/tests/test_temporal.py
@@ -67,7 +67,10 @@ def test_timestamp_functions(case_func, expected_func):
def test_cast_datetime_strings_to_date(t, df, column):
expr = t[column].cast("date")
result = expr.execute()
expected = pd.to_datetime(df[column]).dt.normalize().dt.tz_localize(None).dt.date
expected = pd.to_datetime(df[column]).dt.normalize()
if expected.dt.tz:
expected = expected.dt.tz_convert("UTC")
expected = expected.dt.tz_localize(None).astype(result.dtype)
tm.assert_series_equal(result, expected)


@@ -104,7 +107,7 @@ def test_cast_integer_to_date(t, df):
expr = t.plain_int64.cast("date")
result = expr.execute()
expected = pd.Series(
pd.to_datetime(df.plain_int64.values, unit="D").date,
pd.to_datetime(df.plain_int64.values, unit="D").astype(result.dtype),
index=df.index,
name="plain_int64",
)
8 changes: 4 additions & 4 deletions ibis/backends/snowflake/converter.py
@@ -1,9 +1,9 @@
from __future__ import annotations

import datetime
import json
from typing import TYPE_CHECKING

import pandas as pd
import pyarrow as pa

from ibis.formats.pandas import PandasData
@@ -52,15 +52,15 @@ def __arrow_ext_scalar_class__(self):
class SnowflakePandasData(PandasData):
@classmethod
def convert_Timestamp_element(cls, dtype):
return datetime.datetime.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Date_element(cls, dtype):
return datetime.date.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_Time_element(cls, dtype):
return datetime.time.fromisoformat
return pd.Timestamp.fromisoformat

@classmethod
def convert_JSON(cls, s, dtype, pandas_type):
2 changes: 2 additions & 0 deletions ibis/backends/sqlite/tests/test_types.py
@@ -89,6 +89,8 @@ def test_type_map(db):
sol = pd.DataFrame(
{"str_col": ["a"], "date_col": pd.Series([date(2022, 1, 1)], dtype="object")}
)
sol["date_col"] = sol["date_col"].astype(res["date_col"].dtype)

assert res.equals(sol)


2 changes: 1 addition & 1 deletion ibis/backends/tests/test_aggregation.py
@@ -1172,7 +1172,7 @@ def test_string_quantile(alltypes, func):
)
def test_date_quantile(alltypes, func):
expr = func(alltypes.timestamp_col.date())
result = expr.execute()
result = expr.execute().to_pydatetime().date()
assert result == date(2009, 12, 31)


56 changes: 33 additions & 23 deletions ibis/backends/tests/test_temporal.py
@@ -729,9 +729,7 @@ def convert_to_offset(x):
"ignore", category=(UserWarning, pd.errors.PerformanceWarning)
)
expected = (
pd.to_datetime(df.date_string_col)
.add(offset)
.map(lambda ts: ts.normalize().date(), na_action="ignore")
pd.to_datetime(df.date_string_col).add(offset).astype("datetime64[s]")
)

expected = backend.default_series_rename(expected)
@@ -817,12 +815,7 @@ def convert_to_offset(x):
),
param(
lambda t, _: t.timestamp_col.date() + ibis.interval(days=4),
lambda t, _: (
t.timestamp_col.dt.floor("d")
.add(pd.Timedelta(days=4))
.dt.normalize()
.dt.date
),
lambda t, _: t.timestamp_col.dt.floor("d").add(pd.Timedelta(days=4)),
id="date-add-interval",
marks=[
pytest.mark.notimpl(
@@ -831,16 +824,19 @@ def convert_to_offset(x):
reason="'StringColumn' object has no attribute 'date'",
),
pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError),
pytest.mark.broken(
["oracle"],
raises=AssertionError,
reason=(
"Oracle includes hour:min:sec in the result for "
"CAST(t0.timestamp_col AS DATE), while other backends don't."
),
),
],
),
param(
lambda t, _: t.timestamp_col.date() - ibis.interval(days=14),
lambda t, _: (
t.timestamp_col.dt.floor("d")
.sub(pd.Timedelta(days=14))
.dt.normalize()
.dt.date
),
lambda t, _: t.timestamp_col.dt.floor("d").sub(pd.Timedelta(days=14)),
id="date-subtract-interval",
marks=[
pytest.mark.notimpl(
@@ -849,6 +845,14 @@ def convert_to_offset(x):
reason="'StringColumn' object has no attribute 'date'",
),
pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError),
pytest.mark.broken(
["oracle"],
raises=AssertionError,
reason=(
"Oracle includes hour:min:sec in the result for "
"CAST(t0.timestamp_col AS DATE), while other backends don't."
),
),
],
),
param(
@@ -1253,19 +1257,28 @@ def test_interval_add_cast_scalar(backend, alltypes):
reason="'StringColumn' object has no attribute 'date'",
)
@pytest.mark.broken(["flink"], raises=AssertionError, reason="incorrect results")
@pytest.mark.broken(
["oracle"],
raises=AssertionError,
reason=(
"Oracle includes hour:min:sec in the result for "
"CAST(t0.timestamp_col AS DATE), while other backends don't."
),
)
def test_interval_add_cast_column(backend, alltypes, df):
timestamp_date = alltypes.timestamp_col.date()
delta = alltypes.bigint_col.cast("interval('D')")
expr = alltypes["id", (timestamp_date + delta).name("tmp")]
result = expr.execute().sort_values("id").reset_index().tmp

df = df.sort_values("id").reset_index(drop=True)
expected = (
df["timestamp_col"]
.dt.normalize()
.add(df.bigint_col.astype("timedelta64[D]"))
.rename("tmp")
.dt.date
)

backend.assert_series_equal(result, expected.astype(result.dtype))


@@ -2538,7 +2551,7 @@ def test_time_literal_sql(dialect, snapshot, micros):
),
pytest.mark.notyet(["datafusion"], raises=Exception),
pytest.mark.broken(
["pandas", "dask"],
["dask", "pandas", "pyspark"],
condition=is_older_than("pandas", "2.0.0"),
raises=ValueError,
reason="Out of bounds nanosecond timestamp: 9999-01-02 00:00:00",
@@ -2557,7 +2570,7 @@ def test_time_literal_sql(dialect, snapshot, micros):
),
pytest.mark.notyet(["datafusion"], raises=Exception),
pytest.mark.broken(
["pandas", "dask"],
["dask", "pandas", "pyspark"],
condition=is_older_than("pandas", "2.0.0"),
raises=ValueError,
reason="Out of bounds nanosecond timestamp: 1-07-17 00:00:00",
@@ -2580,10 +2593,7 @@ def test_time_literal_sql(dialect, snapshot, micros):
)
def test_date_scalar(con, value, func):
expr = ibis.date(func(value)).name("tmp")

result = con.execute(expr)

assert not isinstance(result, datetime.datetime)
assert isinstance(result, datetime.date)

assert result == datetime.date.fromisoformat(value)
assert isinstance(result, pd.Timestamp)
assert result == pd.Timestamp.fromisoformat(value)
12 changes: 6 additions & 6 deletions ibis/expr/api.py
@@ -937,16 +937,16 @@ def date(value_or_year, month=None, day=None, /):
Create a date scalar from a string
>>> ibis.date("2023-01-02")
┌───────────────────────────┐
datetime.date(2023, 1, 2) │
└───────────────────────────┘
┌──────────────────────────────────
Timestamp('2023-01-02 00:00:00') │
└──────────────────────────────────
Create a date scalar from year, month, and day
>>> ibis.date(2023, 1, 2)
┌───────────────────────────┐
datetime.date(2023, 1, 2) │
└───────────────────────────┘
┌──────────────────────────────────
Timestamp('2023-01-02 00:00:00') │
└──────────────────────────────────
Create a date column from year, month, and day
13 changes: 8 additions & 5 deletions ibis/formats/pandas.py
@@ -224,17 +224,20 @@ def convert_Timestamp(cls, s, dtype, pandas_type):
def convert_Date(cls, s, dtype, pandas_type):
if isinstance(s.dtype, pd.DatetimeTZDtype):
s = s.dt.tz_convert("UTC").dt.tz_localize(None)

try:
return s.astype(pandas_type).dt.date
return s.astype(pandas_type)
except (ValueError, TypeError, pd._libs.tslibs.OutOfBoundsDatetime):

def try_date(v):
if isinstance(v, datetime.datetime):
return v.date()
if isinstance(v, datetime.date):
return pd.Timestamp(v)
elif isinstance(v, str):
if v.endswith("Z"):
return datetime.datetime.fromisoformat(v[:-1]).date()
return datetime.date.fromisoformat(v)
datetime_obj = datetime.datetime.fromisoformat(v[:-1])
else:
datetime_obj = datetime.datetime.fromisoformat(v)
return pd.Timestamp(datetime_obj)
else:
return v

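
Roughly, the happy path of `convert_Date` now keeps a datetime Series in a `datetime64` dtype via `astype(pandas_type)` instead of mapping each element to `datetime.date`. A small pandas-only sketch of the difference (illustrative values; the exact target dtype depends on what `pandas_type` resolves to, and non-nanosecond dtypes require pandas >= 2.0):

```python
import pandas as pd

s = pd.Series(pd.to_datetime(["2023-01-02 03:04:05", "2024-05-06"]))

# Previous behavior (roughly): an object Series of datetime.date values.
old = s.dt.date
print(old.dtype)  # object

# New behavior (roughly): stay in a datetime64 dtype.
new = s.astype("datetime64[s]")
print(new.dtype)  # datetime64[s]
```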
