diff --git a/compose.yaml b/compose.yaml index 5a6d74901c321..ae0fe8deb4f34 100644 --- a/compose.yaml +++ b/compose.yaml @@ -341,6 +341,7 @@ services: - druid oracle: + platform: linux/amd64 image: gvenzl/oracle-free:23.3-slim environment: ORACLE_PASSWORD: ibis diff --git a/docs/contribute/02_workflow.qmd b/docs/contribute/02_workflow.qmd index 99c713af98737..b3b866d071d72 100644 --- a/docs/contribute/02_workflow.qmd +++ b/docs/contribute/02_workflow.qmd @@ -251,3 +251,11 @@ you are going only up). ```bash $ colima delete ``` + +### `x86_64` or `amd64` based containers + +While starting the containers based on `x86_64` / `amd64`, the architecture flag needs to be set in two places: +1. Add `platform: linux/amd64` for the service in `compose.yaml`. +2. Set the `--arch` flag while starting the VM `colima start --arch x86_64` + +For instance, this step is necessary for the `oracle` service in `compose.yaml`. Otherwise, the container will fail shortly after getting started. diff --git a/ibis/backends/dask/convert.py b/ibis/backends/dask/convert.py index eb5e3b0f650b8..fac952f267a76 100644 --- a/ibis/backends/dask/convert.py +++ b/ibis/backends/dask/convert.py @@ -74,7 +74,7 @@ def convert_Date(cls, s, dtype, pandas_type): else: s = dd.to_datetime(s) - return s.dt.normalize() + return s @classmethod def convert_String(cls, s, dtype, pandas_type): diff --git a/ibis/backends/dask/tests/test_cast.py b/ibis/backends/dask/tests/test_cast.py index 23187b59abf96..ed27d5e4ff508 100644 --- a/ibis/backends/dask/tests/test_cast.py +++ b/ibis/backends/dask/tests/test_cast.py @@ -146,13 +146,12 @@ def test_timestamp_with_timezone_is_inferred_correctly(t): def test_cast_date(t, df, column): expr = t[column].cast("date") result = expr.execute() - expected = ( - df[column] - .dt.normalize() - .map(lambda x: x.date()) - .compute() - .rename(expr.get_name()) - ) + + expected = df[column].compute().rename(expr.get_name()) + if expected.dt.tz: + expected = expected.dt.tz_convert("UTC") + expected = expected.dt.tz_localize(None).astype(result.dtype) + tm.assert_series_equal(result, expected, check_index=False) diff --git a/ibis/backends/dask/tests/test_temporal.py b/ibis/backends/dask/tests/test_temporal.py index a70bd37005f0b..3e3bd91cf95ae 100644 --- a/ibis/backends/dask/tests/test_temporal.py +++ b/ibis/backends/dask/tests/test_temporal.py @@ -66,8 +66,12 @@ def test_timestamp_functions(con, case_func, expected_func): def test_cast_datetime_strings_to_date(t, df, column): expr = t[column].cast("date") result = expr.execute() + df_computed = df.compute() - expected = pd.to_datetime(df_computed[column]).map(lambda x: x.date()) + expected = pd.to_datetime(df_computed[column]) + if expected.dt.tz: + expected = expected.dt.tz_convert("UTC") + expected = expected.dt.tz_localize(None).astype(result.dtype) tm.assert_series_equal( result.reset_index(drop=True).rename("tmp"), @@ -114,10 +118,10 @@ def test_cast_integer_to_date(t, pandas_df): expr = t.plain_int64.cast("date") result = expr.execute() expected = pd.Series( - pd.to_datetime(pandas_df.plain_int64.values, unit="D").date, + pd.to_datetime(pandas_df.plain_int64.values, unit="D"), index=pandas_df.index, name="plain_int64", - ) + ).astype(result.dtype) tm.assert_series_equal(result, expected, check_names=False) diff --git a/ibis/backends/oracle/converter.py b/ibis/backends/oracle/converter.py index 7755cb595340b..0b013847e0475 100644 --- a/ibis/backends/oracle/converter.py +++ b/ibis/backends/oracle/converter.py @@ -1,6 +1,6 @@ from __future__ import annotations -import datetime +import pandas as pd from ibis.formats.pandas import PandasData @@ -8,12 +8,12 @@ class OraclePandasData(PandasData): @classmethod def convert_Timestamp_element(cls, dtype): - return datetime.datetime.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_Date_element(cls, dtype): - return datetime.date.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_Time_element(cls, dtype): - return datetime.time.fromisoformat + return pd.Timestamp.fromisoformat diff --git a/ibis/backends/pandas/tests/test_cast.py b/ibis/backends/pandas/tests/test_cast.py index 3f166e79464f4..49a1850cce881 100644 --- a/ibis/backends/pandas/tests/test_cast.py +++ b/ibis/backends/pandas/tests/test_cast.py @@ -163,7 +163,7 @@ def test_timestamp_with_timezone_is_inferred_correctly(t, df): def test_cast_date(t, df, column): expr = t[column].cast("date") result = expr.execute() - expected = df[column].dt.normalize().dt.tz_localize(None).dt.date + expected = df[column].dt.normalize().dt.tz_localize(None).astype(result.dtype) tm.assert_series_equal(result, expected) diff --git a/ibis/backends/pandas/tests/test_temporal.py b/ibis/backends/pandas/tests/test_temporal.py index f8cf670e99f14..6c43e0752f757 100644 --- a/ibis/backends/pandas/tests/test_temporal.py +++ b/ibis/backends/pandas/tests/test_temporal.py @@ -67,7 +67,10 @@ def test_timestamp_functions(case_func, expected_func): def test_cast_datetime_strings_to_date(t, df, column): expr = t[column].cast("date") result = expr.execute() - expected = pd.to_datetime(df[column]).dt.normalize().dt.tz_localize(None).dt.date + expected = pd.to_datetime(df[column]).dt.normalize() + if expected.dt.tz: + expected = expected.dt.tz_convert("UTC") + expected = expected.dt.tz_localize(None).astype(result.dtype) tm.assert_series_equal(result, expected) @@ -104,7 +107,7 @@ def test_cast_integer_to_date(t, df): expr = t.plain_int64.cast("date") result = expr.execute() expected = pd.Series( - pd.to_datetime(df.plain_int64.values, unit="D").date, + pd.to_datetime(df.plain_int64.values, unit="D").astype(result.dtype), index=df.index, name="plain_int64", ) diff --git a/ibis/backends/snowflake/converter.py b/ibis/backends/snowflake/converter.py index e3094eab94de0..9b363b0b502ac 100644 --- a/ibis/backends/snowflake/converter.py +++ b/ibis/backends/snowflake/converter.py @@ -1,9 +1,9 @@ from __future__ import annotations -import datetime import json from typing import TYPE_CHECKING +import pandas as pd import pyarrow as pa from ibis.formats.pandas import PandasData @@ -52,15 +52,15 @@ def __arrow_ext_scalar_class__(self): class SnowflakePandasData(PandasData): @classmethod def convert_Timestamp_element(cls, dtype): - return datetime.datetime.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_Date_element(cls, dtype): - return datetime.date.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_Time_element(cls, dtype): - return datetime.time.fromisoformat + return pd.Timestamp.fromisoformat @classmethod def convert_JSON(cls, s, dtype, pandas_type): diff --git a/ibis/backends/sqlite/tests/test_types.py b/ibis/backends/sqlite/tests/test_types.py index 14f8eeebd9f91..50ffdf96dc9d8 100644 --- a/ibis/backends/sqlite/tests/test_types.py +++ b/ibis/backends/sqlite/tests/test_types.py @@ -89,6 +89,8 @@ def test_type_map(db): sol = pd.DataFrame( {"str_col": ["a"], "date_col": pd.Series([date(2022, 1, 1)], dtype="object")} ) + sol["date_col"] = sol["date_col"].astype(res["date_col"].dtype) + assert res.equals(sol) diff --git a/ibis/backends/tests/test_aggregation.py b/ibis/backends/tests/test_aggregation.py index 50be5a738d5a1..51ad14f6c8f75 100644 --- a/ibis/backends/tests/test_aggregation.py +++ b/ibis/backends/tests/test_aggregation.py @@ -1167,7 +1167,7 @@ def test_string_quantile(alltypes, func): ) def test_date_quantile(alltypes, func): expr = func(alltypes.timestamp_col.date()) - result = expr.execute() + result = expr.execute().to_pydatetime().date() assert result == date(2009, 12, 31) diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index 19341a8c9b07b..561f7ab4b0c0c 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -686,9 +686,7 @@ def convert_to_offset(x): "ignore", category=(UserWarning, pd.errors.PerformanceWarning) ) expected = ( - pd.to_datetime(df.date_string_col) - .add(offset) - .map(lambda ts: ts.normalize().date(), na_action="ignore") + pd.to_datetime(df.date_string_col).add(offset).astype("datetime64[s]") ) expected = backend.default_series_rename(expected) @@ -774,12 +772,7 @@ def convert_to_offset(x): ), param( lambda t, _: t.timestamp_col.date() + ibis.interval(days=4), - lambda t, _: ( - t.timestamp_col.dt.floor("d") - .add(pd.Timedelta(days=4)) - .dt.normalize() - .dt.date - ), + lambda t, _: t.timestamp_col.dt.floor("d").add(pd.Timedelta(days=4)), id="date-add-interval", marks=[ pytest.mark.notimpl( @@ -788,16 +781,19 @@ def convert_to_offset(x): reason="'StringColumn' object has no attribute 'date'", ), pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), + pytest.mark.broken( + ["oracle"], + raises=AssertionError, + reason=( + "Oracle includes hour:min:sec in the result for " + "CAST(t0.timestamp_col AS DATE), while other backends don't." + ), + ), ], ), param( lambda t, _: t.timestamp_col.date() - ibis.interval(days=14), - lambda t, _: ( - t.timestamp_col.dt.floor("d") - .sub(pd.Timedelta(days=14)) - .dt.normalize() - .dt.date - ), + lambda t, _: t.timestamp_col.dt.floor("d").sub(pd.Timedelta(days=14)), id="date-subtract-interval", marks=[ pytest.mark.notimpl( @@ -806,6 +802,14 @@ def convert_to_offset(x): reason="'StringColumn' object has no attribute 'date'", ), pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError), + pytest.mark.broken( + ["oracle"], + raises=AssertionError, + reason=( + "Oracle includes hour:min:sec in the result for " + "CAST(t0.timestamp_col AS DATE), while other backends don't." + ), + ), ], ), param( @@ -1210,19 +1214,28 @@ def test_interval_add_cast_scalar(backend, alltypes): reason="'StringColumn' object has no attribute 'date'", ) @pytest.mark.broken(["flink"], raises=AssertionError, reason="incorrect results") +@pytest.mark.broken( + ["oracle"], + raises=AssertionError, + reason=( + "Oracle includes hour:min:sec in the result for " + "CAST(t0.timestamp_col AS DATE), while other backends don't." + ), +) def test_interval_add_cast_column(backend, alltypes, df): timestamp_date = alltypes.timestamp_col.date() delta = alltypes.bigint_col.cast("interval('D')") expr = alltypes["id", (timestamp_date + delta).name("tmp")] result = expr.execute().sort_values("id").reset_index().tmp + df = df.sort_values("id").reset_index(drop=True) expected = ( df["timestamp_col"] .dt.normalize() .add(df.bigint_col.astype("timedelta64[D]")) .rename("tmp") - .dt.date ) + backend.assert_series_equal(result, expected.astype(result.dtype)) @@ -2442,7 +2455,7 @@ def test_time_literal_sql(dialect, snapshot, micros): ), pytest.mark.notyet(["datafusion"], raises=Exception), pytest.mark.broken( - ["pandas", "dask"], + ["dask", "pandas", "pyspark"], condition=is_older_than("pandas", "2.0.0"), raises=ValueError, reason="Out of bounds nanosecond timestamp: 9999-01-02 00:00:00", @@ -2461,7 +2474,7 @@ def test_time_literal_sql(dialect, snapshot, micros): ), pytest.mark.notyet(["datafusion"], raises=Exception), pytest.mark.broken( - ["pandas", "dask"], + ["dask", "pandas", "pyspark"], condition=is_older_than("pandas", "2.0.0"), raises=ValueError, reason="Out of bounds nanosecond timestamp: 1-07-17 00:00:00", @@ -2484,10 +2497,7 @@ def test_time_literal_sql(dialect, snapshot, micros): ) def test_date_scalar(con, value, func): expr = ibis.date(func(value)).name("tmp") - result = con.execute(expr) - assert not isinstance(result, datetime.datetime) - assert isinstance(result, datetime.date) - - assert result == datetime.date.fromisoformat(value) + assert isinstance(result, pd.Timestamp) + assert result == pd.Timestamp.fromisoformat(value) diff --git a/ibis/expr/api.py b/ibis/expr/api.py index d9ca23f984e59..84f82964197f2 100644 --- a/ibis/expr/api.py +++ b/ibis/expr/api.py @@ -907,12 +907,12 @@ def date(value_or_year, month=None, day=None, /): Create a date scalar from a string >>> ibis.date("2023-01-02") - datetime.date(2023, 1, 2) + Timestamp('2023-01-02 00:00:00') Create a date scalar from year, month, and day >>> ibis.date(2023, 1, 2) - datetime.date(2023, 1, 2) + Timestamp('2023-01-02 00:00:00') Create a date column from year, month, and day diff --git a/ibis/formats/pandas.py b/ibis/formats/pandas.py index 597b4efb22eac..77288d9ad5ea9 100644 --- a/ibis/formats/pandas.py +++ b/ibis/formats/pandas.py @@ -223,17 +223,21 @@ def convert_Timestamp(cls, s, dtype, pandas_type): def convert_Date(cls, s, dtype, pandas_type): if isinstance(s.dtype, pd.DatetimeTZDtype): s = s.dt.tz_convert("UTC").dt.tz_localize(None) + try: - return s.astype(pandas_type).dt.date + return s.astype(pandas_type) + except (TypeError, pd._libs.tslibs.OutOfBoundsDatetime): def try_date(v): - if isinstance(v, datetime.datetime): - return v.date() + if isinstance(v, datetime.date): + return pd.Timestamp(v) elif isinstance(v, str): if v.endswith("Z"): - return datetime.datetime.fromisoformat(v[:-1]).date() - return datetime.date.fromisoformat(v) + datetime_obj = datetime.datetime.fromisoformat(v[:-1]) + else: + datetime_obj = datetime.datetime.fromisoformat(v) + return pd.Timestamp(datetime_obj) else: return v