From faad9d32b38340b80669c8771d6a9f2c0504e4b2 Mon Sep 17 00:00:00 2001 From: Kai Jennissen Date: Thu, 18 Apr 2024 16:06:29 +0200 Subject: [PATCH] feat(api): add `day_of_week.iso_index` method for date and timestamp types --- ibis/backends/dask/kernels.py | 1 + ibis/backends/pandas/kernels.py | 1 + ibis/backends/polars/compiler.py | 1 + ibis/backends/sql/compilers/base.py | 3 ++ ibis/backends/sql/compilers/bigquery.py | 3 ++ ibis/backends/sql/compilers/clickhouse.py | 3 ++ ibis/backends/sql/compilers/datafusion.py | 3 ++ ibis/backends/sql/compilers/exasol.py | 7 ++++- ibis/backends/sql/compilers/flink.py | 3 ++ ibis/backends/sql/compilers/impala.py | 3 ++ ibis/backends/sql/compilers/mssql.py | 3 ++ ibis/backends/sql/compilers/mysql.py | 3 ++ ibis/backends/sql/compilers/postgres.py | 3 ++ ibis/backends/sql/compilers/pyspark.py | 3 ++ ibis/backends/sql/compilers/sqlite.py | 10 ++++++ ibis/backends/sql/compilers/trino.py | 3 ++ ibis/backends/tests/test_temporal.py | 37 +++++++++++++--------- ibis/expr/operations/temporal.py | 14 +++++++++ ibis/expr/types/temporal.py | 38 ++++++++++++++++++++++- 19 files changed, 125 insertions(+), 17 deletions(-) diff --git a/ibis/backends/dask/kernels.py b/ibis/backends/dask/kernels.py index 12a1a782ab01..e9e227e5b62a 100644 --- a/ibis/backends/dask/kernels.py +++ b/ibis/backends/dask/kernels.py @@ -44,6 +44,7 @@ def inner(df): ), ops.TimestampFromUNIX: lambda arg, unit: dd.to_datetime(arg, unit=unit.short), ops.DayOfWeekIndex: lambda arg: dd.to_datetime(arg).dt.dayofweek, + ops.IsoDayOfWeekIndex: lambda arg: dd.to_datetime(arg).dt.dayofweek + 1, ops.DayOfWeekName: lambda arg: dd.to_datetime(arg).dt.day_name(), } diff --git a/ibis/backends/pandas/kernels.py b/ibis/backends/pandas/kernels.py index da650d1211c3..7b5e357295fb 100644 --- a/ibis/backends/pandas/kernels.py +++ b/ibis/backends/pandas/kernels.py @@ -429,6 +429,7 @@ def wrapper(*args, **kwargs): ), ops.Capitalize: lambda arg: arg.str.capitalize(), ops.Date: lambda arg: 
arg.dt.floor("d"), + ops.IsoDayOfWeekIndex: lambda arg: pd.to_datetime(arg).dt.dayofweek + 1, ops.DayOfWeekIndex: lambda arg: pd.to_datetime(arg).dt.dayofweek, ops.DayOfWeekName: lambda arg: pd.to_datetime(arg).dt.day_name(), ops.EndsWith: lambda arg, end: arg.str.endswith(end), diff --git a/ibis/backends/polars/compiler.py b/ibis/backends/polars/compiler.py index 5ea3b9ae96ba..280cdff9e6f0 100644 --- a/ibis/backends/polars/compiler.py +++ b/ibis/backends/polars/compiler.py @@ -1064,6 +1064,7 @@ def extract_epoch_seconds(op, **kw): ops.Ceil: lambda arg: arg.ceil().cast(pl.Int64), ops.Cos: operator.methodcaller("cos"), ops.Cot: lambda arg: 1.0 / arg.tan(), + ops.IsoDayOfWeekIndex: (lambda arg: arg.dt.weekday().cast(pl.Int16)), ops.DayOfWeekIndex: ( lambda arg: arg.dt.weekday().cast(pl.Int16) - _day_of_week_offset ), diff --git a/ibis/backends/sql/compilers/base.py b/ibis/backends/sql/compilers/base.py index a040609e0118..af0367eaf027 100644 --- a/ibis/backends/sql/compilers/base.py +++ b/ibis/backends/sql/compilers/base.py @@ -861,6 +861,9 @@ def visit_TimeTruncate(self, op, *, arg, unit): def visit_DayOfWeekIndex(self, op, *, arg): return (self.f.dayofweek(arg) + 6) % 7 + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return ((self.f.dayofweek(arg) + 6) % 7) + 1 + def visit_DayOfWeekName(self, op, *, arg): # day of week number is 0-indexed # Sunday == 0 diff --git a/ibis/backends/sql/compilers/bigquery.py b/ibis/backends/sql/compilers/bigquery.py index 10c405492c9b..c4d56a20332e 100644 --- a/ibis/backends/sql/compilers/bigquery.py +++ b/ibis/backends/sql/compilers/bigquery.py @@ -218,6 +218,9 @@ def visit_StringJoin(self, op, *, arg, sep): def visit_DayOfWeekIndex(self, op, *, arg): return self.f.mod(self.f.extract(self.v.dayofweek, arg) + 5, 7) + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return self.f.mod(self.f.extract(self.v.dayofweek, arg) + 5, 7) + 1 + def visit_DayOfWeekName(self, op, *, arg): return self.f.initcap(sge.Cast(this=arg, to="STRING 
FORMAT 'DAY'")) diff --git a/ibis/backends/sql/compilers/clickhouse.py b/ibis/backends/sql/compilers/clickhouse.py index 743bd70ad398..f5f42ef5b816 100644 --- a/ibis/backends/sql/compilers/clickhouse.py +++ b/ibis/backends/sql/compilers/clickhouse.py @@ -471,6 +471,9 @@ def visit_DayOfWeekIndex(self, op, *, arg): weekdays = len(calendar.day_name) return (((self.f.toDayOfWeek(arg) - 1) % weekdays) + weekdays) % weekdays + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return self.f.toDayOfWeek(arg) + def visit_DayOfWeekName(self, op, *, arg): # ClickHouse 20 doesn't support dateName # diff --git a/ibis/backends/sql/compilers/datafusion.py b/ibis/backends/sql/compilers/datafusion.py index fda582f88aee..3e1780f213cb 100644 --- a/ibis/backends/sql/compilers/datafusion.py +++ b/ibis/backends/sql/compilers/datafusion.py @@ -247,6 +247,9 @@ def visit_ExtractDayOfYear(self, op, *, arg): def visit_DayOfWeekIndex(self, op, *, arg): return (self.f.date_part("dow", arg) + 6) % 7 + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return ((self.f.date_part("dow", arg) + 6) % 7) + 1 + def visit_DayOfWeekName(self, op, *, arg): return sg.exp.Case( this=sge.paren(self.f.date_part("dow", arg) + 6, copy=False) % 7, diff --git a/ibis/backends/sql/compilers/exasol.py b/ibis/backends/sql/compilers/exasol.py index e5e67c1d5e82..6d6d66c4381c 100644 --- a/ibis/backends/sql/compilers/exasol.py +++ b/ibis/backends/sql/compilers/exasol.py @@ -51,7 +51,6 @@ class ExasolCompiler(SQLGlotCompiler): ops.DateAdd, ops.DateSub, ops.DateFromYMD, - ops.DayOfWeekIndex, ops.ElementWiseVectorizedUDF, ops.IntervalFromInteger, ops.IsInf, @@ -200,6 +199,12 @@ def visit_ExtractDayOfYear(self, op, *, arg): def visit_ExtractWeekOfYear(self, op, *, arg): return self.cast(self.f.to_char(arg, "IW"), op.dtype) + def visit_DayOfWeekIndex(self, op, *, arg): + return self.cast(self.f.to_char(arg, "ID"), op.dtype) - 1 + + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return self.cast(self.f.to_char(arg, "ID"), 
op.dtype) + def visit_ExtractIsoYear(self, op, *, arg): return self.cast(self.f.to_char(arg, "IYYY"), op.dtype) diff --git a/ibis/backends/sql/compilers/flink.py b/ibis/backends/sql/compilers/flink.py index 1bdf6cae9ebf..e08bc62044c4 100644 --- a/ibis/backends/sql/compilers/flink.py +++ b/ibis/backends/sql/compilers/flink.py @@ -441,6 +441,9 @@ def visit_ExtractMicrosecond(self, op, *, arg): def visit_DayOfWeekIndex(self, op, *, arg): return (self.f.dayofweek(arg) + 5) % 7 + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return ((self.f.dayofweek(arg) + 5) % 7) + 1 + def visit_DayOfWeekName(self, op, *, arg): index = self.cast(self.f.dayofweek(self.cast(arg, dt.date)), op.dtype) lookup_table = self.f.str_to_map( diff --git a/ibis/backends/sql/compilers/impala.py b/ibis/backends/sql/compilers/impala.py index 6288865d2ec6..a8a36450932b 100644 --- a/ibis/backends/sql/compilers/impala.py +++ b/ibis/backends/sql/compilers/impala.py @@ -137,6 +137,9 @@ def visit_RandomScalar(self, op, **_): def visit_DayOfWeekIndex(self, op, *, arg): return self.f.pmod(self.f.dayofweek(arg) - 2, 7) + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return self.f.pmod(self.f.dayofweek(arg) - 2, 7) + 1 + def visit_ExtractMillisecond(self, op, *, arg): return self.f.extract(self.v.millisecond, arg) % 1_000 diff --git a/ibis/backends/sql/compilers/mssql.py b/ibis/backends/sql/compilers/mssql.py index 4da9b31e2c5f..5548961bed07 100644 --- a/ibis/backends/sql/compilers/mssql.py +++ b/ibis/backends/sql/compilers/mssql.py @@ -203,6 +203,9 @@ def visit_CountDistinct(self, op, *, arg, where): def visit_DayOfWeekIndex(self, op, *, arg): return self.f.datepart(self.v.weekday, arg) - 1 + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return self.f.datepart(self.v.weekday, arg) + def visit_DayOfWeekName(self, op, *, arg): days = calendar.day_name return sge.Case( diff --git a/ibis/backends/sql/compilers/mysql.py b/ibis/backends/sql/compilers/mysql.py index 95f262eebfc5..67be062116ca 100644 --- 
a/ibis/backends/sql/compilers/mysql.py +++ b/ibis/backends/sql/compilers/mysql.py @@ -177,6 +177,9 @@ def visit_GroupConcat(self, op, *, arg, sep, where): def visit_DayOfWeekIndex(self, op, *, arg): return (self.f.dayofweek(arg) + 5) % 7 + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return ((self.f.dayofweek(arg) + 5) % 7) + 1 + def visit_Literal(self, op, *, value, dtype): # avoid casting NULL: the set of types allowed by MySQL and # MariaDB when casting is a strict subset of allowed types in other diff --git a/ibis/backends/sql/compilers/postgres.py b/ibis/backends/sql/compilers/postgres.py index e074f9f89c4e..e14d0b95028f 100644 --- a/ibis/backends/sql/compilers/postgres.py +++ b/ibis/backends/sql/compilers/postgres.py @@ -490,6 +490,9 @@ def visit_TimestampBucket(self, op, *, arg, interval, offset): def visit_DayOfWeekIndex(self, op, *, arg): return self.cast(self.f.extract("dow", arg) + 6, dt.int16) % 7 + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return self.cast(self.f.extract("isodow", arg), dt.int16) + def visit_DayOfWeekName(self, op, *, arg): return self.f.trim(self.f.to_char(arg, "Day"), string.whitespace) diff --git a/ibis/backends/sql/compilers/pyspark.py b/ibis/backends/sql/compilers/pyspark.py index b3b8bed250a8..4599ddda353e 100644 --- a/ibis/backends/sql/compilers/pyspark.py +++ b/ibis/backends/sql/compilers/pyspark.py @@ -164,6 +164,9 @@ def visit_IntervalFromInteger(self, op, *, arg, unit): def visit_DayOfWeekIndex(self, op, *, arg): return (self.f.dayofweek(arg) + 5) % 7 + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return ((self.f.dayofweek(arg) + 5) % 7) + 1 + def visit_DayOfWeekName(self, op, *, arg): return sge.Case( this=(self.f.dayofweek(arg) + 5) % 7, diff --git a/ibis/backends/sql/compilers/sqlite.py b/ibis/backends/sql/compilers/sqlite.py index 2c12cd9d97eb..48892d216364 100644 --- a/ibis/backends/sql/compilers/sqlite.py +++ b/ibis/backends/sql/compilers/sqlite.py @@ -414,6 +414,16 @@ def visit_DayOfWeekIndex(self, op, 
*, arg): self.f.mod(self.cast(self.f.strftime("%w", arg) + 6, dt.int64), 7), dt.int64 ) + def visit_IsoDayOfWeekIndex(self, op, *, arg): + # return self.cast(self.f.strftime("%u", arg), dt.int64) + return ( + self.cast( + self.f.mod(self.cast(self.f.strftime("%w", arg) + 6, dt.int64), 7), + dt.int64, + ) + + 1 + ) + def visit_DayOfWeekName(self, op, *, arg): return sge.Case( this=self.f.strftime("%w", arg), diff --git a/ibis/backends/sql/compilers/trino.py b/ibis/backends/sql/compilers/trino.py index f15d57ee7104..dd1262ba1c22 100644 --- a/ibis/backends/sql/compilers/trino.py +++ b/ibis/backends/sql/compilers/trino.py @@ -209,6 +209,9 @@ def visit_DayOfWeekIndex(self, op, *, arg): sge.paren(self.f.day_of_week(arg) + 6, copy=False) % 7, op.dtype ) + def visit_IsoDayOfWeekIndex(self, op, *, arg): + return self.cast(sge.paren(self.f.day_of_week(arg), copy=False), op.dtype) + def visit_DayOfWeekName(self, op, *, arg): return self.f.date_format(arg, "%W") diff --git a/ibis/backends/tests/test_temporal.py b/ibis/backends/tests/test_temporal.py index a1e5667dc7e6..d1e5fa9594c7 100644 --- a/ibis/backends/tests/test_temporal.py +++ b/ibis/backends/tests/test_temporal.py @@ -164,7 +164,7 @@ def test_iso_year_does_not_match_date_year(con): id="day_of_week_index", marks=[ pytest.mark.notimpl( - ["druid", "oracle", "exasol"], raises=com.OperationNotDefinedError + ["druid", "oracle"], raises=com.OperationNotDefinedError ), ], ), @@ -1561,29 +1561,34 @@ def test_string_to_date(alltypes, fmt): @pytest.mark.parametrize( - ("date", "expected_index", "expected_day"), + ("date", "expected_index", "expected_iso_index", "expected_day"), [ - param("2017-01-01", 6, "Sunday", id="sunday"), - param("2017-01-02", 0, "Monday", id="monday"), - param("2017-01-03", 1, "Tuesday", id="tuesday"), - param("2017-01-04", 2, "Wednesday", id="wednesday"), - param("2017-01-05", 3, "Thursday", id="thursday"), - param("2017-01-06", 4, "Friday", id="friday"), - param("2017-01-07", 5, "Saturday", 
id="saturday"), + param("2017-01-01", 6, 7, "Sunday", id="sunday"), + param("2017-01-02", 0, 1, "Monday", id="monday"), + param("2017-01-03", 1, 2, "Tuesday", id="tuesday"), + param("2017-01-04", 2, 3, "Wednesday", id="wednesday"), + param("2017-01-05", 3, 4, "Thursday", id="thursday"), + param("2017-01-06", 4, 5, "Friday", id="friday"), + param("2017-01-07", 5, 6, "Saturday", id="saturday"), ], ) @pytest.mark.notimpl(["druid", "oracle"], raises=com.OperationNotDefinedError) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) +# @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["risingwave"], raises=AssertionError, reason="Refer to https://github.com/risingwavelabs/risingwave/issues/14670", ) -def test_day_of_week_scalar(con, date, expected_index, expected_day): +def test_day_of_week_scalar( + con, date, expected_index, expected_iso_index, expected_day +): expr = ibis.literal(date).cast(dt.date) result_index = con.execute(expr.day_of_week.index().name("tmp")) assert result_index == expected_index + result_iso_index = con.execute(expr.day_of_week.iso_index().name("tmp")) + assert result_iso_index == expected_iso_index + result_day = con.execute(expr.day_of_week.full_name().name("tmp")) assert result_day.lower() == expected_day.lower() @@ -1594,7 +1599,7 @@ def test_day_of_week_scalar(con, date, expected_index, expected_day): raises=AttributeError, reason="StringColumn' object has no attribute 'day_of_week'", ) -@pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) +# @pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) @pytest.mark.broken( ["risingwave"], raises=AssertionError, @@ -1608,6 +1613,11 @@ def test_day_of_week_column(backend, alltypes, df): backend.assert_series_equal(result_index, expected_index, check_names=False) + result_iso_index = expr.iso_index().name("tmp").execute() + expected_iso_index = df.timestamp_col.dt.isocalendar().day.astype("int16") + + 
backend.assert_series_equal(result_iso_index, expected_iso_index, check_names=False) + result_day = expr.full_name().name("tmp").execute() expected_day = df.timestamp_col.dt.day_name() @@ -1621,9 +1631,6 @@ def test_day_of_week_column(backend, alltypes, df): lambda t: t.timestamp_col.day_of_week.index().count(), lambda s: s.dt.dayofweek.count(), id="day_of_week_index", - marks=[ - pytest.mark.notimpl(["exasol"], raises=com.OperationNotDefinedError) - ], ), param( lambda t: t.timestamp_col.day_of_week.full_name().length().sum(), diff --git a/ibis/expr/operations/temporal.py b/ibis/expr/operations/temporal.py index 8bdfc34bda76..d9356d58a941 100644 --- a/ibis/expr/operations/temporal.py +++ b/ibis/expr/operations/temporal.py @@ -190,6 +190,13 @@ class DayOfWeekIndex(Unary): dtype = dt.int16 +@public +class IsoDayOfWeekIndex(Unary): + arg: Value[dt.Date | dt.Timestamp] + + dtype = dt.int16 + + @public class DayOfWeekName(Unary): """Extract the name of the day of the week from a date or timestamp.""" @@ -199,6 +206,13 @@ class DayOfWeekName(Unary): dtype = dt.string +@public +class IsoDayOfWeekName(Unary): + arg: Value[dt.Date | dt.Timestamp] + + dtype = dt.string + + @public class Time(Unary): """Extract the time from a timestamp.""" diff --git a/ibis/expr/types/temporal.py b/ibis/expr/types/temporal.py index 4c216ff299d9..e84d62312e96 100644 --- a/ibis/expr/types/temporal.py +++ b/ibis/expr/types/temporal.py @@ -52,8 +52,34 @@ def day_of_week(self) -> DayOfWeek: Returns ------- DayOfWeek - An namespace expression containing methods to use to extract + A namespace expression with methods for extracting day-of-week information. + + Examples + -------- + >>> import ibis + >>> import datetime as dt + >>> from ibis import _ + >>> ibis.options.interactive = True + >>> t = ibis.memtable({"date": [dt.datetime(2024, 4, x) for x in range(14, 21)]}) + >>> t.mutate( + ... day_of_week=_.date.day_of_week.index(), + ... day_of_week_name=_.date.day_of_week.full_name(), + ... 
iso_day_of_week=_.date.day_of_week.iso_index(), + ... ) + ┏━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━┓ + ┃ date ┃ day_of_week ┃ day_of_week_name ┃ iso_day_of_week ┃ + ┡━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━┩ + │ timestamp │ int16 │ string │ int16 │ + ├─────────────────────┼─────────────┼──────────────────┼─────────────────┤ + │ 2024-04-14 00:00:00 │ 6 │ Sunday │ 7 │ + │ 2024-04-15 00:00:00 │ 0 │ Monday │ 1 │ + │ 2024-04-16 00:00:00 │ 1 │ Tuesday │ 2 │ + │ 2024-04-17 00:00:00 │ 2 │ Wednesday │ 3 │ + │ 2024-04-18 00:00:00 │ 3 │ Thursday │ 4 │ + │ 2024-04-19 00:00:00 │ 4 │ Friday │ 5 │ + │ 2024-04-20 00:00:00 │ 5 │ Saturday │ 6 │ + └─────────────────────┴─────────────┴──────────────────┴─────────────────┘ """ return DayOfWeek(self) @@ -994,3 +1020,13 @@ def full_name(self): The name of the day of the week """ return ops.DayOfWeekName(self._expr).to_expr() + + def iso_index(self): + """Get the ISO 8601 index of the day of the week (1=Monday, 7=Sunday). + + Returns + ------- + IntegerValue + The ISO 8601 index of the day of the week (1=Monday, 7=Sunday). + """ + return ops.IsoDayOfWeekIndex(self._expr).to_expr()