Skip to content

Commit

Permalink
feat: allow empty structs
Browse files Browse the repository at this point in the history
working towards ibis-project#8289

I'm not sure how useful empty structs are, since it seems that
only BigQuery, Dask, and pandas actually support them.
Still, if you stay in ibis-land, they may be useful,
e.g. for doing type manipulations, or if you
only use them for intermediate calculations.
It's not hard for us to support them, so why not.

I'm not sure of the history behind the specific restriction
that I am removing from type inference.

Relevant context:

- ibis-project#8876
- https://github.com/ibis-project/ibis/issues?q=empty+struct
  • Loading branch information
NickCrews committed Jun 4, 2024
1 parent 5bef96a commit c2da371
Show file tree
Hide file tree
Showing 5 changed files with 41 additions and 5 deletions.
3 changes: 2 additions & 1 deletion ibis/backends/tests/errors.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,14 @@
from psycopg2.errors import OperationalError as PsycoPg2OperationalError
from psycopg2.errors import ProgrammingError as PsycoPg2ProgrammingError
from psycopg2.errors import SyntaxError as PsycoPg2SyntaxError
from psycopg2.errors import UndefinedFunction as PsycoPg2UndefinedFunction
from psycopg2.errors import UndefinedObject as PsycoPg2UndefinedObject
except ImportError:
PsycoPg2SyntaxError = PsycoPg2IndeterminateDatatype = (
PsycoPg2InvalidTextRepresentation
) = PsycoPg2DivisionByZero = PsycoPg2InternalError = PsycoPg2ProgrammingError = (
PsycoPg2OperationalError
) = PsycoPg2UndefinedObject = None
) = PsycoPg2UndefinedFunction = PsycoPg2UndefinedObject = None

try:
from pymysql.err import NotSupportedError as MySQLNotSupportedError
Expand Down
19 changes: 19 additions & 0 deletions ibis/backends/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,16 @@
import ibis.expr.datatypes as dt
from ibis import util
from ibis.backends.tests.errors import (
ClickHouseDatabaseError,
DuckDBParserException,
PolarsColumnNotFoundError,
PolarsComputeError,
PsycoPg2InternalError,
PsycoPg2SyntaxError,
PsycoPg2UndefinedFunction,
Py4JJavaError,
PySparkAnalysisException,
TrinoUserError,
)
from ibis.common.exceptions import IbisError

Expand All @@ -29,6 +34,20 @@
]


@pytest.mark.notimpl("clickhouse", raises=ClickHouseDatabaseError)
@pytest.mark.notimpl("duckdb", raises=DuckDBParserException)
@pytest.mark.notimpl("flink", raises=Py4JJavaError)
@pytest.mark.notimpl("polars", raises=PolarsComputeError)
@pytest.mark.notimpl("postgres", raises=PsycoPg2UndefinedFunction)
@pytest.mark.notimpl("pyspark", raises=Py4JJavaError)
@pytest.mark.notimpl("risingwave", raises=PsycoPg2InternalError)
@pytest.mark.notimpl("trino", raises=TrinoUserError)
def test_struct_factory_empty(con):
    """Executing an empty struct literal should produce an empty dict."""
    # Build the struct from an empty mapping; no fields, no explicit type.
    empty_struct = ibis.struct({})
    assert con.execute(empty_struct) == {}


@pytest.mark.notimpl(["dask"])
@pytest.mark.parametrize(
("field", "expected"),
Expand Down
2 changes: 0 additions & 2 deletions ibis/expr/datatypes/value.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ def infer(value: Any) -> dt.DataType:
@infer.register(collections.OrderedDict)
def infer_struct(value: Mapping[str, Any]) -> dt.Struct:
    """Infer the [`Struct`](./datatypes.qmd#ibis.expr.datatypes.Struct) type of `value`.

    Each field's type is inferred from its value with `infer`. An empty
    mapping is accepted: it produces a `Struct` built from an empty field
    dict rather than raising `TypeError`.
    """
    # No emptiness guard: the comprehension yields `{}` for an empty mapping.
    fields = {name: infer(val) for name, val in value.items()}
    return dt.Struct(fields)

Expand Down
9 changes: 7 additions & 2 deletions ibis/expr/operations/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from public import public

import ibis.expr.datashape as ds
import ibis.expr.datatypes as dt
import ibis.expr.rules as rlz
from ibis.common.annotations import ValidationError, attribute
Expand Down Expand Up @@ -38,8 +39,6 @@ class StructColumn(Value):
names: VarTuple[str]
values: VarTuple[Value]

shape = rlz.shape_like("values")

def __init__(self, names, values):
if len(names) != len(values):
raise ValidationError(
Expand All @@ -52,3 +51,9 @@ def __init__(self, names, values):
def dtype(self) -> dt.DataType:
    """Build the struct type by pairing each name with its value's dtype."""
    # Kept lazy: a generator fed through zip, consumed by `from_tuples`.
    field_types = (val.dtype for val in self.values)
    named_types = zip(self.names, field_types)
    return dt.Struct.from_tuples(named_types)

@attribute
def shape(self) -> ds.DataShape:
    """Return the data shape of this struct.

    With no values there is nothing to derive a shape from, so the struct
    is scalar; otherwise the shape is the highest-precedence shape among
    the values.
    """
    if not self.values:
        return ds.scalar
    return rlz.highest_precedence_shape(self.values)
13 changes: 13 additions & 0 deletions ibis/tests/expr/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import pytest

import ibis
import ibis.expr.datashape as ds
import ibis.expr.datatypes as dt
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis import _
Expand All @@ -22,6 +24,17 @@ def s():
return ibis.table(dict(a="struct<f: float, g: string>"), name="s")


@pytest.mark.parametrize("val", [{}, []])
@pytest.mark.parametrize("typ", [None, "struct<>", dt.Struct.from_tuples([])])
def test_struct_factory_empty(val, typ):
    """Empty input builds an empty scalar struct and rejects a non-empty type."""
    # A type that declares fields cannot be satisfied by empty input.
    with pytest.raises(TypeError):
        ibis.struct(val, type="struct<a: float64, b: float64>")

    # Every parametrized `typ` (including None) must give the same empty struct.
    expr = ibis.struct(val, type=typ)
    assert expr.names == ()
    assert expr.type() == dt.Struct.from_tuples([])
    assert expr.op().shape == ds.scalar


def test_struct_operations():
value = OrderedDict(
[
Expand Down

0 comments on commit c2da371

Please sign in to comment.