Skip to content

Commit

Permalink
feat: support type kwarg in array() and map()
Browse files Browse the repository at this point in the history
fixes ibis-project#8289

This commit makes a lot of changes. It was hard for me to separate them out as I implemented them, but now that it's all hashed out, I can try to split this up into separate commits if you want, although that might be hard in
some cases.

Several of the backends were always broken here; the failures just weren't getting caught. I marked them as broken,
and we can fix them in a follow-up.

You can test this locally with, e.g.:
`pytest -m duckdb -k factory ibis/backends/tests/test_array.py  ibis/backends/tests/test_map.py ibis/backends/tests/test_struct.py`

Also, fix a typing bug: map() can accept ArrayValues, not just ArrayColumns

Also, support passing in None.

Also, raise an error when the value type can't be inferred from empty Python literals
(e.g., what is the value type for the elements of []?)

Also, make the type argument for struct() always have an effect, not just when passing in python literals.
So basically it can act like a cast.

Also, make these constructors idempotent.
  • Loading branch information
NickCrews committed Mar 15, 2024
1 parent 220085e commit 654bcda
Show file tree
Hide file tree
Showing 6 changed files with 191 additions and 48 deletions.
31 changes: 31 additions & 0 deletions ibis/backends/tests/test_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,37 @@
# list.


def test_array_factory(con):
    """Check that `ibis.array` builds, re-wraps, and casts array expressions.

    Exercises a Python list input, an existing array expression passed back
    into the factory, and the `type` argument for both kinds of input.
    """
    a = ibis.array([1, 2, 3])
    assert con.execute(a) == [1, 2, 3]

    # Feeding an array expression back into the factory should round-trip.
    a2 = ibis.array(a)
    assert con.execute(a2) == [1, 2, 3]

    # In Python `[1, 2, 3] == [1.0, 2.0, 3.0]` is True, so comparing executed
    # values alone cannot detect a missing cast. Check the expression type too.
    typed = ibis.array([1, 2, 3], type="array<float64>")
    assert str(typed.type()) == "array<float64>"
    assert con.execute(typed) == [1.0, 2.0, 3.0]

    typed2 = ibis.array(a, type="array<float64>")
    assert str(typed2.type()) == "array<float64>"
    assert con.execute(typed2) == [1.0, 2.0, 3.0]


@pytest.mark.notimpl("postgres", raises=PsycoPg2IndeterminateDatatype)
def test_array_factory_empty(con):
    """Expect TypeError for an untyped empty array; a typed one executes to `[]`."""
    with pytest.raises(TypeError):
        ibis.array([])

    expr = ibis.array([], type="array<float64>")
    rendered_type = str(expr.type())
    assert rendered_type == "array<float64>"

    result = con.execute(expr)
    assert result == []


@pytest.mark.broken("polars", raises=AssertionError)
@pytest.mark.broken("pandas", raises=TypeError)
def test_array_factory_null(con):
    """Expect TypeError for an untyped None; a typed None executes to None."""
    with pytest.raises(TypeError):
        ibis.array(None)

    expr = ibis.array(None, type="array<float64>")
    rendered_type = str(expr.type())
    assert rendered_type == "array<float64>"

    result = con.execute(expr)
    assert result is None


def test_array_column(backend, alltypes, df):
expr = ibis.array(
[alltypes["double_col"], alltypes["double_col"], 5.0, ibis.literal(6.0)]
Expand Down
27 changes: 27 additions & 0 deletions ibis/backends/tests/test_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,33 @@
]


def test_map_factory(con):
    """Check that `ibis.map` builds, re-wraps, and casts map expressions.

    Exercises a Python dict input, an existing map expression passed back
    into the factory, and the `type` keyword for both kinds of input.
    """
    m = ibis.map({"a": 1, "b": 2})
    assert con.execute(m) == {"a": 1, "b": 2}

    # Feeding a map expression back into the factory should round-trip.
    m2 = ibis.map(m)
    assert con.execute(m2) == {"a": 1, "b": 2}

    # `{"a": 1} == {"a": 1.0}` is True in Python, so the value comparison
    # cannot detect a missing cast. Check the expression type as well.
    typed = ibis.map({"a": 1, "b": 2}, type="map<string, float>")
    assert str(typed.type()) == "map<string, float64>"
    assert con.execute(typed) == {"a": 1.0, "b": 2.0}

    typed2 = ibis.map(m, type="map<string, float>")
    assert str(typed2.type()) == "map<string, float64>"
    assert con.execute(typed2) == {"a": 1.0, "b": 2.0}


def test_map_factory_empty(con):
    """Expect TypeError for an untyped empty map; a typed one executes to `{}`."""
    with pytest.raises(TypeError):
        ibis.map({})

    expr = ibis.map({}, type="map<string, float>")
    rendered_type = str(expr.type())
    assert rendered_type == "map<string, float64>"

    result = con.execute(expr)
    assert result == {}


def test_map_factory_null(con):
    """Expect TypeError for an untyped None; a typed None executes to None."""
    with pytest.raises(TypeError):
        ibis.map(None)

    expr = ibis.map(None, type="map<string, float>")
    rendered_type = str(expr.type())
    assert rendered_type == "map<string, float64>"

    result = con.execute(expr)
    assert result is None


@pytest.mark.notimpl(["pandas", "dask"])
def test_map_table(backend):
table = backend.map
Expand Down
29 changes: 29 additions & 0 deletions ibis/backends/tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,35 @@
]


@pytest.mark.notimpl(["postgres"])
@pytest.mark.broken(["pandas", "dask"], reason="casting is broken")
def test_struct_factory(con):
    """Check that `ibis.struct` builds, re-wraps, and casts struct expressions.

    Exercises a Python dict input, an existing struct expression passed back
    into the factory, and the `type` argument for both kinds of input.
    """
    s = ibis.struct({"a": 1, "b": 2})
    assert con.execute(s) == {"a": 1, "b": 2}

    # Feeding a struct expression back into the factory should round-trip.
    s2 = ibis.struct(s)
    assert con.execute(s2) == {"a": 1, "b": 2}

    # `{"a": 1} == {"a": 1.0}` is True in Python, so the value comparison
    # cannot detect a missing cast. Check the expression type as well.
    typed = ibis.struct({"a": 1, "b": 2}, type="struct<a: float64, b: float64>")
    assert str(typed.type()) == "struct<a: float64, b: float64>"
    assert con.execute(typed) == {"a": 1.0, "b": 2.0}

    typed2 = ibis.struct(s, type="struct<a: float64, b: float64>")
    assert str(typed2.type()) == "struct<a: float64, b: float64>"
    assert con.execute(typed2) == {"a": 1.0, "b": 2.0}


def test_struct_factory_empty(con):
    """Expect TypeError for an empty struct, with or without a `type`."""
    # First call omits `type`; second passes one. Both must raise.
    for extra_kwargs in ({}, {"type": "struct<a: float64, b: float64>"}):
        with pytest.raises(TypeError):
            ibis.struct({}, **extra_kwargs)


@pytest.mark.broken("polars", raises=AttributeError)
def test_struct_factory_null(con):
    """Expect TypeError for an untyped None; a typed None executes to None."""
    with pytest.raises(TypeError):
        ibis.struct(None)

    expr = ibis.struct(None, type="struct<a: float64, b: float>")
    rendered_type = str(expr.type())
    assert rendered_type == "struct<a: float64, b: float64>"

    result = con.execute(expr)
    assert result is None


@pytest.mark.notimpl(["dask"])
@pytest.mark.parametrize(
("field", "expected"),
Expand Down
51 changes: 38 additions & 13 deletions ibis/expr/types/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,16 @@

from public import public

import ibis
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.common.deferred import Deferred, deferrable
from ibis.expr.types.generic import Column, Scalar, Value

if TYPE_CHECKING:
from collections.abc import Iterable

import ibis.expr.types as ir
from ibis.expr.types import dt
from ibis.expr.types.typing import V

import ibis.common.exceptions as com
Expand Down Expand Up @@ -1081,7 +1083,10 @@ def __getitem__(self, index: int | ir.IntegerValue | slice) -> ir.Column:

@public
@deferrable
def array(values: Iterable[V]) -> ArrayValue:
def array(
values: ArrayValue | Iterable[V] | None,
type: str | dt.DataType | None = None,
) -> ArrayValue:
"""Create an array expression.
If any values are [column expressions](../concepts/datatypes.qmd) the
Expand All @@ -1092,6 +1097,9 @@ def array(values: Iterable[V]) -> ArrayValue:
----------
values
An iterable of Ibis expressions or Python literals
type
An instance of `ibis.expr.datatypes.DataType` or a string indicating
the Ibis type of `values`, e.g. `array<float>`.
Returns
-------
Expand Down Expand Up @@ -1120,15 +1128,32 @@ def array(values: Iterable[V]) -> ArrayValue:
│ [3, 42, ... +1] │
└──────────────────────┘
>>> ibis.array([t.a, 42 + ibis.literal(5)])
┏━━━━━━━━━━━━━━━━━━━━━━┓
┃ Array()
┡━━━━━━━━━━━━━━━━━━━━━━┩
│ array<int64>
├──────────────────────┤
│ [1, 47]
│ [2, 47]
│ [3, 47]
└──────────────────────┘
>>> ibis.array([t.a, 42 + ibis.literal(5)], type="array<float>")
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Cast(Array(), array<float64>)
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
│ array<float64>
├───────────────────────────────
│ [1.0, 47.0]
│ [2.0, 47.0]
│ [3.0, 47.0]
└───────────────────────────────
"""
return ops.Array(tuple(values)).to_expr()
if values is None:
if type is None:
raise TypeError("type must be specified when values is None")
return ibis.literal(None, type=type)

if isinstance(values, ir.ArrayValue):
result = values
else:
values = tuple(values)
if len(values) == 0:
if type is None:
raise TypeError("type must be specified when values empty")
result = ibis.literal([], type=type)
else:
result = ops.Array(values).to_expr()
if type is not None:
result = result.cast(type)
return result
52 changes: 36 additions & 16 deletions ibis/expr/types/maps.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,16 @@

from public import public

import ibis
import ibis.expr.operations as ops
import ibis.expr.types as ir
from ibis.common.deferred import deferrable
from ibis.expr.types.generic import Column, Scalar, Value

if TYPE_CHECKING:
from collections.abc import Iterable, Mapping

import ibis.expr.types as ir
from ibis.expr.types.arrays import ArrayColumn
from ibis.expr.types import dt


@public
Expand Down Expand Up @@ -435,8 +436,10 @@ def __getitem__(self, key: ir.Value) -> ir.Column:
@public
@deferrable
def map(
keys: Iterable[Any] | Mapping[Any, Any] | ArrayColumn,
values: Iterable[Any] | ArrayColumn | None = None,
keys: Iterable[Any] | Mapping[Any, Any] | ir.ArrayValue | MapValue | None,
values: Iterable[Any] | ir.ArrayValue | None = None,
*,
type: str | dt.DataType | None = None,
) -> MapValue:
"""Create a MapValue.
Expand All @@ -449,6 +452,9 @@ def map(
Keys of the map or `Mapping`. If `keys` is a `Mapping`, `values` must be `None`.
values
Values of the map or `None`. If `None`, the `keys` argument must be a `Mapping`.
type
An instance of `ibis.expr.datatypes.DataType` or a string indicating
the Ibis type of the resulting map, e.g. `map<string, float>`.
Returns
-------
Expand Down Expand Up @@ -476,16 +482,30 @@ def map(
│ ['a', 'b'] │ [1, 2] │
│ ['b'] │ [3] │
└──────────────────────┴──────────────────────┘
>>> ibis.map(t.keys, t.values)
┏━━━━━━━━━━━━━━━━━━━━━━┓
┃ Map(keys, values)
┡━━━━━━━━━━━━━━━━━━━━━━┩
│ map<string, int64>
├──────────────────────┤
│ {'a': 1, 'b': 2}
│ {'b': 3}
└──────────────────────┘
>>> ibis.map(t.keys, t.values, type="map<string, float>")
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Cast(Map(keys, values), map<string, float64>)
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
│ map<string, float64>
├───────────────────────────────────────────────
│ {'a': 1.0, 'b': 2.0}
│ {'b': 3.0}
└───────────────────────────────────────────────
"""
if values is None:
keys, values = tuple(keys.keys()), tuple(keys.values())
return ops.Map(keys, values).to_expr()
if keys is None:
if type is None:
raise TypeError("Must specify a type when keys is None")
return ibis.literal(None, type=type)

if isinstance(keys, MapValue):
result = keys
else:
if values is None:
keys, values = tuple(keys.keys()), tuple(keys.values())
if len(keys) == 0 and type is None:
raise TypeError("Must specify a type when keys is empty")
result = ops.Map(keys, values).to_expr()

if type is not None:
result = result.cast(type)
return result
49 changes: 30 additions & 19 deletions ibis/expr/types/structs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
@public
@deferrable
def struct(
value: Iterable[tuple[str, V]] | Mapping[str, V],
value: Iterable[tuple[str, V]] | Mapping[str, V] | StructValue | None,
type: str | dt.DataType | None = None,
) -> StructValue:
"""Create a struct expression.
Expand All @@ -37,8 +37,7 @@ def struct(
`(str, Value)`.
type
An instance of `ibis.expr.datatypes.DataType` or a string indicating
the Ibis type of `value`. This is only used if all of the input values
are Python literals. eg `struct<a: float, b: string>`.
the Ibis type of `value`. eg `struct<a: float, b: string>`.
Returns
-------
Expand All @@ -62,26 +61,38 @@ def struct(
Create a struct column from a column and a scalar literal
>>> t = ibis.memtable({"a": [1, 2, 3]})
>>> ibis.struct([("a", t.a), ("b", "foo")])
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ StructColumn()
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ struct<a: int64, b: string> │
├─────────────────────────────┤
│ {'a': 1, 'b': 'foo'} │
│ {'a': 2, 'b': 'foo'} │
│ {'a': 3, 'b': 'foo'} │
└─────────────────────────────┘
>>> ibis.struct([("a", t.a), ("b", "foo")], type="struct<a: float, b: string>")
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
Cast(StructColumn(), struct<a: float64, b: string>)
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
│ struct<a: float64, b: string>
├─────────────────────────────────────────────────────
│ {'a': 1.0, 'b': 'foo'}
│ {'a': 2.0, 'b': 'foo'}
│ {'a': 3.0, 'b': 'foo'}
└─────────────────────────────────────────────────────
"""
import ibis.expr.operations as ops

fields = dict(value)
if any(isinstance(value, Value) for value in fields.values()):
names = tuple(fields.keys())
values = tuple(fields.values())
return ops.StructColumn(names=names, values=values).to_expr()
if value is None:
if type is None:
raise TypeError("Must specify type if value is None")
return literal(None, type=type)

if isinstance(value, StructValue):
result = value
else:
return literal(collections.OrderedDict(fields), type=type)
fields = dict(value)
if any(isinstance(value, Value) for value in fields.values()):
names = tuple(fields.keys())
values = tuple(fields.values())
result = ops.StructColumn(names=names, values=values).to_expr()
else:
result = literal(collections.OrderedDict(fields), type=type)

if type is not None:
result = result.cast(type)
return result


@public
Expand Down

0 comments on commit 654bcda

Please sign in to comment.