Skip to content

Commit

Permalink
Add test for 'group_by' without 'partition_by' (#650)
Browse files Browse the repository at this point in the history
  • Loading branch information
dreadatour authored Dec 2, 2024
1 parent 74d885d commit 692c8dc
Showing 1 changed file with 43 additions and 0 deletions.
43 changes: 43 additions & 0 deletions tests/unit/lib/test_datachain.py
Original file line number Diff line number Diff line change
Expand Up @@ -2731,6 +2731,49 @@ def test_group_by_multiple_partition_by(test_session):
)


def test_group_by_no_partition_by(test_session):
    """Verify ``group_by`` called without ``partition_by``.

    Six rows are built, ordered by ``col4`` and aggregated with no
    partition key; the test expects a single output record holding the
    aggregates computed over all of the rows.
    """
    from datachain import func

    source = DataChain.from_values(
        col1=["a", "a", "b", "b", "b", "c"],
        col2=[1, 2, 1, 2, 1, 2],
        col3=[1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        col4=["1", "2", "3", "4", "5", "6"],
        session=test_session,
    )

    # No partition_by is passed to group_by: only aggregate signals.
    aggregated = source.order_by("col4").group_by(
        cnt=func.count(),
        cnt_col=func.count("col2"),
        sum=func.sum("col3"),
        concat=func.concat("col4"),
        value=func.any_value("col3"),
        collect=func.collect("col3"),
    )
    ds = aggregated.save("my-ds")

    expected_schema = {
        "cnt": "int",
        "cnt_col": "int",
        "sum": "float",
        "concat": "str",
        "value": "float",
        "collect": "list[float]",
    }
    expected_records = [
        {
            "cnt": 6,
            "cnt_col": 6,
            "sum": 21.0,
            "concat": "123456",
            "value": 1.0,
            "collect": [1.0, 2.0, 3.0, 4.0, 5.0, 6.0],
        },
    ]

    assert ds.signals_schema.serialize() == expected_schema
    assert ds.to_records() == expected_records


def test_group_by_error(test_session):
from datachain import func

Expand Down

0 comments on commit 692c8dc

Please sign in to comment.