diff --git a/README.md b/README.md index f41dc087..89a0fe3b 100644 --- a/README.md +++ b/README.md @@ -98,58 +98,38 @@ query_1.py TPCH_QUERY_TESTS = ( { "test_name": "test_tpch_sql_1", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": get_sql("q1.sql"), "substrait_query": get_substrait_plan("query_01_plan.json"), }, { "test_name": "test_tpch_sql_2", - "file_names": [ - "part.parquet", - "supplier.parquet", - "partsupp.parquet", - "nation.parquet", - "region.parquet", - "partsupp.parquet", - "supplier.parquet", - "nation.parquet", - "region.parquet", - ], + "local_files": {}, + "named_tables": { + "part": "part.parquet", + "supplier": "supplier.parquet", + "partsupp": "partsupp.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + "partsupp": "partsupp.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + }, "sql_query": get_sql("q2.sql"), "substrait_query": get_substrait_plan("query_02_plan.json"), }, -] +) ``` ## Substrait Plans Substrait query plans are located in `substrait_consumer/tests/integration/queries/tpch_substrait_plans`. -The substrait query plans have placeholder strings in the `local_files` objects in the json -structure. -```json -"local_files": { - "items": [ - { - "uri_file": "file://FILENAME_PLACEHOLDER_0", - "parquet": {} - } - ] -} -``` - - -When the tests are run, these placeholders are replaced by the parquet data listed -listed in `"file_names"` in the test case args file. The order of parquet file appearance in the -`"file_names"` list should be consistent with the ordering for the table names in the substrait -query plan. ## SQL Queries SQL queries are located in `substrait_consumer/tests/integration/queries/tpch_sql`. -The SQL queries have empty bracket placeholders (`'{}'`) where the table names will be inserted. -Table names are determined based on the `"file_names"` in the test case args file. 
The order of -parquet file appearance in the `"file_names"` list should be consistent with the ordering for the -table names in the SQL query. The actual format after replacement will depend on the consumer being -used. - +The SQL queries have named placeholders (e.g. `'{customer}'`) where the table names or file paths will be inserted. +Table names and file paths are determined based on the `"named_tables"` and `"local_files"` entries in the test case args file. # Function Tests The substrait function tests aim to test the functions available in Substrait. This is done @@ -182,7 +162,8 @@ arithmetic_tests.py SCALAR_FUNCTIONS = ( { "test_name": "add", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["add"], "ibis_expr": IBIS_SCALAR["add"], }, @@ -196,7 +177,7 @@ SQL_SCALAR = { "add": """ SELECT PS_PARTKEY, PS_SUPPKEY, add(PS_PARTKEY, PS_SUPPKEY) AS ADD_KEY - FROM '{}'; + FROM '{partsupp}'; """, ``` diff --git a/substrait_consumer/common.py b/substrait_consumer/common.py index 19da85ec..708bc5c7 100644 --- a/substrait_consumer/common.py +++ b/substrait_consumer/common.py @@ -44,67 +44,16 @@ class SubstraitUtils: """ @staticmethod - def get_full_path(file_names: Iterable[str]) -> list[str]: + def compute_full_paths(local_files: dict[str, str]) -> dict[str, str]: """ - Get full paths for the TPCH parquet data. + Get the full paths for the given local files. Parameters: - file_names: - List of TPCH parquet data file names provided by the test case. + local_files: + A `dict` mapping format argument names to local file paths. Returns: - List of full paths. + A `dict` where the paths are expanded to absolute paths. 
""" data_dir = CUR_DIR / "data" / "tpch_parquet" - full_paths_list = [f"{data_dir}/{dataset}" for dataset in file_names] - - return full_paths_list - - def format_sql_query(self, sql_query: str, file_names: list[str]) -> str: - """ - Replace the 'Table' Parameters from the SQL query with the relative - file paths of the parquet data. - - Parameters: - sql_query: - SQL query. - file_names: - List of file names. - - Returns: - SQL Query with file paths. - """ - sql_commands_list = [line.strip() for line in sql_query.strip().split("\n")] - sql_query = " ".join(sql_commands_list) - # Get full path for all datasets used in the query - parquet_file_paths = self.get_full_path(file_names) - - return sql_query.format(*parquet_file_paths) - - def format_substrait_query( - self, substrait_query: str, file_names: list[str] - ) -> str: - """ - Replace the 'local_files' path in the substrait query plan with - the full path of the parquet data. - - Parameters: - substrait_query: - Substrait query. - file_names: - List of file names. - - Returns: - Substrait query plan in byte format. - """ - # Get full path for all datasets used in the query - parquet_file_paths = self.get_full_path(file_names) - - # Replace the filename placeholder in the substrait query plan with - # the proper parquet data file paths. 
- for count, file_path in enumerate(parquet_file_paths): - substrait_query = substrait_query.replace( - f"FILENAME_PLACEHOLDER_{count}", file_path - ) - - return substrait_query + return {k: f"{data_dir}/{v}" for k, v in local_files.items()} diff --git a/substrait_consumer/consumers/acero_consumer.py b/substrait_consumer/consumers/acero_consumer.py index a154ba5e..26aaaa57 100644 --- a/substrait_consumer/consumers/acero_consumer.py +++ b/substrait_consumer/consumers/acero_consumer.py @@ -1,15 +1,9 @@ from __future__ import annotations -import string -from pathlib import Path -from typing import Iterable - import pyarrow as pa import pyarrow.parquet as pq import pyarrow.substrait as substrait -from substrait_consumer.common import SubstraitUtils - from .consumer import COLUMN_A, COLUMN_B, COLUMN_C, COLUMN_D, Consumer @@ -19,15 +13,14 @@ class AceroConsumer(Consumer): """ def __init__(self): - self.tables = {} - self.table_provider = lambda names, schema: self.tables[names[0].lower()] - - def setup(self, db_connection, file_names: Iterable[str]): - if len(file_names) > 0: - parquet_file_paths = SubstraitUtils.get_full_path(file_names) - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem - self.tables[table_name] = pq.read_table(file_path) + self.named_tables = {} + self.table_provider = lambda names, schema: self.named_tables[names[0].lower()] + + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): + for table_name, file_path in named_tables.items(): + self.named_tables[table_name] = pq.read_table(file_path) else: table = pa.table( { @@ -37,7 +30,7 @@ def setup(self, db_connection, file_names: Iterable[str]): "d": COLUMN_D, } ) - self.tables["t"] = table + self.named_tables["t"] = table def run_substrait_query(self, substrait_query: str) -> pa.Table: """ diff --git a/substrait_consumer/consumers/consumer.py b/substrait_consumer/consumers/consumer.py index 
0a8a4a77..129fc04a 100644 --- a/substrait_consumer/consumers/consumer.py +++ b/substrait_consumer/consumers/consumer.py @@ -1,10 +1,12 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Iterable import pyarrow as pa +from substrait_consumer.common import SubstraitUtils + + COLUMN_A = [1, 2, 3, -4, 5, -6, 7, 8, 9, None] COLUMN_B = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2] COLUMN_C = [ @@ -34,8 +36,33 @@ class Consumer(ABC): + + def setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): + """ + Initializes this `Consumer` instance. + + In particular, expands the paths in `local_files` and `named_tables` to + absolute paths and forwards the arguments to `self._setup` implemented + by classes inheriting from `Consumer`. + + Parameters: + db_connection: + DuckDB connection for this `Consumer`. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. 
+ """ + local_files = SubstraitUtils.compute_full_paths(local_files) + named_tables = SubstraitUtils.compute_full_paths(named_tables) + self._setup(db_connection, local_files, named_tables) + @abstractmethod - def setup(self, db_connection, file_names: Iterable[str]): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): pass @abstractmethod diff --git a/substrait_consumer/consumers/datafusion_consumer.py b/substrait_consumer/consumers/datafusion_consumer.py index 89cb9873..1789dd18 100644 --- a/substrait_consumer/consumers/datafusion_consumer.py +++ b/substrait_consumer/consumers/datafusion_consumer.py @@ -1,9 +1,6 @@ from __future__ import annotations import json -import string -from pathlib import Path -from typing import Iterable import pyarrow as pa from datafusion import SessionContext @@ -11,8 +8,6 @@ from google.protobuf.json_format import Parse from substrait.gen.proto.plan_pb2 import Plan -from substrait_consumer.common import SubstraitUtils - from .consumer import COLUMN_A, COLUMN_B, COLUMN_C, COLUMN_D, Consumer @@ -24,17 +19,16 @@ class DataFusionConsumer(Consumer): def __init__(self): self._ctx = SessionContext() - def setup(self, db_connection, file_names: Iterable[str]): - if len(file_names) > 0: - parquet_file_paths = SubstraitUtils.get_full_path(file_names) - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem - if self._ctx.table_exist(table_name): - self._ctx.deregister_table(table_name) - self._ctx.register_parquet(table_name, file_path) + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): + for table_name, file_path in named_tables.items(): + if self._ctx.table_exist(table_name): + self._ctx.deregister_table(table_name) + self._ctx.register_parquet(table_name, file_path) else: if not self._ctx.table_exist("t"): - tables = pa.RecordBatch.from_arrays( + named_tables = pa.RecordBatch.from_arrays( [ 
pa.array(COLUMN_A), pa.array(COLUMN_B), @@ -44,7 +38,7 @@ def setup(self, db_connection, file_names: Iterable[str]): names=["a", "b", "c", "d"], ) - self._ctx.register_record_batches("t", [[tables]]) + self._ctx.register_record_batches("t", [[named_tables]]) def run_substrait_query(self, substrait_query: str) -> pa.Table: """ diff --git a/substrait_consumer/consumers/duckdb_consumer.py b/substrait_consumer/consumers/duckdb_consumer.py index 2c850527..7e0fa304 100644 --- a/substrait_consumer/consumers/duckdb_consumer.py +++ b/substrait_consumer/consumers/duckdb_consumer.py @@ -1,15 +1,10 @@ from __future__ import annotations -import string -from pathlib import Path -from typing import Iterable - import duckdb import pyarrow as pa -from substrait_consumer.common import SubstraitUtils - from .consumer import Consumer +from substrait_consumer.producers.producer import load_named_tables class DuckDBConsumer(Consumer): @@ -26,9 +21,11 @@ def __init__(self, db_connection=None): self.db_connection.execute("INSTALL substrait") self.db_connection.execute("LOAD substrait") - def setup(self, db_connection, file_names: Iterable[str]): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): self.db_connection = db_connection - self.load_tables_from_parquet(file_names) + load_named_tables(db_connection, named_tables) def run_substrait_query(self, substrait_query: str) -> pa.Table: """ @@ -42,31 +39,3 @@ def run_substrait_query(self, substrait_query: str) -> pa.Table: A pyarrow table resulting from running the substrait query plan. """ return self.db_connection.from_substrait_json(substrait_query).arrow() - - def load_tables_from_parquet( - self, - file_names: Iterable[str], - ) -> list: - """ - Load all the parquet files into separate tables in DuckDB. - - Parameters: - file_names: - Name of parquet files. - - Returns: - A list of the table names. 
- """ - parquet_file_paths = SubstraitUtils.get_full_path(file_names) - table_names = [] - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem - try: - self.db_connection.execute(f"DROP TABLE {table_name}") - except: - pass - create_table_sql = f"CREATE TABLE {table_name} AS SELECT * FROM read_parquet('{file_path}');" - self.db_connection.execute(create_table_sql) - table_names.append(table_name) - - return table_names diff --git a/substrait_consumer/context.py b/substrait_consumer/context.py index 2af74cc3..6a8f3e47 100644 --- a/substrait_consumer/context.py +++ b/substrait_consumer/context.py @@ -42,13 +42,13 @@ def produce_isthmus_substrait(sql_string, schema_list, validate=False): return json_plan -def get_schema(file_names): +def get_schema(local_files): """ Create the list of schemas based on the given file names. If there are no files give, a custom schema for the data is used. Parameters: - file_names: List of file names. + local_files: List of file names. Returns: List of all schemas as a java list. 
@@ -56,11 +56,11 @@ def get_schema(file_names): import substrait_consumer.java_definitions as java arr = java.ArrayListClass() - if file_names: + if local_files: text_schema_file = open(schema_file) schema_string = text_schema_file.read().replace("\n", " ").split(";")[:-1] for create_table in schema_string: - if "small" not in file_names[0]: + if "small" not in local_files[0]: create_table = create_table.replace("_small", "") java_obj = jpype.JObject @ jpype.JString(create_table) arr.add(java_obj) diff --git a/substrait_consumer/functional/aggregate_relation_configs.py b/substrait_consumer/functional/aggregate_relation_configs.py index 53644b4d..4cd81c93 100644 --- a/substrait_consumer/functional/aggregate_relation_configs.py +++ b/substrait_consumer/functional/aggregate_relation_configs.py @@ -4,61 +4,74 @@ AGGREGATE_RELATION_TESTS = ( { "test_name": "single_measure_aggregate", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["single_measure_aggregate"], "ibis_expr": None }, { "test_name": "multiple_measure_aggregate", - "file_names": ['orders_small.parquet'], + "local_files": {}, + "named_tables": {"orders": "orders_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["multiple_measure_aggregate"], "ibis_expr": None }, { "test_name": "aggregate_with_computation", - "file_names": ['orders_small.parquet'], + "local_files": {}, + "named_tables": {"orders": "orders_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_computation"], "ibis_expr": None }, { "test_name": "compute_within_aggregate", - "file_names": ['orders_small.parquet'], + "local_files": {}, + "named_tables": {"orders": "orders_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["compute_within_aggregate"], "ibis_expr": None }, { "test_name": "computation_between_aggregates", - "file_names": ['orders_small.parquet'], + "local_files": {}, + "named_tables": {"orders": 
"orders_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["computation_between_aggregates"], "ibis_expr": None }, { "test_name": "aggregate_in_subquery", - "file_names": ['orders_small.parquet', 'orders_small.parquet'], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": AGGREGATE_RELATIONS["aggregate_in_subquery"], "ibis_expr": None }, { "test_name": "aggregate_with_group_by", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_group_by"], "ibis_expr": None }, { "test_name": "aggregate_with_group_by_cube", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_group_by_cube"], "ibis_expr": None }, { "test_name": "aggregate_with_group_by_rollup", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_group_by_rollup"], "ibis_expr": None }, { "test_name": "aggregate_with_grouping_set", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_grouping_set"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/approximation_configs.py b/substrait_consumer/functional/approximation_configs.py index b804a80a..f382930d 100644 --- a/substrait_consumer/functional/approximation_configs.py +++ b/substrait_consumer/functional/approximation_configs.py @@ -3,13 +3,15 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "approx_count_distinct", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["approx_count_distinct"], "ibis_expr": None, }, { "test_name": 
"approx_distinct", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["approx_distinct"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/arithmetic_configs.py b/substrait_consumer/functional/arithmetic_configs.py index f723793b..fc933dcb 100644 --- a/substrait_consumer/functional/arithmetic_configs.py +++ b/substrait_consumer/functional/arithmetic_configs.py @@ -6,115 +6,134 @@ SCALAR_FUNCTIONS = ( { "test_name": "add", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["add"], "ibis_expr": IBIS_SCALAR["add"], }, { "test_name": "subtract", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["subtract"], "ibis_expr": IBIS_SCALAR["subtract"], }, { "test_name": "multiply", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["multiply"], "ibis_expr": IBIS_SCALAR["multiply"], }, { "test_name": "divide", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["divide"], "ibis_expr": IBIS_SCALAR["divide"], }, { "test_name": "modulus", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["modulus"], "ibis_expr": IBIS_SCALAR["modulus"], }, { "test_name": "factorial", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["factorial"], "ibis_expr": None, }, { "test_name": "power", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["power"], "ibis_expr": IBIS_SCALAR["power"], }, { "test_name": "sqrt", - 
"file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["sqrt"], "ibis_expr": IBIS_SCALAR["sqrt"], }, { "test_name": "exp", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["exp"], "ibis_expr": IBIS_SCALAR["exp"], }, { "test_name": "negate", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["negate"], "ibis_expr": IBIS_SCALAR["negate"], }, { "test_name": "cos", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["cos"], "ibis_expr": IBIS_SCALAR["cos"], }, { "test_name": "acos", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["acos"], "ibis_expr": IBIS_SCALAR["acos"], }, { "test_name": "sin", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["sin"], "ibis_expr": IBIS_SCALAR["sin"], }, { "test_name": "asin", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["asin"], "ibis_expr": IBIS_SCALAR["asin"], }, { "test_name": "tan", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["tan"], "ibis_expr": IBIS_SCALAR["tan"], }, { "test_name": "atan", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["atan"], "ibis_expr": IBIS_SCALAR["atan"], }, { "test_name": "atan2", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["atan2"], "ibis_expr": None, }, { 
"test_name": "abs", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["abs"], "ibis_expr": IBIS_SCALAR["abs"], }, { "test_name": "sign", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["sign"], "ibis_expr": IBIS_SCALAR["sign"], }, @@ -124,67 +143,78 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "sum", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["sum"], "ibis_expr": IBIS_AGGREGATE["sum"], }, { "test_name": "count", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["count"], "ibis_expr": None, }, { "test_name": "count_star", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["count_star"], "ibis_expr": None, }, { "test_name": "avg", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["avg"], "ibis_expr": IBIS_AGGREGATE["avg"], }, { "test_name": "min", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["min"], "ibis_expr": IBIS_AGGREGATE["min"], }, { "test_name": "max", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["max"], "ibis_expr": IBIS_AGGREGATE["max"], }, { "test_name": "median", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["median"], "ibis_expr": IBIS_AGGREGATE["median"], }, { "test_name": "mode", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["mode"], "ibis_expr": None, }, { 
"test_name": "product", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["product"], "ibis_expr": None, }, { "test_name": "std_dev", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["std_dev"], "ibis_expr": None, }, { "test_name": "variance", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["variance"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/arithmetic_decimal_configs.py b/substrait_consumer/functional/arithmetic_decimal_configs.py index b0fc14f9..abe4e14b 100644 --- a/substrait_consumer/functional/arithmetic_decimal_configs.py +++ b/substrait_consumer/functional/arithmetic_decimal_configs.py @@ -6,31 +6,36 @@ SCALAR_FUNCTIONS = ( { "test_name": "add", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["add"], "ibis_expr": None, }, { "test_name": "subtract", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["subtract"], "ibis_expr": None, }, { "test_name": "multiply", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["multiply"], "ibis_expr": None, }, { "test_name": "divide", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["divide"], "ibis_expr": None, }, { "test_name": "modulus", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["modulus"], "ibis_expr": None, }, @@ -39,25 +44,29 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "sum", - "file_names": 
["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["sum"], "ibis_expr": None, }, { "test_name": "avg", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["avg"], "ibis_expr": None, }, { "test_name": "min", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["min"], "ibis_expr": None, }, { "test_name": "max", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["max"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/boolean_configs.py b/substrait_consumer/functional/boolean_configs.py index e002ad54..9b686d6e 100644 --- a/substrait_consumer/functional/boolean_configs.py +++ b/substrait_consumer/functional/boolean_configs.py @@ -4,25 +4,29 @@ SCALAR_FUNCTIONS = ( { "test_name": "or", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["or"], "ibis_expr": IBIS_SCALAR["or"], }, { "test_name": "and", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["and"], "ibis_expr": IBIS_SCALAR["and"], }, { "test_name": "not", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["not"], "ibis_expr": None, }, { "test_name": "xor", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["xor"], "ibis_expr": None, }, @@ -31,13 +35,15 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "bool_and", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_AGGREGATE["bool_and"], "ibis_expr": None, }, { "test_name": "bool_or", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_AGGREGATE["bool_or"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/common.py 
b/substrait_consumer/functional/common.py index 685b18ef..9f88d627 100644 --- a/substrait_consumer/functional/common.py +++ b/substrait_consumer/functional/common.py @@ -1,7 +1,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Callable, Iterable +from typing import TYPE_CHECKING, Callable import pytest from duckdb import DuckDBPyConnection @@ -51,7 +51,8 @@ def generate_snapshot_results( test_name: str, snapshot: Snapshot, db_con: DuckDBPyConnection, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ): """ @@ -65,17 +66,18 @@ def generate_snapshot_results( Pytest snapshot plugin used for verification. db_con: DuckDB connection for creating in memory tables. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. """ # Load the parquet files into DuckDB and return all the table names as a list producer = DuckDBProducer() - producer.set_db_connection(db_con) - sql_query = producer.format_sql(sql_query[0], file_names) + producer.setup(db_con, local_files, named_tables) - duckdb_result = db_con.query(f"{sql_query}").arrow() + duckdb_result = producer.run_sql_query(sql_query[0]) duckdb_result = duckdb_result.rename_columns( list(map(str.lower, duckdb_result.column_names)) ) @@ -96,7 +98,8 @@ def substrait_producer_sql_test( test_name: str, snapshot: Snapshot, db_con: DuckDBPyConnection, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -114,9 +117,11 @@ def substrait_producer_sql_test( snapshot: Pytest snapshot plugin used for verification. db_con: - DuckDB connection for creating in memory tables. - file_names: - List of parquet files. + DuckDB connection for creating in-memory tables. 
+ local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. ibis_expr: @@ -126,11 +131,8 @@ def substrait_producer_sql_test( *args: The data tables to be passed to the ibis expression. """ - producer.set_db_connection(db_con) - supported_producers = sql_query[1] - - # Load the parquet files into DuckDB and return all the table names as a list - sql_query = producer.format_sql(sql_query[0], file_names) + producer.setup(db_con, local_files, named_tables) + sql_query, supported_producers = sql_query # Convert the SQL/Ibis expression to a substrait query plan if type(producer).__name__ == "IbisProducer": @@ -159,7 +161,8 @@ def substrait_consumer_sql_test( test_name: str, snapshot: Snapshot, db_con: DuckDBPyConnection, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -177,8 +180,10 @@ def substrait_consumer_sql_test( Pytest snapshot plugin used for verification. db_con: DuckDB connection for creating in memory tables. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. ibis_expr: @@ -188,7 +193,7 @@ def substrait_consumer_sql_test( consumer: Substrait consumer class. 
""" - consumer.setup(db_con, file_names) + consumer.setup(db_con, local_files, named_tables) group, name = test_name.split(":") snopshot_dir = RELATION_SNAPSHOT_DIR if "relation" in group else FUNCTION_SNAPSHOT_DIR diff --git a/substrait_consumer/functional/comparison_configs.py b/substrait_consumer/functional/comparison_configs.py index 5930b508..5c10ef88 100644 --- a/substrait_consumer/functional/comparison_configs.py +++ b/substrait_consumer/functional/comparison_configs.py @@ -6,85 +6,99 @@ SCALAR_FUNCTIONS = ( { "test_name": "not_equal", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["not_equal"], "ibis_expr": IBIS_SCALAR["not_equal"], }, { "test_name": "equal", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["equal"], "ibis_expr": IBIS_SCALAR["equal"], }, { "test_name": "is_not_distinct_from", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_not_distinct_from"], "ibis_expr": None, }, { "test_name": "lt", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["lt"], "ibis_expr": IBIS_SCALAR["lt"], }, { "test_name": "lte", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["lte"], "ibis_expr": IBIS_SCALAR["lte"], }, { "test_name": "gt", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["gt"], "ibis_expr": IBIS_SCALAR["gt"], }, { "test_name": "gte", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["gte"], "ibis_expr": IBIS_SCALAR["gte"], }, { "test_name": "is_not_null", - "file_names": [], + "local_files": {}, + 
"named_tables": {}, "sql_query": SQL_SCALAR["is_not_null"], "ibis_expr": None, }, { "test_name": "is_null", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_null"], "ibis_expr": None, }, { "test_name": "is_nan", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_nan"], "ibis_expr": None, }, { "test_name": "is_finite", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_finite"], "ibis_expr": None, }, { "test_name": "is_infinite", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_infinite"], "ibis_expr": None, }, { "test_name": "between", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["between"], "ibis_expr": None, }, { "test_name": "coalesce", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["coalesce"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/datetime_configs.py b/substrait_consumer/functional/datetime_configs.py index 2b9eb470..afd4ad08 100644 --- a/substrait_consumer/functional/datetime_configs.py +++ b/substrait_consumer/functional/datetime_configs.py @@ -3,49 +3,57 @@ SCALAR_FUNCTIONS = ( { "test_name": "extract", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["extract"], "ibis_expr": None, }, { "test_name": "add", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["add"], "ibis_expr": None, }, { "test_name": "subtract", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["subtract"], "ibis_expr": None, }, { "test_name": "lt", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": 
SQL_SCALAR["lt"], "ibis_expr": None, }, { "test_name": "lte", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["lte"], "ibis_expr": None, }, { "test_name": "gt", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["gt"], "ibis_expr": None, }, { "test_name": "gte", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["gte"], "ibis_expr": None, }, { "test_name": "add_intervals", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": SQL_SCALAR["add_intervals"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/ddl_relation_configs.py b/substrait_consumer/functional/ddl_relation_configs.py index 8f450533..c2afa23a 100644 --- a/substrait_consumer/functional/ddl_relation_configs.py +++ b/substrait_consumer/functional/ddl_relation_configs.py @@ -4,43 +4,50 @@ DDL_RELATION_TESTS = ( { "test_name": "create_table", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": DDL_RELATIONS["create_table"], "ibis_expr": None }, { "test_name": "drop_table", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["drop_table"], "ibis_expr": None }, { "test_name": "alter_table", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["alter_table"], "ibis_expr": None }, { "test_name": "alter_column", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["alter_column"], "ibis_expr": None }, { "test_name": "drop_column", - "file_names": ["customer.parquet"], + "local_files": {}, + 
"named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["drop_column"], "ibis_expr": None }, { "test_name": "create_view", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["create_view"], "ibis_expr": None }, { "test_name": "create_or_replace_view", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["create_or_replace_view"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/fetch_relation_configs.py b/substrait_consumer/functional/fetch_relation_configs.py index 1d0ed311..d6f8950c 100644 --- a/substrait_consumer/functional/fetch_relation_configs.py +++ b/substrait_consumer/functional/fetch_relation_configs.py @@ -4,13 +4,15 @@ FETCH_RELATION_TESTS = ( { "test_name": "fetch", - "file_names": ["orders.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet"}, "sql_query": FETCH_RELATIONS["fetch"], "ibis_expr": None }, { "test_name": "fetch_with_offset", - "file_names": ["orders.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet"}, "sql_query": FETCH_RELATIONS["fetch_with_offset"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/filter_relation_configs.py b/substrait_consumer/functional/filter_relation_configs.py index e762dc05..91833a9c 100644 --- a/substrait_consumer/functional/filter_relation_configs.py +++ b/substrait_consumer/functional/filter_relation_configs.py @@ -4,73 +4,85 @@ FILTER_RELATION_TESTS = ( { "test_name": "where_equal_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_equal_multi_col"], "ibis_expr": None }, { "test_name": "where_not_equal_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, 
"sql_query": FILTER_RELATIONS["where_not_equal_multi_col"], "ibis_expr": None }, { "test_name": "where_gt_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_gt_multi_col"], "ibis_expr": None }, { "test_name": "where_gte_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_gte_multi_col"], "ibis_expr": None }, { "test_name": "where_lt_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_lt_multi_col"], "ibis_expr": None }, { "test_name": "where_lte_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_lte_multi_col"], "ibis_expr": None }, { "test_name": "where_like", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_like"], "ibis_expr": None }, { "test_name": "where_between", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_between"], "ibis_expr": None }, { "test_name": "where_in", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_in"], "ibis_expr": None }, { "test_name": "where_or", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_or"], "ibis_expr": None }, { "test_name": "where_and", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_and"], "ibis_expr": None }, 
{ "test_name": "having", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["having"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/join_relation_configs.py b/substrait_consumer/functional/join_relation_configs.py index c1d82305..4fc21fea 100644 --- a/substrait_consumer/functional/join_relation_configs.py +++ b/substrait_consumer/functional/join_relation_configs.py @@ -4,79 +4,131 @@ JOIN_RELATION_TESTS = ( { "test_name": "inner_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["inner_join"], "ibis_expr": None }, { "test_name": "left_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_join"], "ibis_expr": None }, { "test_name": "right_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_join"], "ibis_expr": None }, { "test_name": "full_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["full_join"], "ibis_expr": None }, { "test_name": "cross_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["cross_join"], "ibis_expr": None }, { "test_name": "left_semi_join", - "file_names": 
["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_semi_join"], "ibis_expr": None }, { "test_name": "right_semi_join", - "file_names": ["orders_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_semi_join"], "ibis_expr": None }, { "test_name": "left_anti_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_anti_join"], "ibis_expr": None }, { "test_name": "right_anti_join", - "file_names": ["orders_small.parquet", "lineitem_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "lineitem": "lineitem_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_anti_join"], "ibis_expr": None }, { "test_name": "left_single_join", - "file_names": ["customer_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_single_join"], "ibis_expr": None }, { "test_name": "right_single_join", - "file_names": ["customer_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_single_join"], "ibis_expr": None }, { "test_name": "left_mark_join", - "file_names": ["orders_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_mark_join"], 
"ibis_expr": None }, { "test_name": "right_mark_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_mark_join"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/logarithmic_configs.py b/substrait_consumer/functional/logarithmic_configs.py index f59ad9d9..1089648e 100644 --- a/substrait_consumer/functional/logarithmic_configs.py +++ b/substrait_consumer/functional/logarithmic_configs.py @@ -5,25 +5,29 @@ SCALAR_FUNCTIONS = ( { "test_name": "ln", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["ln"], "ibis_expr": IBIS_SCALAR["ln"], }, { "test_name": "log10", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["log10"], "ibis_expr": IBIS_SCALAR["log10"], }, { "test_name": "log2", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["log2"], "ibis_expr": IBIS_SCALAR["log2"], }, { "test_name": "logb", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["logb"], "ibis_expr": IBIS_SCALAR["logb"], }, diff --git a/substrait_consumer/functional/project_relation_configs.py b/substrait_consumer/functional/project_relation_configs.py index aa8d3602..a669f220 100644 --- a/substrait_consumer/functional/project_relation_configs.py +++ b/substrait_consumer/functional/project_relation_configs.py @@ -4,43 +4,53 @@ PROJECT_RELATION_TESTS = ( { "test_name": "project_single_col", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["project_single_col"], "ibis_expr": 
None }, { "test_name": "project_multi_col", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["project_multi_col"], "ibis_expr": None }, { "test_name": "project_all_col", - "file_names": ["region_small.parquet"], + "local_files": {}, + "named_tables": {"region": "region_small.parquet"}, "sql_query": PROJECT_RELATIONS["project_all_col"], "ibis_expr": None }, { "test_name": "extended_project", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["extended_project"], "ibis_expr": None }, { "test_name": "subquery_in_project", - "file_names": ["orders_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": PROJECT_RELATIONS["subquery_in_project"], "ibis_expr": None }, { "test_name": "distinct_in_project", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["distinct_in_project"], "ibis_expr": None }, { "test_name": "count_distinct_in_project", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["count_distinct_in_project"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/queries/sql/approximation_functions_sql.py b/substrait_consumer/functional/queries/sql/approximation_functions_sql.py index e2af20ec..4937c22c 100644 --- a/substrait_consumer/functional/queries/sql/approximation_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/approximation_functions_sql.py @@ -5,14 +5,14 @@ "approx_count_distinct": ( """ SELECT approx_count_distinct(l_comment) - FROM '{}'; + FROM '{lineitem}'; """, [DuckDBProducer], ), 
"approx_distinct": ( """ SELECT approx_distinct(l_comment) - FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer], ), diff --git a/substrait_consumer/functional/queries/sql/arithmetic_demical_functions_sql.py b/substrait_consumer/functional/queries/sql/arithmetic_demical_functions_sql.py index d44875d0..60d8fd34 100644 --- a/substrait_consumer/functional/queries/sql/arithmetic_demical_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/arithmetic_demical_functions_sql.py @@ -5,7 +5,7 @@ "add": ( """ SELECT L_TAX, L_DISCOUNT, add(L_TAX, L_DISCOUNT) AS ADD_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -13,7 +13,7 @@ "subtract": ( """ SELECT L_TAX, L_DISCOUNT, subtract(L_TAX, L_DISCOUNT) AS SUBTRACT_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -21,7 +21,7 @@ "multiply": ( """ SELECT L_TAX, L_EXTENDEDPRICE, round(multiply(L_TAX, L_EXTENDEDPRICE), 2) AS MULTIPLY_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -29,7 +29,7 @@ "divide": ( """ SELECT L_TAX, L_EXTENDEDPRICE, round(divide(L_EXTENDEDPRICE, L_TAX), 2) AS DIVIDE_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -37,7 +37,7 @@ "modulus": ( """ SELECT L_EXTENDEDPRICE, L_TAX, round(mod(L_EXTENDEDPRICE, L_TAX), 2) AS MODULUS_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -48,28 +48,28 @@ "sum": ( """ SELECT sum(L_EXTENDEDPRICE) AS SUM_EXTENDEDPRICE - FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer, DuckDBProducer], ), "avg": ( """ SELECT round(avg(L_EXTENDEDPRICE), 2) AS AVG_EXTENDEDPRICE - FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer, DuckDBProducer], ), "min": ( """ SELECT min(L_EXTENDEDPRICE) AS MIN_EXTENDEDPRICE - FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer, DuckDBProducer], ), "max": ( """ SELECT max(L_EXTENDEDPRICE) AS MAX_EXTENDEDPRICE - 
FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer, DuckDBProducer], ), diff --git a/substrait_consumer/functional/queries/sql/arithmetic_functions_sql.py b/substrait_consumer/functional/queries/sql/arithmetic_functions_sql.py index d3d6000a..6243135c 100644 --- a/substrait_consumer/functional/queries/sql/arithmetic_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/arithmetic_functions_sql.py @@ -6,7 +6,7 @@ "add": ( """ SELECT PS_PARTKEY, PS_SUPPKEY, PS_PARTKEY + PS_SUPPKEY AS ADD_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -14,7 +14,7 @@ "subtract": ( """ SELECT PS_PARTKEY, PS_SUPPKEY, PS_PARTKEY - PS_SUPPKEY AS SUBTRACT_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -22,7 +22,7 @@ "multiply": ( """ SELECT PS_PARTKEY, PS_PARTKEY * 10 AS MULTIPLY_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -30,7 +30,7 @@ "divide": ( """ SELECT PS_PARTKEY, PS_PARTKEY / 10 AS DIVIDE_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -38,7 +38,7 @@ "modulus": ( """ SELECT PS_PARTKEY, mod(PS_PARTKEY, 10) AS MODULUS_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DuckDBProducer, IsthmusProducer], @@ -46,7 +46,7 @@ "factorial": ( """ SELECT N_NATIONKEY, factorial(N_NATIONKEY) AS FACTORIAL_KEY - FROM '{}' + FROM '{nation}' WHERE N_NATIONKEY <= 10 LIMIT 100; """, @@ -55,7 +55,7 @@ "power": ( """ SELECT PS_PARTKEY, power(PS_PARTKEY, 2) AS POWER_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -63,7 +63,7 @@ "sqrt": ( """ SELECT PS_PARTKEY, round(sqrt(CAST(PS_PARTKEY AS DOUBLE)), 2) AS SQRT_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -71,7 +71,7 @@ "exp": ( """ SELECT PS_PARTKEY, round(exp(CAST(PS_PARTKEY AS 
DOUBLE)), 2) AS EXP_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -79,7 +79,7 @@ "negate": ( """ SELECT PS_PARTKEY, negate(PS_PARTKEY) AS NEGATE_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DuckDBProducer], @@ -87,7 +87,7 @@ "cos": ( """ SELECT round(cos(CAST(ps_supplycost AS DOUBLE)), 2) AS COS_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -95,7 +95,7 @@ "acos": ( """ SELECT round(acos(CAST(l_tax AS DOUBLE)), 2) AS ACOS_TAX - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -103,7 +103,7 @@ "sin": ( """ SELECT round(sin(CAST(ps_supplycost AS DOUBLE)), 2) AS SIN_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -111,7 +111,7 @@ "asin": ( """ SELECT round(asin(CAST(l_tax AS DOUBLE)), 2) AS ASIN_TAX - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -119,7 +119,7 @@ "tan": ( """ SELECT round(tan(CAST(ps_supplycost AS DOUBLE)), 2) AS TAN_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -127,7 +127,7 @@ "atan": ( """ SELECT round(atan(CAST(l_tax AS DOUBLE)), 2) AS ATAN_TAX - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -135,7 +135,7 @@ "atan2": ( """ SELECT round(atan2(CAST(l_tax AS DOUBLE), CAST(l_tax AS DOUBLE)), 2) AS ATAN2_TAX - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -162,77 +162,77 @@ "sum": ( """ SELECT sum(PS_SUPPLYCOST) AS SUM_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "count": ( """ SELECT count(PS_SUPPLYCOST) AS COUNT_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, 
IsthmusProducer], ), "count_star": ( """ SELECT count(*) - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "avg": ( """ SELECT round(avg(PS_SUPPLYCOST), 2) AS AVG_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer], ), "min": ( """ SELECT min(PS_SUPPLYCOST) AS MIN_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "max": ( """ SELECT max(PS_SUPPLYCOST) AS MAX_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "median": ( """ SELECT median(PS_SUPPLYCOST) AS MEDIAN_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer], ), "mode": ( """ SELECT mode(PS_SUPPLYCOST) AS MODE_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DuckDBProducer], ), "product": ( """ SELECT product(PS_SUPPLYCOST) AS PRODUCT_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DuckDBProducer], ), "std_dev": ( """ SELECT round(stddev(PS_SUPPLYCOST), 2) AS STDDEV_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DuckDBProducer], ), "variance": ( """ SELECT round(variance(PS_SUPPLYCOST), 2) AS VARIANCE_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DuckDBProducer], ), diff --git a/substrait_consumer/functional/queries/sql/comparison_functions_sql.py b/substrait_consumer/functional/queries/sql/comparison_functions_sql.py index 4e786247..ba92b4a8 100644 --- a/substrait_consumer/functional/queries/sql/comparison_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/comparison_functions_sql.py @@ -6,7 +6,7 @@ "not_equal": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE NOT N_NAME = 'CANADA' """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -14,7 +14,7 @@ "equal": ( """ SELECT PS_AVAILQTY, PS_PARTKEY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY = PS_PARTKEY """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -30,7 +30,7 @@ "lt": 
( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY < 10 ORDER BY PS_AVAILQTY """, @@ -39,7 +39,7 @@ "lte": ( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY <= 10 ORDER BY PS_AVAILQTY """, @@ -48,7 +48,7 @@ "gt": ( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY > 9990 ORDER BY PS_AVAILQTY """, @@ -57,7 +57,7 @@ "gte": ( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY >= 9990 ORDER BY PS_AVAILQTY """, diff --git a/substrait_consumer/functional/queries/sql/datetime_functions_sql.py b/substrait_consumer/functional/queries/sql/datetime_functions_sql.py index 859b513c..4165e77d 100644 --- a/substrait_consumer/functional/queries/sql/datetime_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/datetime_functions_sql.py @@ -6,7 +6,7 @@ "extract": ( """ SELECT L_SHIPDATE, extract(year FROM L_SHIPDATE) - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -14,7 +14,7 @@ "add": ( """ SELECT L_SHIPDATE, L_SHIPDATE + INTERVAL 5 DAY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DuckDBProducer], @@ -22,7 +22,7 @@ "subtract": ( """ SELECT L_SHIPDATE, L_SHIPDATE - INTERVAL 5 DAY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DuckDBProducer], @@ -30,7 +30,7 @@ "lt": ( """ SELECT L_COMMITDATE, L_RECEIPTDATE, L_COMMITDATE < L_RECEIPTDATE - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -38,7 +38,7 @@ "lte": ( """ SELECT L_COMMITDATE, L_RECEIPTDATE, L_COMMITDATE <= L_RECEIPTDATE - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -46,7 +46,7 @@ "gt": ( """ SELECT L_COMMITDATE, L_RECEIPTDATE, L_COMMITDATE > L_RECEIPTDATE - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -54,7 +54,7 @@ "gte": ( """ SELECT L_COMMITDATE, L_RECEIPTDATE, L_COMMITDATE >= L_RECEIPTDATE 
- FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], diff --git a/substrait_consumer/functional/queries/sql/logarithmic_functions_sql.py b/substrait_consumer/functional/queries/sql/logarithmic_functions_sql.py index 024f315a..73a199af 100644 --- a/substrait_consumer/functional/queries/sql/logarithmic_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/logarithmic_functions_sql.py @@ -5,7 +5,7 @@ "ln": ( """ SELECT PS_SUPPLYCOST, round(ln(PS_SUPPLYCOST), 2) AS LN_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -13,7 +13,7 @@ "log10": ( """ SELECT PS_SUPPLYCOST, round(log10(PS_SUPPLYCOST), 2) AS LOG10_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -21,7 +21,7 @@ "log2": ( """ SELECT PS_SUPPLYCOST, round(log2(PS_SUPPLYCOST), 2) AS LOG2_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -29,7 +29,7 @@ "logb": ( """ SELECT PS_SUPPLYCOST, round(logb(PS_SUPPLYCOST, 10), 2) AS LOGB_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], diff --git a/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py b/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py index 64128657..b42c4ce9 100644 --- a/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py @@ -6,35 +6,35 @@ "single_measure_aggregate": ( """ SELECT COUNT(L_PARTKEY) - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "multiple_measure_aggregate": ( """ SELECT MIN(O_TOTALPRICE), MAX(O_TOTALPRICE), AVG(O_TOTALPRICE) - FROM '{}' + FROM '{orders}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "aggregate_with_computation": ( """ SELECT AVG(O_TOTALPRICE) * 10 - FROM '{}' + FROM '{orders}' 
""", [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "compute_within_aggregate": ( """ SELECT AVG(O_TOTALPRICE * 10) - FROM '{}' + FROM '{orders}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "computation_between_aggregates": ( """ SELECT AVG(O_TOTALPRICE) + MAX(O_TOTALPRICE) - FROM '{}' + FROM '{orders}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -42,15 +42,15 @@ """ SELECT O_TOTALPRICE - FROM '{}' - WHERE O_TOTALPRICE <= (SELECT AVG(O_TOTALPRICE) FROM '{}') + FROM '{orders}' + WHERE O_TOTALPRICE <= (SELECT AVG(O_TOTALPRICE) FROM '{orders}') """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "aggregate_with_group_by": ( """ SELECT L_ORDERKEY, L_LINENUMBER, count(*) - FROM '{}' + FROM '{lineitem}' GROUP BY L_ORDERKEY, L_LINENUMBER ORDER BY L_ORDERKEY, L_LINENUMBER """, @@ -59,7 +59,7 @@ "aggregate_with_group_by_cube": ( """ SELECT L_ORDERKEY, L_LINENUMBER, count(*) - FROM '{}' + FROM '{lineitem}' GROUP BY CUBE(L_ORDERKEY, L_LINENUMBER) ORDER BY L_ORDERKEY, L_LINENUMBER """, @@ -69,7 +69,7 @@ """ SELECT L_ORDERKEY, L_LINENUMBER, count(*) - FROM '{}' + FROM '{lineitem}' GROUP BY ROLLUP(L_ORDERKEY, L_LINENUMBER) ORDER BY L_ORDERKEY, L_LINENUMBER """, @@ -79,7 +79,7 @@ """ SELECT SUM(L_EXTENDEDPRICE), L_LINENUMBER, L_ORDERKEY - FROM '{}' + FROM '{lineitem}' GROUP BY GROUPING SETS ( (L_LINENUMBER), diff --git a/substrait_consumer/functional/queries/sql/relations/ddl_relations.py b/substrait_consumer/functional/queries/sql/relations/ddl_relations.py index a8161537..bd33f604 100644 --- a/substrait_consumer/functional/queries/sql/relations/ddl_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/ddl_relations.py @@ -15,27 +15,27 @@ ), "drop_table": ( """ - DROP TABLE '{}'; + DROP TABLE '{customer}'; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "alter_table": ( """ - ALTER TABLE '{}' + ALTER TABLE '{customer}' ADD email VARCHAR; """, [DuckDBProducer, DataFusionProducer, 
IsthmusProducer], ), "alter_column": ( """ - ALTER TABLE '{}' + ALTER TABLE '{customer}' RENAME COLUMN c_address TO c_street_address; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "drop_column": ( """ - ALTER TABLE '{}' + ALTER TABLE '{customer}' DROP COLUMN c_address; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], @@ -47,7 +47,7 @@ C_CUSTKEY, C_NAME, FROM - '{}'; + '{customer}'; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -58,7 +58,7 @@ C_CUSTKEY, C_NAME, FROM - '{}'; + '{customer}'; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), diff --git a/substrait_consumer/functional/queries/sql/relations/fetch_relations.py b/substrait_consumer/functional/queries/sql/relations/fetch_relations.py index ec4c7a15..01bb1f30 100644 --- a/substrait_consumer/functional/queries/sql/relations/fetch_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/fetch_relations.py @@ -5,14 +5,14 @@ FETCH_RELATIONS = { "fetch": ( """ - SELECT O_ORDERKEY FROM '{}' + SELECT O_ORDERKEY FROM '{orders}' FETCH NEXT 1 ROWS ONLY; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "fetch_with_offset": ( """ - SELECT O_ORDERKEY FROM '{}' + SELECT O_ORDERKEY FROM '{orders}' OFFSET 5 ROWS FETCH NEXT 5 ROWS ONLY; """, diff --git a/substrait_consumer/functional/queries/sql/relations/filter_relations.py b/substrait_consumer/functional/queries/sql/relations/filter_relations.py index 5d62d683..3b17b023 100644 --- a/substrait_consumer/functional/queries/sql/relations/filter_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/filter_relations.py @@ -6,7 +6,7 @@ "where_equal_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT = L_TAX ORDER BY L_DISCOUNT LIMIT 20; @@ -16,7 +16,7 @@ "where_not_equal_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT != L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -26,7 +26,7 @@ 
"where_gt_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT > L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -36,7 +36,7 @@ "where_gte_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT >= L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -46,7 +46,7 @@ "where_lt_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT < L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -56,7 +56,7 @@ "where_lte_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT <= L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -66,7 +66,7 @@ "where_like": ( """ SELECT L_SHIPINSTRUCT, L_ORDERKEY - FROM '{}' + FROM '{lineitem}' WHERE L_SHIPINSTRUCT LIKE '%DELIVER IN PERSON%' ORDER BY L_ORDERKEY LIMIT 20; @@ -76,7 +76,7 @@ "where_between": ( """ SELECT L_ORDERKEY - FROM '{}' + FROM '{lineitem}' WHERE L_ORDERKEY BETWEEN 20 AND 50 LIMIT 20; """, @@ -85,7 +85,7 @@ "where_in": ( """ SELECT L_ORDERKEY - FROM '{}' + FROM '{lineitem}' WHERE L_ORDERKEY IN (1, 2, 3) """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], @@ -93,7 +93,7 @@ "where_or": ( """ SELECT L_ORDERKEY, L_SHIPINSTRUCT - FROM '{}' + FROM '{lineitem}' WHERE L_ORDERKEY = 2 OR L_ORDERKEY = 3 """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], @@ -101,7 +101,7 @@ "where_and": ( """ SELECT L_ORDERKEY, L_SHIPINSTRUCT - FROM '{}' + FROM '{lineitem}' WHERE L_ORDERKEY = 2 AND L_SHIPINSTRUCT = 'TAKE BACK RETURN' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], @@ -109,7 +109,7 @@ "having": ( """ SELECT L_QUANTITY, COUNT(*) - FROM '{}' + FROM '{lineitem}' GROUP BY L_QUANTITY HAVING COUNT(*) > 12100 ORDER BY L_QUANTITY diff --git a/substrait_consumer/functional/queries/sql/relations/join_relations.py b/substrait_consumer/functional/queries/sql/relations/join_relations.py index 4e0e943b..5acbf3c9 100644 --- a/substrait_consumer/functional/queries/sql/relations/join_relations.py +++ 
b/substrait_consumer/functional/queries/sql/relations/join_relations.py @@ -10,9 +10,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c INNER JOIN - '{}' o + '{orders}' o ON c.C_CUSTKEY = o.O_CUSTKEY; """, @@ -25,9 +25,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c LEFT JOIN - '{}' o + '{orders}' o ON c.C_CUSTKEY = o.O_CUSTKEY; """, @@ -40,9 +40,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c RIGHT JOIN - '{}' o + '{orders}' o ON c.C_CUSTKEY = o.O_CUSTKEY; """, @@ -55,9 +55,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c FULL JOIN - '{}' o + '{orders}' o ON c.C_CUSTKEY = o.O_CUSTKEY; """, @@ -70,9 +70,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c CROSS JOIN - '{}' o + '{orders}' o """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -82,11 +82,11 @@ c.C_CUSTKEY, c.C_NAME FROM - '{}' c + '{customer}' c WHERE EXISTS ( SELECT 1 - FROM '{}' o + FROM '{orders}' o WHERE o.O_CUSTKEY = c.C_CUSTKEY ); """, @@ -98,11 +98,11 @@ o.O_ORDERKEY, o.O_CUSTKEY FROM - '{}' o + '{orders}' o WHERE EXISTS ( SELECT 1 - FROM '{}' c + FROM '{customer}' c WHERE c.C_CUSTKEY = o.O_CUSTKEY ); """, @@ -114,11 +114,11 @@ c.C_CUSTKEY, c.C_NAME FROM - '{}' c + '{customer}' c WHERE NOT EXISTS ( SELECT 1 - FROM '{}' o + FROM '{orders}' o WHERE o.O_CUSTKEY = c.C_CUSTKEY ); """, @@ -130,11 +130,11 @@ o.O_ORDERKEY, o.O_CUSTKEY FROM - '{}' o + '{orders}' o WHERE NOT EXISTS ( SELECT 1 - FROM '{}' l + FROM '{lineitem}' l WHERE l.L_ORDERKEY = o.O_ORDERKEY ); """, @@ -150,9 +150,9 @@ c2.C_NAME AS c2name, c2.C_NATIONKEY AS c2nationakey FROM - '{}' c1 + '{customer}' c1 LEFT JOIN - '{}' c2 + '{customer}' c2 ON c1.C_NATIONKEY = c2.C_NATIONKEY AND c1.C_CUSTKEY <> c2.C_CUSTKEY; @@ -169,9 +169,9 @@ c2.C_NAME AS c2name, c2.C_NATIONKEY AS c2nationakey FROM - '{}' c1 + '{customer}' c1 RIGHT JOIN - '{}' c2 + '{customer}' c2 ON c1.C_NATIONKEY = c2.C_NATIONKEY AND c1.C_CUSTKEY <> c2.C_CUSTKEY; @@ -186,13 +186,13 @@ CASE WHEN EXISTS ( SELECT 1 - FROM 
'{}' o + FROM '{orders}' o WHERE o.O_CUSTKEY = c.C_CUSTKEY ) THEN 'Marked' ELSE 'Not Marked' END AS mark_status FROM - '{}' c; + '{customer}' c; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -204,13 +204,13 @@ CASE WHEN EXISTS ( SELECT 1 - FROM '{}' c + FROM '{customer}' c WHERE c.C_CUSTKEY = o.O_CUSTKEY ) THEN 'Marked' ELSE 'Not Marked' END AS mark_status FROM - '{}' o; + '{orders}' o; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), diff --git a/substrait_consumer/functional/queries/sql/relations/project_relations.py b/substrait_consumer/functional/queries/sql/relations/project_relations.py index 51c56b2a..230ebe10 100644 --- a/substrait_consumer/functional/queries/sql/relations/project_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/project_relations.py @@ -6,28 +6,28 @@ "project_single_col": ( """ SELECT * - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "project_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "project_all_col": ( """ SELECT * - FROM '{}' + FROM '{region}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "extended_project": ( """ SELECT L_QUANTITY, L_EXTENDEDPRICE*10 AS MULTI_PRICE - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -35,24 +35,24 @@ """ SELECT C_CUSTKEY, (SELECT SUM(O_TOTALPRICE) - FROM {} + FROM '{orders}' WHERE C_CUSTKEY = O_CUSTKEY) AS total_price - FROM {} + FROM '{customer}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "distinct_in_project": ( """ SELECT DISTINCT L_LINESTATUS - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "count_distinct_in_project": ( """ SELECT COUNT(DISTINCT L_EXTENDEDPRICE) - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), diff --git 
a/substrait_consumer/functional/queries/sql/relations/read_relations.py b/substrait_consumer/functional/queries/sql/relations/read_relations.py index 6bcfbcf5..352f3f17 100644 --- a/substrait_consumer/functional/queries/sql/relations/read_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/read_relations.py @@ -5,7 +5,7 @@ READ_RELATIONS = { "read_named_table": ( """ - SELECT PS_PARTKEY FROM '{}' + SELECT PS_PARTKEY FROM '{partsupp}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -29,7 +29,7 @@ ), "duckdb_read_local_file": ( """ - SELECT * FROM read_parquet('{}'); + SELECT * FROM read_parquet('{customer_file_path}'); """, [DuckDBProducer], ), diff --git a/substrait_consumer/functional/queries/sql/relations/set_relations.py b/substrait_consumer/functional/queries/sql/relations/set_relations.py index bd9de966..695a86bb 100644 --- a/substrait_consumer/functional/queries/sql/relations/set_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/set_relations.py @@ -5,34 +5,34 @@ SET_RELATIONS = { "union_distinct": ( """ - SELECT C_NATIONKEY FROM '{}' + SELECT C_NATIONKEY FROM '{customer}' UNION - SELECT N_NATIONKEY FROM '{}' + SELECT N_NATIONKEY FROM '{nation}' ORDER BY C_NATIONKEY """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "union_all": ( """ - SELECT C_NATIONKEY FROM '{}' + SELECT C_NATIONKEY FROM '{customer}' UNION ALL - SELECT N_NATIONKEY FROM '{}' + SELECT N_NATIONKEY FROM '{nation}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "intersect": ( """ - SELECT C_NATIONKEY FROM '{}' + SELECT C_NATIONKEY FROM '{customer}' INTERSECT - SELECT N_NATIONKEY FROM '{}' + SELECT N_NATIONKEY FROM '{nation}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "except": ( """ - SELECT o_totalprice FROM '{}' + SELECT o_totalprice FROM '{orders}' EXCEPT - SELECT c_acctbal FROM '{}' + SELECT c_acctbal FROM '{customer}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), diff 
--git a/substrait_consumer/functional/queries/sql/relations/sort_relations.py b/substrait_consumer/functional/queries/sql/relations/sort_relations.py index 87886ab2..3ecfbb33 100644 --- a/substrait_consumer/functional/queries/sql/relations/sort_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/sort_relations.py @@ -6,7 +6,7 @@ "single_col_default_sort": ( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_AVAILQTY LIMIT 10; """, @@ -15,7 +15,7 @@ "single_col_asc": ( """ SELECT PS_SUPPLYCOST - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST ASC LIMIT 10; """, @@ -24,7 +24,7 @@ "single_col_desc": ( """ SELECT PS_SUPPLYCOST - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST DESC LIMIT 10; """, @@ -33,7 +33,7 @@ "multi_col_asc": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST ASC, PS_AVAILQTY LIMIT 10; """, @@ -42,7 +42,7 @@ "multi_col_desc": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST DESC LIMIT 10; """, @@ -51,7 +51,7 @@ "multi_col_asc_desc": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST ASC, PS_AVAILQTY DESC LIMIT 10; """, @@ -60,7 +60,7 @@ "multi_col_desc_asc": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST DESC, PS_AVAILQTY ASC LIMIT 10; """, @@ -69,7 +69,7 @@ "order_by_col_number": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY 1, 2 LIMIT 10; """, diff --git a/substrait_consumer/functional/queries/sql/relations/write_relations.py b/substrait_consumer/functional/queries/sql/relations/write_relations.py index 71af90cd..aa891d8a 100644 --- a/substrait_consumer/functional/queries/sql/relations/write_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/write_relations.py @@ -5,14 +5,14 @@ WRITE_RELATIONS = { "insert": ( """ - INSERT INTO '{}' (r_regionkey, r_name, r_comment) + 
INSERT INTO '{region}' (r_regionkey, r_name, r_comment) VALUES (99999, 'region_name', 'region comment'); """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "update": ( """ - UPDATE '{}' + UPDATE '{customer}' SET c_address = 'Substait Avenue', c_phone = '123-456-7890' WHERE c_custkey = 1; """, @@ -20,7 +20,7 @@ ), "delete": ( """ - DELETE FROM '{}' + DELETE FROM '{customer}' WHERE c_custkey = 1; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], diff --git a/substrait_consumer/functional/queries/sql/rounding_functions_sql.py b/substrait_consumer/functional/queries/sql/rounding_functions_sql.py index 8f2dcdd8..4c5e6d3b 100644 --- a/substrait_consumer/functional/queries/sql/rounding_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/rounding_functions_sql.py @@ -5,7 +5,7 @@ "ceil": ( """ SELECT PS_SUPPLYCOST, ceil(CAST(PS_SUPPLYCOST AS DOUBLE)) AS CEIL_SUPPLYCOST - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -13,7 +13,7 @@ "floor": ( """ SELECT PS_SUPPLYCOST, floor(CAST(PS_SUPPLYCOST AS DOUBLE)) AS FLOOR_SUPPLYCOST - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -21,7 +21,7 @@ "round": ( """ SELECT L_EXTENDEDPRICE, round(CAST(L_EXTENDEDPRICE AS DOUBLE), 1) AS ROUND_EXTENDEDPRICE - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], diff --git a/substrait_consumer/functional/queries/sql/string_functions_sql.py b/substrait_consumer/functional/queries/sql/string_functions_sql.py index fecc6b45..bc27e1a6 100644 --- a/substrait_consumer/functional/queries/sql/string_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/string_functions_sql.py @@ -5,21 +5,21 @@ "concat": ( """ SELECT N_NAME, concat(N_NAME, N_COMMENT) AS concat_nation - FROM '{}'; + FROM '{nation}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "concat_ws": ( """ SELECT concat_ws('.', 
N_NAME, N_COMMENT) - FROM '{}'; + FROM '{nation}'; """, [DataFusionProducer, DuckDBProducer], ), "like": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE N_NAME LIKE 'ALGERIA'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -27,7 +27,7 @@ "starts_with_duckdb": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE prefix(N_NAME, 'A'); """, [DuckDBProducer], @@ -35,7 +35,7 @@ "starts_with": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE starts_with(N_NAME, 'A'); """, [DataFusionProducer], @@ -43,7 +43,7 @@ "ends_with": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE suffix(N_NAME, 'A'); """, [DuckDBProducer], @@ -51,21 +51,21 @@ "substring": ( """ SELECT N_NAME, substr(N_NAME, 1, 3) AS substr_name - FROM '{}'; + FROM '{nation}'; """, [DataFusionProducer, DuckDBProducer], ), "substring_isthmus": ( """ SELECT N_NAME, SUBSTRING(N_NAME FROM 1 FOR 3) AS substr_name - FROM '{}'; + FROM '{nation}'; """, [IsthmusProducer], ), "contains": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE contains(N_NAME, 'IA'); """, [DataFusionProducer, DuckDBProducer], @@ -73,42 +73,42 @@ "strpos": ( """ SELECT N_NAME, strpos(N_NAME, 'A') AS strpos_name - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "replace": ( """ SELECT N_NAME, replace(N_NAME, 'A', 'a') AS replace_name - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "repeat": ( """ SELECT N_NAME, repeat(N_NAME, 2) AS repeated_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "reverse": ( """ SELECT N_NAME, reverse(N_NAME) AS reversed_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "lower": ( """ SELECT N_NAME, lower(N_NAME) AS lowercase_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "upper": ( """ SELECT O_COMMENT, upper(O_COMMENT) AS uppercase_O_COMMENT - FROM '{}' + FROM '{orders}' LIMIT 10; """, [DataFusionProducer, 
DuckDBProducer], @@ -116,63 +116,63 @@ "char_length": ( """ SELECT N_NAME, length(N_NAME) AS char_length_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "bit_length": ( """ SELECT N_NAME, bit_length(N_NAME) AS bit_length_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "ltrim": ( """ SELECT N_NAME, ltrim(N_NAME, 'A') AS ltrim_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "rtrim": ( """ SELECT N_NAME, rtrim(N_NAME, 'A') AS rtrim_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "trim": ( """ SELECT N_NAME, trim(N_NAME, 'A') AS trim_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "lpad": ( """ SELECT N_NAME, lpad(N_NAME, 10, ' ') AS lpad_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "rpad": ( """ SELECT N_NAME, rpad(N_NAME, 10, ' ') AS rpad_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "left": ( """ SELECT N_NAME, left(N_NAME, 2) AS left_extract_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "right": ( """ SELECT N_NAME, right(N_NAME, 2) AS right_extract_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), @@ -182,7 +182,7 @@ "string_agg": ( """ SELECT N_NAME, string_agg(N_NAME, ',') - FROM '{}' + FROM '{nation}' GROUP BY N_NAME ORDER BY N_NAME """, diff --git a/substrait_consumer/functional/read_relation_configs.py b/substrait_consumer/functional/read_relation_configs.py index b09021c5..2b7447c8 100644 --- a/substrait_consumer/functional/read_relation_configs.py +++ b/substrait_consumer/functional/read_relation_configs.py @@ -4,31 +4,36 @@ READ_RELATION_TESTS = ( { "test_name": "read_named_table", - "file_names": ['partsupp_small.parquet'], + "local_files": {}, + "named_tables": {"partsupp": "partsupp_small.parquet"}, "sql_query": 
READ_RELATIONS["read_named_table"], "ibis_expr": None }, { "test_name": "isthmus_read_virtual_table", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": READ_RELATIONS["isthmus_read_virtual_table"], "ibis_expr": None }, { "test_name": "datafusion_read_virtual_table", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": READ_RELATIONS["datafusion_read_virtual_table"], "ibis_expr": None }, { "test_name": "duckdb_read_virtual_table", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": READ_RELATIONS["duckdb_read_virtual_table"], "ibis_expr": None }, { "test_name": "duckdb_read_local_file", - "file_names": ['customer_small.parquet'], + "local_files": {"customer_file_path": "customer_small.parquet"}, + "named_tables": {}, "sql_query": READ_RELATIONS["duckdb_read_local_file"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/rounding_configs.py b/substrait_consumer/functional/rounding_configs.py index 7b69c8a3..084ffa69 100644 --- a/substrait_consumer/functional/rounding_configs.py +++ b/substrait_consumer/functional/rounding_configs.py @@ -4,19 +4,22 @@ SCALAR_FUNCTIONS = ( { "test_name": "ceil", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["ceil"], "ibis_expr": IBIS_SCALAR["ceil"], }, { "test_name": "floor", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["floor"], "ibis_expr": IBIS_SCALAR["floor"], }, { "test_name": "round", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["round"], "ibis_expr": IBIS_SCALAR["round"], }, diff --git a/substrait_consumer/functional/set_relation_configs.py b/substrait_consumer/functional/set_relation_configs.py index 82726faa..7adc4ee6 100644 --- 
a/substrait_consumer/functional/set_relation_configs.py +++ b/substrait_consumer/functional/set_relation_configs.py @@ -4,25 +4,41 @@ SET_RELATION_TESTS = ( { "test_name": "union_distinct", - "file_names": ["customer_small.parquet", "nation_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "nation": "nation_small.parquet", + }, "sql_query": SET_RELATIONS["union_distinct"], "ibis_expr": None }, { "test_name": "union_all", - "file_names": ["customer_small.parquet", "nation_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "nation": "nation_small.parquet", + }, "sql_query": SET_RELATIONS["union_all"], "ibis_expr": None }, { "test_name": "intersect", - "file_names": ["customer_small.parquet", "nation_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "nation": "nation_small.parquet", + }, "sql_query": SET_RELATIONS["intersect"], "ibis_expr": None }, { "test_name": "except", - "file_names": ["orders_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": SET_RELATIONS["except"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/sort_relation_configs.py b/substrait_consumer/functional/sort_relation_configs.py index 0fbc4ecd..f7d970d6 100644 --- a/substrait_consumer/functional/sort_relation_configs.py +++ b/substrait_consumer/functional/sort_relation_configs.py @@ -4,49 +4,57 @@ SORT_RELATION_TESTS = ( { "test_name": "single_col_default_sort", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["single_col_default_sort"], "ibis_expr": None }, { "test_name": "single_col_asc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": 
SORT_RELATIONS["single_col_asc"], "ibis_expr": None }, { "test_name": "single_col_desc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["single_col_desc"], "ibis_expr": None }, { "test_name": "multi_col_asc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["multi_col_asc"], "ibis_expr": None }, { "test_name": "multi_col_desc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["single_col_desc"], "ibis_expr": None }, { "test_name": "multi_col_asc_desc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["multi_col_asc_desc"], "ibis_expr": None }, { "test_name": "multi_col_desc_asc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["multi_col_desc_asc"], "ibis_expr": None }, { "test_name": "order_by_col_number", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["order_by_col_number"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/string_configs.py b/substrait_consumer/functional/string_configs.py index d5389eec..4cf3dd3a 100644 --- a/substrait_consumer/functional/string_configs.py +++ b/substrait_consumer/functional/string_configs.py @@ -5,145 +5,169 @@ SCALAR_FUNCTIONS = ( { "test_name": "concat", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["concat"], "ibis_expr": IBIS_SCALAR["concat"], }, { "test_name": "concat_ws", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": 
SQL_SCALAR["concat_ws"], "ibis_expr": None, }, { "test_name": "like", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["like"], "ibis_expr": None, }, { "test_name": "starts_with", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["starts_with"], "ibis_expr": IBIS_SCALAR["starts_with"], }, { "test_name": "starts_with", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["starts_with_duckdb"], "ibis_expr": None, }, { "test_name": "ends_with", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["ends_with"], "ibis_expr": IBIS_SCALAR["ends_with"], }, { "test_name": "substring", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["substring"], "ibis_expr": IBIS_SCALAR["substr"], }, { "test_name": "substring", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["substring_isthmus"], "ibis_expr": None, }, { "test_name": "contains", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["contains"], "ibis_expr": IBIS_SCALAR["contains"], }, { "test_name": "strpos", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["strpos"], "ibis_expr": IBIS_SCALAR["strpos"], }, { "test_name": "replace", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["replace"], "ibis_expr": IBIS_SCALAR["replace"], }, { "test_name": "repeat", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": 
{"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["repeat"], "ibis_expr": IBIS_SCALAR["repeat"], }, { "test_name": "reverse", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["reverse"], "ibis_expr": IBIS_SCALAR["reverse"], }, { "test_name": "lower", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["lower"], "ibis_expr": IBIS_SCALAR["lower"], }, { "test_name": "upper", - "file_names": ["orders.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet"}, "sql_query": SQL_SCALAR["upper"], "ibis_expr": IBIS_SCALAR["upper"], }, { "test_name": "char_length", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["char_length"], "ibis_expr": IBIS_SCALAR["char_length"], }, { "test_name": "bit_length", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["bit_length"], "ibis_expr": None, }, { "test_name": "ltrim", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["ltrim"], "ibis_expr": IBIS_SCALAR["ltrim"], }, { "test_name": "rtrim", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["rtrim"], "ibis_expr": IBIS_SCALAR["rtrim"], }, { "test_name": "trim", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["trim"], "ibis_expr": IBIS_SCALAR["trim"], }, { "test_name": "lpad", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["lpad"], "ibis_expr": IBIS_SCALAR["lpad"], }, { "test_name": "rpad", - "file_names": ["nation.parquet"], + 
"local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["rpad"], "ibis_expr": IBIS_SCALAR["rpad"], }, { "test_name": "left", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["left"], "ibis_expr": IBIS_SCALAR["left"], }, { "test_name": "right", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["right"], "ibis_expr": IBIS_SCALAR["right"], }, @@ -152,7 +176,8 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "string_agg", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_AGGREGATE["string_agg"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/write_relation_configs.py b/substrait_consumer/functional/write_relation_configs.py index c531b2f5..fcd0f334 100644 --- a/substrait_consumer/functional/write_relation_configs.py +++ b/substrait_consumer/functional/write_relation_configs.py @@ -4,19 +4,22 @@ WRITE_RELATION_TESTS = ( { "test_name": "insert", - "file_names": ["region.parquet"], + "local_files": {}, + "named_tables": {"region": "region.parquet"}, "sql_query": WRITE_RELATIONS["insert"], "ibis_expr": None }, { "test_name": "update", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": WRITE_RELATIONS["update"], "ibis_expr": None }, { "test_name": "delete", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": WRITE_RELATIONS["delete"], "ibis_expr": None }, diff --git a/substrait_consumer/producers/datafusion_producer.py b/substrait_consumer/producers/datafusion_producer.py index a9f0b79d..abed684c 100644 --- a/substrait_consumer/producers/datafusion_producer.py +++ b/substrait_consumer/producers/datafusion_producer.py @@ -22,10 +22,15 @@ def __init__(self, 
db_connection=None): else: self._db_connection = db_connection - def set_db_connection(self, db_connection): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): self._db_connection = db_connection + self.register_named_tables(named_tables) - def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: """ Produce the DataFusion substrait plan using the given SQL query. @@ -55,27 +60,26 @@ def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = N return MessageToJson(substrait_proto) - def register_tables(self, file_names): + def register_named_tables(self, named_tables): """ - Register tables to the datafusion session context. + Register named_tables to the datafusion session context. Parameters: - file_names: - Name of parquet files. + named_tables: + A `dict` mapping table names to local file paths, which should + be loaded into the datafusion session context. 
Returns: None """ - if len(file_names) > 0: - parquet_file_paths = SubstraitUtils.get_full_path(file_names) - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem + if len(named_tables) > 0: + for table_name, file_path in named_tables.items(): if self._ctx.table_exist(table_name): self._ctx.deregister_table(table_name) self._ctx.register_parquet(table_name, file_path) assert self._ctx.table_exist(table_name) else: if not self._ctx.table_exist("t"): - tables = pa.RecordBatch.from_arrays( + named_tables = pa.RecordBatch.from_arrays( [ pa.array(COLUMN_A), pa.array(COLUMN_B), @@ -84,14 +88,7 @@ def register_tables(self, file_names): ], names=["a", "b", "c", "d"], ) - self._ctx.register_record_batches("t", [[tables]]) - - def format_sql(self, sql_query, file_names): - self.register_tables(file_names) - if len(file_names) > 0: - table_names = [Path(f).stem for f in file_names] - sql_query = sql_query.format(*table_names) - return sql_query + self._ctx.register_record_batches("t", [[named_tables]]) def name(self): return "DataFusionProducer" diff --git a/substrait_consumer/producers/duckdb_producer.py b/substrait_consumer/producers/duckdb_producer.py index aba4d435..06a1abfe 100644 --- a/substrait_consumer/producers/duckdb_producer.py +++ b/substrait_consumer/producers/duckdb_producer.py @@ -1,10 +1,11 @@ import json -import substrait_validator as sv -from .producer import Producer, load_tables_from_parquet -from substrait_consumer.common import SubstraitUtils +from typing import Optional import duckdb +import pyarrow as pa +import substrait_validator as sv +from .producer import Producer, load_named_tables class DuckDBProducer(Producer): """ @@ -19,10 +20,15 @@ def __init__(self, db_connection=None): self._db_connection.execute("INSTALL substrait") self._db_connection.execute("LOAD substrait") - def set_db_connection(self, db_connection): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: 
dict[str, str] + ): self._db_connection = db_connection + load_named_tables(self._db_connection, named_tables) - def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: """ Produce the DuckDB substrait plan using the given SQL query. @@ -47,17 +53,11 @@ def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = N python_json = json.loads(proto_bytes) return json.dumps(python_json, indent=2) - def format_sql(self, sql_query, file_names): - if len(file_names) > 0: - if "read_parquet" in sql_query: - parquet_file_path = SubstraitUtils.get_full_path(file_names) - sql_query = sql_query.format(parquet_file_path[0]) - else: - table_names = load_tables_from_parquet( - self._db_connection, file_names - ) - sql_query = sql_query.format(*table_names) - return sql_query + def run_sql_query(self, sql_query: str) -> Optional[pa.Table]: + sql_query = self.format_sql(sql_query) + result = self._db_connection.query(f"{sql_query}") + if result is not None: + return result.arrow() def name(self): return "DuckDBProducer" diff --git a/substrait_consumer/producers/ibis_producer.py b/substrait_consumer/producers/ibis_producer.py index 7ee1668e..ec8dc98f 100644 --- a/substrait_consumer/producers/ibis_producer.py +++ b/substrait_consumer/producers/ibis_producer.py @@ -1,5 +1,5 @@ -from .producer import Producer, load_tables_from_parquet +from .producer import Producer, load_named_tables import duckdb import pytest @@ -21,10 +21,15 @@ def __init__(self, db_connection=None): self._db_connection.execute("INSTALL substrait") self._db_connection.execute("LOAD substrait") - def set_db_connection(self, db_connection): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): self._db_connection = db_connection + load_named_tables(self._db_connection, named_tables) - def produce_substrait(self, 
sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: """ Produce the Ibis substrait plan using the given Ibis expression @@ -42,13 +47,5 @@ def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = N substrait_plan = json_format.MessageToJson(tpch_proto_bytes) return substrait_plan - def format_sql(self, sql_query, file_names): - if len(file_names) > 0: - table_names = load_tables_from_parquet( - self._db_connection, file_names - ) - sql_query = sql_query.format(*table_names) - return sql_query - def name(self): return "IbisProducer" diff --git a/substrait_consumer/producers/isthmus_producer.py b/substrait_consumer/producers/isthmus_producer.py index 94da68da..33cba152 100644 --- a/substrait_consumer/producers/isthmus_producer.py +++ b/substrait_consumer/producers/isthmus_producer.py @@ -1,5 +1,7 @@ +import re + import duckdb -from .producer import Producer, load_tables_from_parquet +from .producer import Producer, load_named_tables from ibis_substrait.compiler.core import SubstraitCompiler from substrait_consumer.context import get_schema, produce_isthmus_substrait @@ -18,12 +20,18 @@ def __init__(self, db_connection=None): self._db_connection.execute("INSTALL substrait") self._db_connection.execute("LOAD substrait") self.compiler = SubstraitCompiler() - self.file_names = None + self.table_names = None - def set_db_connection(self, db_connection): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): self._db_connection = db_connection + self.table_names = list(named_tables.keys()) + load_named_tables(self._db_connection, named_tables) - def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: """ Produce the Isthmus substrait plan using the given SQL 
query. @@ -35,21 +43,14 @@ def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = N Returns: Substrait query plan in json format. """ - schema_list = get_schema(self.file_names) + schema_list = get_schema(self.table_names) substrait_plan_str = produce_isthmus_substrait(sql_query, schema_list, validate) return substrait_plan_str - def format_sql(self, sql_query, file_names): - sql_query = sql_query.replace("'{}'", "{}") - sql_query = sql_query.replace("'t'", "t") - if len(file_names) > 0: - self.file_names = file_names - table_names = load_tables_from_parquet( - self._db_connection, file_names - ) - sql_query = sql_query.format(*table_names) - return sql_query + def _format_sql(self, sql_query): + sql_query = re.sub(r"'(\{[0-9a-zA-Z_]+\})'", r"\1", sql_query) + return sql_query.replace("'t'", "t") def name(self): return "IsthmusProducer" diff --git a/substrait_consumer/producers/producer.py b/substrait_consumer/producers/producer.py index c5f56eea..cd017547 100644 --- a/substrait_consumer/producers/producer.py +++ b/substrait_consumer/producers/producer.py @@ -1,50 +1,173 @@ -import string from abc import ABC, abstractmethod -from pathlib import Path -from typing import Iterable +from typing import Optional + +from duckdb import DuckDBPyConnection from substrait_consumer.common import SubstraitUtils class Producer(ABC): - @abstractmethod - def set_db_connection(self, db_connection): - pass + def __init__( + self, + db_connection: Optional[DuckDBPyConnection] = None, + local_files: Optional[dict[str, str]] = None, + named_tables: Optional[dict[str, str]] = None, + ): + if db_connection is None: + db_connection = DuckDBPyConnection() + if local_files is None: + local_files = {} + if named_tables is None: + named_tables = {} + self.setup(db_connection, local_files, named_tables) + + def setup( + self, + db_connection: DuckDBPyConnection, + local_files: dict[str, str], + named_tables: dict[str, str], + ): + """ + Initializes this `Producer` 
instance. + + In particular, expands the paths in `local_files` and `named_tables` to + absolute paths and forwards the arguments to `self._setup` implemented + by classes inheriting from `Producer`. + + Parameters: + db_connection: + DuckDB connection for this `Producer`. + local_files: + A `dict` mapping format argument names to local files paths. + named_tables: + A `dict` mapping table names to local file paths. + """ + self._db_connection = db_connection + self._local_files = SubstraitUtils.compute_full_paths(local_files) + self._named_tables = SubstraitUtils.compute_full_paths(named_tables) + self._setup(db_connection, self._local_files, self._named_tables) + + def produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: + """ + Produces a Substrait plan of the given query in JSON format. + + The query can be given either as sql_query or as Ibis expression. In + the first case, the function first formats the query using + `self.format_sql`. In either case, the function lets the concrete + class produce the substrait plan using `self._produce_substrait`. + + Parameters: + sql_query: + SQL query. + validate: + Whether the Substrait plan should be validated. + ibis_expr: + Ibis expression. + Returns: + Substrait query plan in JSON format. + """ + sql_query = self.format_sql(sql_query) + return self._produce_substrait(sql_query, validate, ibis_expr) + + def format_sql(self, sql_query: str) -> str: + """ + Formats the given SQL query. + + formatting consist of calling `self._format_sql` that is implemented by + concrete classes with producer-specific formatting logic as well as + substituting format arguments for named tables and local files. + + Parameters: + sql_query: + SQL query. + Returns: + Formatted SQL query. 
+ """ + sql_query = self._format_sql(sql_query) + named_tables = {k: k for k in self._named_tables.keys()} + return sql_query.format(**self._local_files, **named_tables) @abstractmethod - def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): + """ + Initializes this `Producer` instance with base-class-specific logic. + + This typically consists of loading the named tables into the producer + back-end such that they are available during subsequent calls to + `produce_substrait`. + + Parameters: + db_connection: + DuckDB connection for this `Producer`. + local_files: + A `dict` mapping format argument names to local files paths. + named_tables: + A `dict` mapping table names to local file paths. + """ pass @abstractmethod - def format_sql(self, sql_query, file_names): + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: + """ + Produces a Substrait plan of the given SQL query in JSON format. + + At this point, the SQL query has already been formatted by the base + class. + + Parameters: + sql_query: + SQL query. + validate: + Whether the Substrait plan should be validated. + ibis_expr: + Ibis expression. + Returns: + Substrait query plan in JSON format. + """ pass + def _format_sql(self, sql_query: str) -> str: + """ + Executes producer-specific reformatting of the given SQL query. + + This function may be overridden by concrete classes in order to change + (i.e., "reformat") the given SQL query such that it fits the syntax of + the producer. + + Parameters: + sql_query: + SQL query. + Returns: + Formatted SQL query. + """ + return sql_query -def load_tables_from_parquet( + +def load_named_tables( db_connection, - file_names: Iterable[str], -) -> list: + named_tables: dict[str, str], +) -> None: """ - Load all the parquet files into separate tables in DuckDB. 
+ Load all the parquet files into separate named_tables in DuckDB. Parameters: db_connection: DuckDB Connection. - file_names: - Name of parquet files. + named_tables: + A `dict` mapping table names to local file paths. Returns: A list of the table names. """ - parquet_file_paths = SubstraitUtils.get_full_path(file_names) - table_names = [] - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem + for table_name, file_path in named_tables.items(): try: db_connection.execute(f"DROP TABLE {table_name}") except: pass create_table_sql = f"CREATE TABLE {table_name} AS SELECT * FROM read_parquet('{file_path}');" db_connection.execute(create_table_sql) - table_names.append(table_name) - - return table_names diff --git a/substrait_consumer/tests/adhoc/test_adhoc_expression.py b/substrait_consumer/tests/adhoc/test_adhoc_expression.py index f80bfadb..d1961595 100644 --- a/substrait_consumer/tests/adhoc/test_adhoc_expression.py +++ b/substrait_consumer/tests/adhoc/test_adhoc_expression.py @@ -63,15 +63,16 @@ def test_adhoc_expression( nation, region, ) -> None: - adhoc_producer.set_db_connection(self.db_connection) - consumer.setup(self.db_connection, FILE_NAMES) + local_files = FILE_NAMES + named_tables = dict() + producer.setup(self.db_connection, local_files, named_tables) + consumer.setup(self.db_connection, local_files, named_tables) with open(SQL_FILE_PATH, "r") as f: sql_query = f.read() if not sql_query: raise ValueError("No SQL query. 
Please write SQL into query.sql") - sql_query = adhoc_producer.format_sql(set(), sql_query, FILE_NAMES) substrait_plan = adhoc_producer.produce_substrait( sql_query, consumer, @@ -92,7 +93,9 @@ def test_adhoc_expression( ) actual_result = consumer.run_substrait_query(substrait_plan) - expected_result = self.db_connection.query(f"{sql_query}").arrow() + duckdb_producer = DuckDBProducer() + duckdb_producer.setup(self.db_connection, local_files, named_tables) + expected_result = duckdb_producer.run_substrait_query(sql_query) verify_equals( actual_result.columns, diff --git a/substrait_consumer/tests/functional/extension_functions/test_approximation_functions.py b/substrait_consumer/tests/functional/extension_functions/test_approximation_functions.py index e19bbe01..0454057f 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_approximation_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_approximation_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -55,7 +55,8 @@ def test_producer_approximation_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -66,7 +67,8 @@ def test_producer_approximation_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -80,7 +82,8 @@ def test_consumer_approximation_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -92,7 +95,8 @@ def test_consumer_approximation_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -105,7 +109,8 @@ 
def test_generate_approximation_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -114,6 +119,7 @@ def test_generate_approximation_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_arithmetic_decimal_functions.py b/substrait_consumer/tests/functional/extension_functions/test_arithmetic_decimal_functions.py index 92955b7a..6b31bf8b 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_arithmetic_decimal_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_arithmetic_decimal_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -66,7 +66,8 @@ def test_producer_arithmetic_decimal_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -77,7 +78,8 @@ def test_producer_arithmetic_decimal_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -91,7 +93,8 @@ def test_consumer_arithmetic_decimal_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -103,7 +106,8 @@ def test_consumer_arithmetic_decimal_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -116,7 +120,8 @@ def test_generate_arithmetic_decimal_functions_results( self, snapshot, test_name: str, - file_names: 
Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -125,6 +130,7 @@ def test_generate_arithmetic_decimal_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_arithmetic_functions.py b/substrait_consumer/tests/functional/extension_functions/test_arithmetic_functions.py index bcd6441a..fc3abbec 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_arithmetic_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_arithmetic_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -82,7 +82,8 @@ def test_producer_arithmetic_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -94,7 +95,8 @@ def test_producer_arithmetic_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -110,7 +112,8 @@ def test_consumer_arithmetic_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -121,7 +124,8 @@ def test_consumer_arithmetic_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -135,7 +139,8 @@ def test_generate_arithmetic_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -144,6 +149,7 @@ def 
test_generate_arithmetic_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py b/substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py index d4bd21ae..6ca0a151 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -56,7 +56,8 @@ def test_producer_boolean_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -66,7 +67,8 @@ def test_producer_boolean_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -80,7 +82,8 @@ def test_consumer_boolean_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -91,7 +94,8 @@ def test_consumer_boolean_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -104,7 +108,8 @@ def test_generate_boolean_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -113,6 +118,7 @@ def test_generate_boolean_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git 
a/substrait_consumer/tests/functional/extension_functions/test_comparison_functions.py b/substrait_consumer/tests/functional/extension_functions/test_comparison_functions.py index 8e392ee9..33839588 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_comparison_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_comparison_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -69,7 +69,8 @@ def test_producer_comparison_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -81,7 +82,8 @@ def test_producer_comparison_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -96,7 +98,8 @@ def test_consumer_comparison_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -109,7 +112,8 @@ def test_consumer_comparison_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -122,7 +126,8 @@ def test_generate_comparison_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -131,6 +136,7 @@ def test_generate_comparison_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_datetime_functions.py b/substrait_consumer/tests/functional/extension_functions/test_datetime_functions.py index 
8354cce1..37556835 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_datetime_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_datetime_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -66,7 +66,8 @@ def test_producer_datetime_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -77,7 +78,8 @@ def test_producer_datetime_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -91,7 +93,8 @@ def test_consumer_datetime_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -103,7 +106,8 @@ def test_consumer_datetime_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -116,7 +120,8 @@ def test_generate_datetime_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -125,6 +130,7 @@ def test_generate_datetime_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_logarithmic_functions.py b/substrait_consumer/tests/functional/extension_functions/test_logarithmic_functions.py index c79745d7..b0b9874e 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_logarithmic_functions.py +++ 
b/substrait_consumer/tests/functional/extension_functions/test_logarithmic_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -69,7 +69,8 @@ def test_producer_logarithmic_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -80,7 +81,8 @@ def test_producer_logarithmic_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -94,7 +96,8 @@ def test_consumer_logarithmic_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -106,7 +109,8 @@ def test_consumer_logarithmic_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -120,7 +124,8 @@ def test_generate_logarithmic_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -129,6 +134,7 @@ def test_generate_logarithmic_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_rounding_functions.py b/substrait_consumer/tests/functional/extension_functions/test_rounding_functions.py index ffc7cf3d..3df62f73 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_rounding_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_rounding_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb 
from ibis.expr.types.relations import Table @@ -53,7 +53,8 @@ def test_producer_rounding_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -65,7 +66,8 @@ def test_producer_rounding_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -80,7 +82,8 @@ def test_consumer_rounding_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -93,7 +96,8 @@ def test_consumer_rounding_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -106,7 +110,8 @@ def test_generate_rounding_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -115,6 +120,7 @@ def test_generate_rounding_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_string_functions.py b/substrait_consumer/tests/functional/extension_functions/test_string_functions.py index 3164eb39..11e6dc79 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_string_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_string_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -65,7 +65,8 @@ def test_producer_string_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, 
str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -77,7 +78,8 @@ def test_producer_string_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -92,7 +94,8 @@ def test_consumer_string_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -105,7 +108,8 @@ def test_consumer_string_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -118,7 +122,8 @@ def test_generate_string_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -127,6 +132,7 @@ def test_generate_string_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_substrait_function_names.py b/substrait_consumer/tests/functional/extension_functions/test_substrait_function_names.py index 9771513a..94f70821 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_substrait_function_names.py +++ b/substrait_consumer/tests/functional/extension_functions/test_substrait_function_names.py @@ -1,16 +1,15 @@ import json -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table from ibis_substrait.tests.compiler.conftest import * -from substrait_consumer.consumers.duckdb_consumer import DuckDBConsumer from substrait_consumer.functional import ( arithmetic_configs, boolean_configs, comparison_configs, datetime_configs, logarithmic_configs, rounding_configs) from substrait_consumer.functional.common import 
check_subtrait_function_names, load_custom_duckdb_table from substrait_consumer.parametrization import custom_parametrization -from substrait_consumer.producers.producer import load_tables_from_parquet +from substrait_consumer.producers.duckdb_producer import DuckDBProducer @pytest.mark.usefixtures("prepare_tpch_parquet_data") @@ -44,7 +43,8 @@ def setup_teardown_function(request): def test_arithmetic_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -56,7 +56,8 @@ def test_arithmetic_function_names( """ self.run_function_name_test( test_name, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -71,7 +72,8 @@ def test_arithmetic_function_names( def test_boolean_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -80,14 +82,21 @@ def test_boolean_function_names( Verify the substrait function names for boolean functions. """ self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, self.table_t + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + self.table_t, ) @custom_parametrization(comparison_configs.SCALAR_FUNCTIONS) def test_comparison_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -98,14 +107,22 @@ def test_comparison_function_names( Verify the substrait function names for comparison functions. 
""" self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, partsupp, nation + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + partsupp, + nation, ) @custom_parametrization(datetime_configs.SCALAR_FUNCTIONS) def test_datetime_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -115,14 +132,21 @@ def test_datetime_function_names( Verify the substrait function names for datetime functions. """ self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, partsupp + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + partsupp, ) @custom_parametrization(logarithmic_configs.SCALAR_FUNCTIONS) def test_logarithmic_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -132,14 +156,21 @@ def test_logarithmic_function_names( Verify the substrait function names for logarithmic functions. """ self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, partsupp + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + partsupp, ) @custom_parametrization(rounding_configs.SCALAR_FUNCTIONS) def test_rounding_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -149,13 +180,20 @@ def test_rounding_function_names( Verify the substrait function names for rounding functions. 
""" self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, partsupp + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + partsupp, ) def run_function_name_test( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -169,8 +207,10 @@ def run_function_name_test( Parameters: test_name: Expected function name as defined by the substrait spec. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local files paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. ibis_expr: @@ -178,28 +218,24 @@ def run_function_name_test( producer: Substrait producer class. *args: - The data tables to be passed to the ibis expression. + The data named_tables to be passed to the ibis expression. """ - producer.set_db_connection(self.db_connection) - - # Load the parquet files into DuckDB and return all the table names as a list - sql_query = producer.format_sql(sql_query[0], file_names) + producer.setup(self.db_connection, local_files, named_tables) # Grab the json representation of the produced substrait plan to verify # the proper substrait function name. 
if type(producer).__name__ == "IbisProducer": if ibis_expr: - substrait_plan = producer.produce_substrait( + substrait_plan_json = producer.produce_substrait( sql_query, validate=False, ibis_expr=ibis_expr(*args) ) - substrait_plan = json.loads(substrait_plan) else: pytest.skip("ibis expression currently undefined") else: - load_tables_from_parquet(self.db_connection, file_names) - substrait_json = self.db_connection.get_substrait_json(sql_query) - proto = substrait_json.fetchone()[0] - substrait_plan = json.loads(proto) + duckdb_producer = DuckDBProducer(self.db_connection) + duckdb_producer.setup(self.db_connection, local_files, named_tables) + substrait_plan_json = duckdb_producer.produce_substrait(sql_query[0]) + substrait_plan = json.loads(substrait_plan_json) check_subtrait_function_names(substrait_plan, test_name) diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_in_subquery_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_in_subquery_plan.json index f1cdf092..e925b072 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_in_subquery_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_in_subquery_plan.json @@ -47,7 +47,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -80,7 +80,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_computation_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_computation_plan.json index 84e6853b..1ab22c01 100644 --- 
a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_computation_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_computation_plan.json @@ -47,7 +47,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -106,7 +106,7 @@ } }, "names": [ - "avg(orders_small.o_totalprice) * Int64(10)" + "avg(orders.o_totalprice) * Int64(10)" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json index dc2b1b1b..4f016b93 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json @@ -48,7 +48,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_rollup_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_rollup_plan.json index efc4ced6..79700375 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_rollup_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_rollup_plan.json @@ -48,7 +48,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json 
b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json index 5f165a36..dd37730b 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json @@ -53,7 +53,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } @@ -152,7 +152,7 @@ } }, "names": [ - "sum(lineitem_small.l_extendedprice)", + "sum(lineitem.l_extendedprice)", "l_linenumber", "l_orderkey" ] diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/computation_between_aggregates_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/computation_between_aggregates_plan.json index 978e8be8..5a6fe23b 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/computation_between_aggregates_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/computation_between_aggregates_plan.json @@ -54,7 +54,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -131,7 +131,7 @@ } }, "names": [ - "avg(orders_small.o_totalprice) + MAX(orders_small.o_totalprice)" + "avg(orders.o_totalprice) + MAX(orders.o_totalprice)" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/compute_within_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/compute_within_aggregate_plan.json index a0a0fcc6..b1f4d1ad 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/compute_within_aggregate_plan.json +++ 
b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/compute_within_aggregate_plan.json @@ -45,7 +45,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -93,7 +93,7 @@ } }, "names": [ - "avg(orders_small.o_totalprice * Int64(10))" + "avg(orders.o_totalprice * Int64(10))" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/multiple_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/multiple_measure_aggregate_plan.json index 2f91ca78..20a81062 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/multiple_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/multiple_measure_aggregate_plan.json @@ -52,7 +52,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -115,9 +115,9 @@ } }, "names": [ - "MIN(orders_small.o_totalprice)", - "MAX(orders_small.o_totalprice)", - "avg(orders_small.o_totalprice)" + "MIN(orders.o_totalprice)", + "MAX(orders.o_totalprice)", + "avg(orders.o_totalprice)" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/single_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/single_measure_aggregate_plan.json index 7ff59292..6c35e688 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/single_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/single_measure_aggregate_plan.json @@ -45,7 +45,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } @@ -74,7 +74,7 @@ } }, "names": [ - "count(lineitem_small.l_partkey)" + 
"count(lineitem.l_partkey)" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_in_subquery_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_in_subquery_plan.json index 5a8b3f9c..a7b5379c 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_in_subquery_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_in_subquery_plan.json @@ -116,7 +116,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -206,7 +206,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_computation_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_computation_plan.json index 36e7d69b..be85886b 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_computation_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_computation_plan.json @@ -111,7 +111,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_cube_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_cube_plan.json index 15729190..4f656d74 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_cube_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_cube_plan.json @@ -151,7 +151,7 @@ }, 
"namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json index 15729190..4f656d74 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json @@ -151,7 +151,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_rollup_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_rollup_plan.json index 15729190..4f656d74 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_rollup_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_rollup_plan.json @@ -151,7 +151,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json index bb67434e..a013bde1 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json @@ -154,7 +154,7 @@ }, "namedTable": { "names": [ 
- "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/computation_between_aggregates_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/computation_between_aggregates_plan.json index be2bb472..89d0f06c 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/computation_between_aggregates_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/computation_between_aggregates_plan.json @@ -118,7 +118,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/compute_within_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/compute_within_aggregate_plan.json index dc8f8c85..5f8c6194 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/compute_within_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/compute_within_aggregate_plan.json @@ -111,7 +111,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/multiple_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/multiple_measure_aggregate_plan.json index 10d97ccd..8d9677b6 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/multiple_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/multiple_measure_aggregate_plan.json @@ -114,7 +114,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } 
diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/single_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/single_measure_aggregate_plan.json index 10fae6b9..354b2bc5 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/single_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/single_measure_aggregate_plan.json @@ -148,7 +148,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_in_subquery_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_in_subquery_plan.json index f79b3838..c3ab0ab9 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_in_subquery_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_in_subquery_plan.json @@ -85,7 +85,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, @@ -177,7 +177,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_computation_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_computation_plan.json index e4213325..206a9ecb 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_computation_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_computation_plan.json @@ -89,7 +89,7 @@ } }, "namedTable": 
{ - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_cube_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_cube_plan.json index e6ad8f39..66516320 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_cube_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_cube_plan.json @@ -116,7 +116,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json index e83e47d1..e9b498b6 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json @@ -116,7 +116,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_rollup_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_rollup_plan.json index 7a94fbd0..85b334f8 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_rollup_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_rollup_plan.json @@ -116,7 
+116,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json index 5e0bc21a..c3bd95ba 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json @@ -123,7 +123,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/computation_between_aggregates_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/computation_between_aggregates_plan.json index 98f919e1..e109fa6c 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/computation_between_aggregates_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/computation_between_aggregates_plan.json @@ -95,7 +95,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/compute_within_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/compute_within_aggregate_plan.json index 01b844f2..7bdad7f1 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/compute_within_aggregate_plan.json +++ 
b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/compute_within_aggregate_plan.json @@ -82,7 +82,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/multiple_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/multiple_measure_aggregate_plan.json index 316780b1..3bdf7341 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/multiple_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/multiple_measure_aggregate_plan.json @@ -88,7 +88,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/single_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/single_measure_aggregate_plan.json index b4827b54..74654709 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/single_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/single_measure_aggregate_plan.json @@ -103,7 +103,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/cross_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/cross_join_plan.json index bddfdf51..f096985f 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/cross_join_plan.json +++ 
b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/cross_join_plan.json @@ -30,7 +30,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -59,7 +59,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/full_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/full_join_plan.json index e64bb2ab..36851268 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/full_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/full_join_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -72,7 +72,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/inner_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/inner_join_plan.json index b2b24a1f..60582706 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/inner_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/inner_join_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -72,7 +72,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_anti_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_anti_join_plan.json index 36b71de4..c04445cf 100644 --- 
a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_anti_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_anti_join_plan.json @@ -38,7 +38,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -69,7 +69,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_join_plan.json index c1c0cae8..bc3ff9cf 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_join_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -72,7 +72,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_semi_join_plan.json index 0d427fdd..809d6966 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_semi_join_plan.json @@ -38,7 +38,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -69,7 +69,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_anti_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_anti_join_plan.json 
index daabccc9..ca43b71b 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_anti_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_anti_join_plan.json @@ -39,7 +39,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -75,7 +75,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_join_plan.json index b64e04fc..a80b3f0a 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_join_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -72,7 +72,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_semi_join_plan.json index f629f0de..bcd21bce 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_semi_join_plan.json @@ -39,7 +39,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -67,7 +67,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/cross_join_plan.json 
b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/cross_join_plan.json index 3f4b8028..42429620 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/cross_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/cross_join_plan.json @@ -80,7 +80,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -162,7 +162,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/full_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/full_join_plan.json index 92079b20..c2f0c315 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/full_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/full_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -182,7 +182,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/inner_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/inner_join_plan.json index e7182699..f8e972f8 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/inner_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/inner_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -182,7 +182,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_join_plan.json 
b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_join_plan.json index 65d685c3..b6cd32da 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -182,7 +182,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_semi_join_plan.json index 1153a98a..07445ad7 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_semi_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -183,7 +183,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_join_plan.json index d07d72b1..a72d44fe 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -182,7 +182,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_semi_join_plan.json 
b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_semi_join_plan.json index 03386ece..ce318c03 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_semi_join_plan.json @@ -103,7 +103,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -181,7 +181,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/cross_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/cross_join_plan.json index d9e5b2f1..96600cdd 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/cross_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/cross_join_plan.json @@ -62,7 +62,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -118,7 +118,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/full_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/full_join_plan.json index dee70c25..5b0d1942 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/full_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/full_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git 
a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/inner_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/inner_join_plan.json index c28d77a6..ff9ff74a 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/inner_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/inner_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_anti_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_anti_join_plan.json index 985f630a..18832797 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_anti_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_anti_join_plan.json @@ -81,7 +81,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -155,7 +155,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_join_plan.json index bbc8406e..2049dcd8 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { 
- "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_semi_join_plan.json index a1a0fe0c..d497bf5b 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_semi_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -138,7 +138,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_anti_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_anti_join_plan.json index e22e13bc..fff610d0 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_anti_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_anti_join_plan.json @@ -85,7 +85,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, @@ -193,7 +193,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_join_plan.json index 74b26e2e..049d8732 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": 
["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_semi_join_plan.json index e1750dd6..84b59063 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_semi_join_plan.json @@ -76,7 +76,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, @@ -138,7 +138,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/count_distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/count_distinct_in_project_plan.json index dbe862e8..61c3e462 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/count_distinct_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/count_distinct_in_project_plan.json @@ -49,7 +49,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } @@ -104,7 +104,7 @@ } }, "names": [ - "count(DISTINCT lineitem_small.l_extendedprice)" + "count(DISTINCT lineitem.l_extendedprice)" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/distinct_in_project_plan.json index e2bca902..fd81e428 100644 --- 
a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/distinct_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/distinct_in_project_plan.json @@ -37,7 +37,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/extended_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/extended_project_plan.json index 4c37b5f6..7b35ee2c 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/extended_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/extended_project_plan.json @@ -48,7 +48,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_all_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_all_col_plan.json index 9c678af9..822e8b61 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_all_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_all_col_plan.json @@ -26,7 +26,7 @@ }, "namedTable": { "names": [ - "region_small" + "region" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_multi_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_multi_col_plan.json index e7bd69eb..f2193df7 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_multi_col_plan.json +++ 
b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_multi_col_plan.json @@ -38,7 +38,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_single_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_single_col_plan.json index 3ad6d75f..88668823 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_single_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_single_col_plan.json @@ -78,7 +78,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/subquery_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/subquery_in_project_plan.json index 38fc05d4..5ada1287 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/subquery_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/subquery_in_project_plan.json @@ -44,7 +44,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -82,7 +82,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/count_distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/count_distinct_in_project_plan.json index eec4e67e..08e55c32 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/count_distinct_in_project_plan.json +++ 
b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/count_distinct_in_project_plan.json @@ -148,7 +148,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/extended_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/extended_project_plan.json index eeff096e..16ab4aff 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/extended_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/extended_project_plan.json @@ -149,7 +149,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_all_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_all_col_plan.json index 6be7845c..25668846 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_all_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_all_col_plan.json @@ -49,7 +49,7 @@ }, "namedTable": { "names": [ - "region_small" + "region" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_multi_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_multi_col_plan.json index 4b13193b..21b956dd 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_multi_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_multi_col_plan.json @@ -134,7 +134,7 @@ }, "namedTable": { "names": [ - 
"lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_single_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_single_col_plan.json index d8f9317c..a4663d38 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_single_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_single_col_plan.json @@ -174,7 +174,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/subquery_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/subquery_in_project_plan.json index f05f3485..dccf7af8 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/subquery_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/subquery_in_project_plan.json @@ -101,7 +101,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -192,7 +192,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/count_distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/count_distinct_in_project_plan.json index fcd14182..27fd40ab 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/count_distinct_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/count_distinct_in_project_plan.json @@ -110,7 +110,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": 
["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/distinct_in_project_plan.json index 290f79f3..17768b11 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/distinct_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/distinct_in_project_plan.json @@ -100,7 +100,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/extended_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/extended_project_plan.json index 4a5f4ee9..1ee39d4c 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/extended_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/extended_project_plan.json @@ -104,7 +104,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_all_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_all_col_plan.json index dc2f71e7..6d8393ce 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_all_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_all_col_plan.json @@ -34,7 +34,7 @@ } }, "namedTable": { - "names": ["REGION_SMALL"] + "names": ["REGION"] } } }, diff --git 
a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_multi_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_multi_col_plan.json index 9b963d28..0e45d3a9 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_multi_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_multi_col_plan.json @@ -94,7 +94,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_single_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_single_col_plan.json index 7f9cc411..aa146e52 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_single_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_single_col_plan.json @@ -94,7 +94,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/read_relation_snapshots/DataFusionProducer/read_named_table_plan.json b/substrait_consumer/tests/functional/relations/read_relation_snapshots/DataFusionProducer/read_named_table_plan.json index c4890931..744d735b 100644 --- a/substrait_consumer/tests/functional/relations/read_relation_snapshots/DataFusionProducer/read_named_table_plan.json +++ b/substrait_consumer/tests/functional/relations/read_relation_snapshots/DataFusionProducer/read_named_table_plan.json @@ -22,7 +22,7 @@ }, "namedTable": { "names": [ - "partsupp_small" + "partsupp" ] } } diff --git a/substrait_consumer/tests/functional/relations/read_relation_snapshots/DuckDBProducer/read_named_table_plan.json 
b/substrait_consumer/tests/functional/relations/read_relation_snapshots/DuckDBProducer/read_named_table_plan.json index f921e268..cd5ec310 100644 --- a/substrait_consumer/tests/functional/relations/read_relation_snapshots/DuckDBProducer/read_named_table_plan.json +++ b/substrait_consumer/tests/functional/relations/read_relation_snapshots/DuckDBProducer/read_named_table_plan.json @@ -57,7 +57,7 @@ }, "namedTable": { "names": [ - "partsupp_small" + "partsupp" ] } } diff --git a/substrait_consumer/tests/functional/relations/read_relation_snapshots/IsthmusProducer/read_named_table_plan.json b/substrait_consumer/tests/functional/relations/read_relation_snapshots/IsthmusProducer/read_named_table_plan.json index 0ec89a82..8c6af347 100644 --- a/substrait_consumer/tests/functional/relations/read_relation_snapshots/IsthmusProducer/read_named_table_plan.json +++ b/substrait_consumer/tests/functional/relations/read_relation_snapshots/IsthmusProducer/read_named_table_plan.json @@ -44,7 +44,7 @@ } }, "namedTable": { - "names": ["PARTSUPP_SMALL"] + "names": ["PARTSUPP"] } } }, diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/except_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/except_plan.json index ba116058..2d7c0a1e 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/except_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/except_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -85,7 +85,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/intersect_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/intersect_plan.json index 27768735..613cdb57 100644 --- 
a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/intersect_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/intersect_plan.json @@ -39,7 +39,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -78,7 +78,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_all_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_all_plan.json index 18bf2fd6..73d8c6bf 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_all_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_all_plan.json @@ -30,7 +30,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -56,7 +56,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_distinct_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_distinct_plan.json index 29c12bc1..223098b9 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_distinct_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_distinct_plan.json @@ -34,7 +34,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -60,7 +60,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/except_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/except_plan.json index 7c7dc3c6..7993fcbd 100644 --- 
a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/except_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/except_plan.json @@ -86,7 +86,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -178,7 +178,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/intersect_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/intersect_plan.json index 1811bef1..31f434a2 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/intersect_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/intersect_plan.json @@ -80,7 +80,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -144,7 +144,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_all_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_all_plan.json index 0fb4f4e7..3ff3cca2 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_all_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_all_plan.json @@ -80,7 +80,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -144,7 +144,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_distinct_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_distinct_plan.json index cfe14d77..fd50a229 100644 --- 
a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_distinct_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_distinct_plan.json @@ -82,7 +82,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -146,7 +146,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/except_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/except_plan.json index e14f4412..a836a4f9 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/except_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/except_plan.json @@ -66,7 +66,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, @@ -137,7 +137,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/intersect_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/intersect_plan.json index 1a9392b4..1da990a8 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/intersect_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/intersect_plan.json @@ -62,7 +62,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -115,7 +115,7 @@ } }, "namedTable": { - "names": ["NATION_SMALL"] + "names": ["NATION"] } } }, diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_all_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_all_plan.json index 078609c3..2020ecc6 100644 --- 
a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_all_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_all_plan.json @@ -62,7 +62,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -115,7 +115,7 @@ } }, "namedTable": { - "names": ["NATION_SMALL"] + "names": ["NATION"] } } }, diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_distinct_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_distinct_plan.json index 46284bb1..3a6f9194 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_distinct_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_distinct_plan.json @@ -75,7 +75,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { - "names": ["NATION_SMALL"] + "names": ["NATION"] } } }, diff --git a/substrait_consumer/tests/functional/relations/test_aggregate_relation.py b/substrait_consumer/tests/functional/relations/test_aggregate_relation.py index 6e0a325f..fa987d7d 100644 --- a/substrait_consumer/tests/functional/relations/test_aggregate_relation.py +++ b/substrait_consumer/tests/functional/relations/test_aggregate_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_aggregate_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_aggregate_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, 
sql_query, ibis_expr, producer, @@ -77,7 +79,8 @@ def test_consumer_aggregate_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def test_consumer_aggregate_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_aggregate_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_aggregate_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_ddl_relation.py b/substrait_consumer/tests/functional/relations/test_ddl_relation.py index 62a42ebc..c1bb3301 100644 --- a/substrait_consumer/tests/functional/relations/test_ddl_relation.py +++ b/substrait_consumer/tests/functional/relations/test_ddl_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -57,7 +57,8 @@ def test_producer_ddl_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -68,7 +69,8 @@ def test_producer_ddl_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -83,7 +85,8 @@ def test_consumer_ddl_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ 
-94,7 +97,8 @@ def test_consumer_ddl_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, diff --git a/substrait_consumer/tests/functional/relations/test_fetch_relation.py b/substrait_consumer/tests/functional/relations/test_fetch_relation.py index df500570..e4e76a99 100644 --- a/substrait_consumer/tests/functional/relations/test_fetch_relation.py +++ b/substrait_consumer/tests/functional/relations/test_fetch_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_fetch_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_fetch_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -77,7 +79,8 @@ def test_consumer_fetch_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def test_consumer_fetch_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_fetch_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_fetch_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_filter_relation.py 
b/substrait_consumer/tests/functional/relations/test_filter_relation.py index 8828dc50..2c141bcb 100644 --- a/substrait_consumer/tests/functional/relations/test_filter_relation.py +++ b/substrait_consumer/tests/functional/relations/test_filter_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -54,7 +54,8 @@ def test_producer_filter_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -65,7 +66,8 @@ def test_producer_filter_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -80,7 +82,8 @@ def test_consumer_filter_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -91,7 +94,8 @@ def test_consumer_filter_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -104,7 +108,8 @@ def test_generate_filter_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -113,6 +118,7 @@ def test_generate_filter_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_join_relation.py b/substrait_consumer/tests/functional/relations/test_join_relation.py index ab95b9a8..14ed6827 100644 --- a/substrait_consumer/tests/functional/relations/test_join_relation.py +++ b/substrait_consumer/tests/functional/relations/test_join_relation.py @@ -1,4 +1,4 @@ -from 
typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -73,7 +73,8 @@ def test_producer_join_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -84,7 +85,8 @@ def test_producer_join_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -99,7 +101,8 @@ def test_consumer_join_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -110,7 +113,8 @@ def test_consumer_join_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -123,7 +127,8 @@ def test_generate_join_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -132,6 +137,7 @@ def test_generate_join_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_project_relation.py b/substrait_consumer/tests/functional/relations/test_project_relation.py index b3a4354d..21f91bf0 100644 --- a/substrait_consumer/tests/functional/relations/test_project_relation.py +++ b/substrait_consumer/tests/functional/relations/test_project_relation.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -69,7 +69,8 @@ def test_producer_project_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: 
dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -80,7 +81,8 @@ def test_producer_project_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -95,7 +97,8 @@ def test_consumer_project_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -106,7 +109,8 @@ def test_consumer_project_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -119,7 +123,8 @@ def test_generate_project_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -128,6 +133,7 @@ def test_generate_project_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_read_relation.py b/substrait_consumer/tests/functional/relations/test_read_relation.py index d9a90cff..dfc8ccee 100644 --- a/substrait_consumer/tests/functional/relations/test_read_relation.py +++ b/substrait_consumer/tests/functional/relations/test_read_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_read_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_read_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, 
producer, @@ -77,7 +79,8 @@ def test_consumer_read_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def test_consumer_read_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_read_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_read_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_set_relation.py b/substrait_consumer/tests/functional/relations/test_set_relation.py index 9d9ac695..0eafc20e 100644 --- a/substrait_consumer/tests/functional/relations/test_set_relation.py +++ b/substrait_consumer/tests/functional/relations/test_set_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_set_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_set_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -77,7 +79,8 @@ def test_consumer_set_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def 
test_consumer_set_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_set_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_set_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_sort_relation.py b/substrait_consumer/tests/functional/relations/test_sort_relation.py index 3309e0b3..cb36a891 100644 --- a/substrait_consumer/tests/functional/relations/test_sort_relation.py +++ b/substrait_consumer/tests/functional/relations/test_sort_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_sort_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_sort_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -77,7 +79,8 @@ def test_consumer_sort_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def test_consumer_sort_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_sort_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, 
str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_sort_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_write_relation.py b/substrait_consumer/tests/functional/relations/test_write_relation.py index 6df19481..23daa8a4 100644 --- a/substrait_consumer/tests/functional/relations/test_write_relation.py +++ b/substrait_consumer/tests/functional/relations/test_write_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -55,7 +55,8 @@ def test_producer_write_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -66,7 +67,8 @@ def test_producer_write_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -81,7 +83,8 @@ def test_consumer_write_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -92,7 +95,8 @@ def test_consumer_write_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q1.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q1.sql index 72c85fd7..ad2c2ee4 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q1.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q1.sql @@ -10,7 +10,7 @@ SELECT avg(l_discount) AS avg_disc, count(*) AS count_order FROM - '{}' + '{lineitem}' WHERE l_shipdate <= date 
'1998-12-01' - interval '120' day GROUP BY diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q10.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q10.sql index 0e03d7ba..6cfd4374 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q10.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q10.sql @@ -8,7 +8,7 @@ SELECT c_phone, c_comment FROM - '{}', '{}', '{}', '{}' + '{customer}', '{orders}', '{lineitem}', '{nation}' WHERE c_custkey = o_custkey AND l_orderkey = o_orderkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q11.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q11.sql index 8cc37a37..7480e9dd 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q11.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q11.sql @@ -2,9 +2,9 @@ SELECT ps.ps_partkey, sum(ps.ps_supplycost * ps.ps_availqty) AS "value" FROM - '{}' ps, - '{}' s, - '{}' n + '{partsupp}' ps, + '{supplier}' s, + '{nation}' n WHERE ps.ps_suppkey = s.s_suppkey AND s.s_nationkey = n.n_nationkey @@ -15,9 +15,9 @@ GROUP BY SELECT sum(ps.ps_supplycost * ps.ps_availqty) * 0.0001000000 FROM - '{}' ps, - '{}' s, - '{}' n + '{partsupp}' ps, + '{supplier}' s, + '{nation}' n WHERE ps.ps_suppkey = s.s_suppkey AND s.s_nationkey = n.n_nationkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q12.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q12.sql index 4e0fb885..35f00d96 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q12.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q12.sql @@ -15,7 +15,7 @@ SELECT 0 END) AS low_line_count FROM - '{}', '{}' + '{orders}', '{lineitem}' WHERE o_orderkey = l_orderkey AND l_shipmode IN ('MAIL', 'SHIP') diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q13.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q13.sql index 314d0189..e69d6192 100644 --- 
a/substrait_consumer/tests/integration/queries/tpch_sql/q13.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q13.sql @@ -6,8 +6,8 @@ FROM ( c_custkey, count(o_orderkey) FROM - '{}' - LEFT OUTER JOIN '{}' ON c_custkey = o_custkey + '{customer}' + LEFT OUTER JOIN '{orders}' ON c_custkey = o_custkey AND o_comment NOT LIKE '%special%requests%' GROUP BY c_custkey) AS c_orders (c_custkey, diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q14.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q14.sql index 50339498..e8074e0d 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q14.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q14.sql @@ -6,7 +6,7 @@ SELECT 0 END) / sum(l_extendedprice * (1 - l_discount)) AS promo_revenue FROM - '{}', '{}' + '{lineitem}', '{part}' WHERE l_partkey = p_partkey AND l_shipdate >= date '1995-09-01' diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q15.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q15.sql index b463ec3b..1fae40c6 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q15.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q15.sql @@ -5,13 +5,13 @@ SELECT s_phone, total_revenue FROM - '{}', + '{supplier}', ( SELECT l_suppkey AS supplier_no, sum(l_extendedprice * (1 - l_discount)) AS total_revenue FROM - '{}' + '{lineitem}' WHERE l_shipdate >= CAST('1996-01-01' AS date) AND l_shipdate < CAST('1996-04-01' AS date) @@ -27,7 +27,7 @@ WHERE l_suppkey AS supplier_no, sum(l_extendedprice * (1 - l_discount)) AS total_revenue FROM - '{}' + '{lineitem}' WHERE l_shipdate >= CAST('1996-01-01' AS date) AND l_shipdate < CAST('1996-04-01' AS date) diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q16.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q16.sql index 0fe504fb..0ad1acc3 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q16.sql +++ 
b/substrait_consumer/tests/integration/queries/tpch_sql/q16.sql @@ -4,7 +4,7 @@ SELECT p_size, count(DISTINCT ps_suppkey) AS supplier_cnt FROM - '{}', '{}' + '{partsupp}', '{part}' WHERE p_partkey = ps_partkey AND p_brand <> 'Brand#45' @@ -14,7 +14,7 @@ WHERE SELECT s_suppkey FROM - '{}' + '{supplier}' WHERE s_comment LIKE '%Customer%Complaints%') GROUP BY diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q17.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q17.sql index 0ad02bb3..497fa221 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q17.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q17.sql @@ -1,7 +1,7 @@ SELECT sum(l_extendedprice) / 7.0 AS avg_yearly FROM - '{}', '{}' + '{lineitem}', '{part}' WHERE p_partkey = l_partkey AND p_brand = 'Brand#23' @@ -10,6 +10,6 @@ WHERE SELECT 0.2 * avg(l_quantity) FROM - '{}' + '{lineitem}' WHERE l_partkey = p_partkey); diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q18.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q18.sql index 86a6727d..84f4f354 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q18.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q18.sql @@ -6,13 +6,13 @@ SELECT o_totalprice, sum(l_quantity) FROM - '{}', '{}', '{}' + '{customer}', '{orders}', '{lineitem}' WHERE o_orderkey IN ( SELECT l_orderkey FROM - '{}' + '{lineitem}' GROUP BY l_orderkey HAVING diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q19.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q19.sql index d5fc6f2b..bf3342fb 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q19.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q19.sql @@ -1,7 +1,7 @@ SELECT sum(l_extendedprice * (1 - l_discount)) AS revenue FROM - '{}', '{}' + '{lineitem}', '{part}' WHERE (p_partkey = l_partkey AND p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 
diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q2.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q2.sql index 1fa1ff23..7fd14096 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q2.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q2.sql @@ -8,7 +8,7 @@ SELECT s_phone, s_comment FROM - '{}', '{}', '{}', '{}', '{}' + '{part}', '{supplier}', '{partsupp}', '{nation}', '{region}' WHERE p_partkey = ps_partkey AND s_suppkey = ps_suppkey @@ -21,7 +21,7 @@ WHERE SELECT min(ps_supplycost) FROM - '{}', '{}', '{}', '{}' + '{partsupp}', '{supplier}', '{nation}', '{region}' WHERE p_partkey = ps_partkey AND s_suppkey = ps_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q20.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q20.sql index 34cc465e..cbd45d2d 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q20.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q20.sql @@ -2,26 +2,26 @@ SELECT s_name, s_address FROM - '{}', '{}' + '{supplier}', '{nation}' WHERE s_suppkey IN ( SELECT ps_suppkey FROM - '{}' + '{partsupp}' WHERE ps_partkey IN ( SELECT p_partkey FROM - '{}' + '{part}' WHERE p_name LIKE 'forest%') AND ps_availqty > ( SELECT 0.5 * sum(l_quantity) FROM - '{}' + '{lineitem}' WHERE l_partkey = ps_partkey AND l_suppkey = ps_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q21.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q21.sql index cb767db3..e8672169 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q21.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q21.sql @@ -2,7 +2,7 @@ SELECT s_name, count(*) AS numwait FROM - '{}', '{}' l1, '{}', '{}' + '{supplier}', '{lineitem}' l1, '{orders}', '{nation}' WHERE s_suppkey = l1.l_suppkey AND o_orderkey = l1.l_orderkey @@ -12,7 +12,7 @@ WHERE SELECT * FROM - '{}' l2 + '{lineitem}' l2 WHERE l2.l_orderkey = l1.l_orderkey AND 
l2.l_suppkey <> l1.l_suppkey) @@ -20,7 +20,7 @@ WHERE SELECT * FROM - '{}' l3 + '{lineitem}' l3 WHERE l3.l_orderkey = l1.l_orderkey AND l3.l_suppkey <> l1.l_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q22.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q22.sql index c9e31690..e0fe3f10 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q22.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q22.sql @@ -7,14 +7,14 @@ FROM ( substring(c_phone FROM 1 FOR 2) AS cntrycode, c_acctbal FROM - '{}' + '{customer}' WHERE substring(c_phone FROM 1 FOR 2) IN ('13', '31', '23', '29', '30', '18', '17') AND c_acctbal > ( SELECT avg(c_acctbal) FROM - '{}' + '{customer}' WHERE c_acctbal > 0.00 AND substring(c_phone FROM 1 FOR 2) IN ('13', '31', '23', '29', '30', '18', '17')) @@ -22,7 +22,7 @@ FROM ( SELECT * FROM - '{}' + '{orders}' WHERE o_custkey = c_custkey)) AS custsale GROUP BY diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q3.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q3.sql index 84877309..3e998043 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q3.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q3.sql @@ -4,7 +4,7 @@ SELECT o_orderdate, o_shippriority FROM - '{}', '{}', '{}' + '{customer}', '{orders}', '{lineitem}' WHERE c_mktsegment = 'BUILDING' AND c_custkey = o_custkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q4.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q4.sql index 376e8d49..ea04a9b5 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q4.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q4.sql @@ -2,7 +2,7 @@ SELECT o_orderpriority, count(*) AS order_count FROM - '{}' + '{orders}' WHERE o_orderdate >= CAST('1993-07-01' AS date) AND o_orderdate < CAST('1993-10-01' AS date) @@ -10,7 +10,7 @@ WHERE SELECT * FROM - '{}' + '{lineitem}' WHERE l_orderkey = o_orderkey 
AND l_commitdate < l_receiptdate) diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q5.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q5.sql index 22bb50c7..d12f46cb 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q5.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q5.sql @@ -2,7 +2,7 @@ SELECT n_name, sum(l_extendedprice * (1 - l_discount)) AS revenue FROM - '{}', '{}', '{}', '{}', '{}', '{}' + '{customer}', '{orders}', '{lineitem}', '{supplier}', '{nation}', '{region}' WHERE c_custkey = o_custkey AND l_orderkey = o_orderkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q6.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q6.sql index abe1b225..dc195aa6 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q6.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q6.sql @@ -1,7 +1,7 @@ SELECT sum(l_extendedprice * l_discount) AS revenue FROM - '{}' + '{lineitem}' WHERE l_shipdate >= CAST('1994-01-01' AS date) AND l_shipdate < CAST('1995-01-01' AS date) diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q7.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q7.sql index a48faa50..54c7f8aa 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q7.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q7.sql @@ -10,7 +10,7 @@ FROM ( extract(year FROM l_shipdate) AS l_year, l_extendedprice * (1 - l_discount) AS volume FROM - '{}', '{}', '{}', '{}', '{}' n1, '{}' n2 + '{supplier}', '{lineitem}', '{orders}', '{customer}', '{nation}' n1, '{nation}' n2 WHERE s_suppkey = l_suppkey AND o_orderkey = l_orderkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q8.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q8.sql index 6a38bf07..f241b6e9 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q8.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q8.sql @@ -12,7 
+12,7 @@ FROM ( l_extendedprice * (1 - l_discount) AS volume, n2.n_name AS nation FROM - '{}', '{}', '{}', '{}', '{}', '{}' n1, '{}' n2, '{}' + '{part}', '{supplier}', '{lineitem}', '{orders}', '{customer}', '{nation}' n1, '{nation}' n2, '{region}' WHERE p_partkey = l_partkey AND s_suppkey = l_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q9.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q9.sql index 8586f5b9..7093a748 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q9.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q9.sql @@ -8,7 +8,7 @@ FROM ( extract(year FROM o_orderdate) AS o_year, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount FROM - '{}', '{}', '{}', '{}', '{}', '{}' + '{part}', '{supplier}', '{lineitem}', '{partsupp}', '{orders}', '{nation}' WHERE s_suppkey = l_suppkey AND ps_suppkey = l_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_substrait_plans/query_03_plan.json b/substrait_consumer/tests/integration/queries/tpch_substrait_plans/query_03_plan.json index d4dea1d0..9ed211fc 100644 --- a/substrait_consumer/tests/integration/queries/tpch_substrait_plans/query_03_plan.json +++ b/substrait_consumer/tests/integration/queries/tpch_substrait_plans/query_03_plan.json @@ -113,40 +113,26 @@ } }, "baseSchema": { - "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], "struct": { "types": [{ "i64": { "nullability": "NULLABILITY_REQUIRED" } }, { - "i64": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "i64": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "i64": { + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { 
- "decimal": { - "scale": 2, - "precision": 15, + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { - "decimal": { - "scale": 2, - "precision": 15, + "i32": { "nullability": "NULLABILITY_REQUIRED" } }, { - "decimal": { - "scale": 2, - "precision": 15, + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { @@ -163,36 +149,12 @@ "string": { "nullability": "NULLABILITY_REQUIRED" } - }, { - "date": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "date": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "date": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "string": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "string": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "string": { - "nullability": "NULLABILITY_REQUIRED" - } }], "nullability": "NULLABILITY_REQUIRED" } }, "namedTable": { - "names": ["LINEITEM"] + "names": ["CUSTOMER"] } } }, @@ -203,14 +165,14 @@ } }, "baseSchema": { - "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], "struct": { "types": [{ "i64": { "nullability": "NULLABILITY_REQUIRED" } }, { - "string": { + "i64": { "nullability": "NULLABILITY_REQUIRED" } }, { @@ -218,23 +180,27 @@ "nullability": "NULLABILITY_REQUIRED" } }, { - "i32": { + "decimal": { + "scale": 2, + "precision": 15, "nullability": "NULLABILITY_REQUIRED" } }, { - "string": { + "date": { "nullability": "NULLABILITY_REQUIRED" } }, { - "decimal": { - "scale": 2, - "precision": 15, + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { "string": { "nullability": "NULLABILITY_REQUIRED" } + }, { + "i32": { + "nullability": "NULLABILITY_REQUIRED" + } }, { "string": { "nullability": "NULLABILITY_REQUIRED" @@ -244,7 +210,7 @@ } }, "namedTable": { - "names": ["CUSTOMER"] + "names": ["ORDERS"] } } } @@ -257,7 +223,7 @@ } }, 
"baseSchema": { - "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], "struct": { "types": [{ "i64": { @@ -268,7 +234,11 @@ "nullability": "NULLABILITY_REQUIRED" } }, { - "string": { + "i64": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { "nullability": "NULLABILITY_REQUIRED" } }, { @@ -278,19 +248,49 @@ "nullability": "NULLABILITY_REQUIRED" } }, { - "date": { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { "string": { "nullability": "NULLABILITY_REQUIRED" } + }, { + "date": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "date": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "date": { + "nullability": "NULLABILITY_REQUIRED" + } }, { "string": { "nullability": "NULLABILITY_REQUIRED" } }, { - "i32": { + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { @@ -302,7 +302,7 @@ } }, "namedTable": { - "names": ["ORDERS"] + "names": ["LINEITEM"] } } } @@ -329,7 +329,7 @@ "selection": { "directReference": { "structField": { - "field": 22 + "field": 6 } }, "rootReference": { @@ -359,7 +359,6 @@ "selection": { "directReference": { "structField": { - "field": 16 } }, "rootReference": { @@ -371,7 +370,7 @@ "selection": { "directReference": { "structField": { - "field": 25 + "field": 9 } }, "rootReference": { @@ -395,6 +394,7 @@ "selection": { "directReference": { "structField": { + "field": 17 } }, 
"rootReference": { @@ -406,7 +406,7 @@ "selection": { "directReference": { "structField": { - "field": 24 + "field": 8 } }, "rootReference": { @@ -430,7 +430,7 @@ "selection": { "directReference": { "structField": { - "field": 28 + "field": 12 } }, "rootReference": { @@ -470,7 +470,7 @@ "selection": { "directReference": { "structField": { - "field": 10 + "field": 27 } }, "rootReference": { @@ -505,6 +505,7 @@ "selection": { "directReference": { "structField": { + "field": 17 } }, "rootReference": { @@ -514,7 +515,7 @@ "selection": { "directReference": { "structField": { - "field": 28 + "field": 12 } }, "rootReference": { @@ -524,7 +525,7 @@ "selection": { "directReference": { "structField": { - "field": 31 + "field": 15 } }, "rootReference": { @@ -545,7 +546,7 @@ "selection": { "directReference": { "structField": { - "field": 5 + "field": 22 } }, "rootReference": { @@ -586,7 +587,7 @@ "selection": { "directReference": { "structField": { - "field": 6 + "field": 23 } }, "rootReference": { diff --git a/substrait_consumer/tests/integration/queries/tpch_test_cases.py b/substrait_consumer/tests/integration/queries/tpch_test_cases.py index 6579054e..5065702d 100644 --- a/substrait_consumer/tests/integration/queries/tpch_test_cases.py +++ b/substrait_consumer/tests/integration/queries/tpch_test_cases.py @@ -3,211 +3,245 @@ TPCH_QUERY_TESTS = ( { "test_name": "test_tpch_sql_1", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": get_sql("q1.sql"), "substrait_query": get_substrait_plan("query_01_plan.json"), }, { "test_name": "test_tpch_sql_2", - "file_names": [ - "part.parquet", - "supplier.parquet", - "partsupp.parquet", - "nation.parquet", - "region.parquet", - "partsupp.parquet", - "supplier.parquet", - "nation.parquet", - "region.parquet", - ], + "local_files": {}, + "named_tables": { + "part": "part.parquet", + "supplier": "supplier.parquet", + "partsupp": "partsupp.parquet", + "nation": 
"nation.parquet", + "region": "region.parquet", + "partsupp": "partsupp.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + }, "sql_query": get_sql("q2.sql"), "substrait_query": get_substrait_plan("query_02_plan.json"), }, { "test_name": "test_tpch_sql_3", - "file_names": [ - "lineitem.parquet", - "customer.parquet", - "orders.parquet", - ], + "local_files": {}, + "named_tables": { + "lineitem": "lineitem.parquet", + "customer": "customer.parquet", + "orders": "orders.parquet", + }, "sql_query": get_sql("q3.sql"), "substrait_query": get_substrait_plan("query_03_plan.json"), }, { "test_name": "test_tpch_sql_4", - "file_names": ["orders.parquet", "lineitem.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet", "lineitem": "lineitem.parquet"}, "sql_query": get_sql("q4.sql"), "substrait_query": get_substrait_plan("query_04_plan.json"), }, { "test_name": "test_tpch_sql_5", - "file_names": [ - "customer.parquet", - "orders.parquet", - "lineitem.parquet", - "supplier.parquet", - "nation.parquet", - "region.parquet", - ], + "local_files": {}, + "named_tables": { + "customer": "customer.parquet", + "orders": "orders.parquet", + "lineitem": "lineitem.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + }, "sql_query": get_sql("q5.sql"), "substrait_query": get_substrait_plan("query_05_plan.json"), }, { "test_name": "test_tpch_sql_6", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": get_sql("q6.sql"), "substrait_query": get_substrait_plan("query_06_plan.json"), }, { "test_name": "test_tpch_sql_7", - "file_names": [ - "supplier.parquet", - "lineitem.parquet", - "orders.parquet", - "customer.parquet", - "nation.parquet", - "nation.parquet", - ], + "local_files": {}, + "named_tables": { + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "orders": 
"orders.parquet", + "customer": "customer.parquet", + "nation": "nation.parquet", + "nation": "nation.parquet", + }, "sql_query": get_sql("q7.sql"), "substrait_query": get_substrait_plan("query_07_plan.json"), }, { "test_name": "test_tpch_sql_8", - "file_names": [ - "part.parquet", - "supplier.parquet", - "lineitem.parquet", - "orders.parquet", - "customer.parquet", - "nation.parquet", - "nation.parquet", - "region.parquet", - ], + "local_files": {}, + "named_tables": { + "part": "part.parquet", + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "orders": "orders.parquet", + "customer": "customer.parquet", + "nation": "nation.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + }, "sql_query": get_sql("q8.sql"), "substrait_query": get_substrait_plan("query_08_plan.json"), }, { "test_name": "test_tpch_sql_9", - "file_names": [ - "part.parquet", - "supplier.parquet", - "lineitem.parquet", - "partsupp.parquet", - "orders.parquet", - "nation.parquet", - ], + "local_files": {}, + "named_tables": { + "part": "part.parquet", + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "partsupp": "partsupp.parquet", + "orders": "orders.parquet", + "nation": "nation.parquet", + }, "sql_query": get_sql("q9.sql"), "substrait_query": get_substrait_plan("query_09_plan.json"), }, { "test_name": "test_tpch_sql_10", - "file_names": [ - "customer.parquet", - "orders.parquet", - "lineitem.parquet", - "nation.parquet", - ], + "local_files": {}, + "named_tables": { + "customer": "customer.parquet", + "orders": "orders.parquet", + "lineitem": "lineitem.parquet", + "nation": "nation.parquet", + }, "sql_query": get_sql("q10.sql"), "substrait_query": get_substrait_plan("query_10_plan.json"), }, { "test_name": "test_tpch_sql_11", - "file_names": [ - "partsupp.parquet", - "supplier.parquet", - "nation.parquet", - "partsupp.parquet", - "supplier.parquet", - "nation.parquet", - ], + "local_files": {}, + "named_tables": { + "partsupp": 
"partsupp.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "partsupp": "partsupp.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + }, "sql_query": get_sql("q11.sql"), "substrait_query": get_substrait_plan("query_11_plan.json"), }, { "test_name": "test_tpch_sql_12", - "file_names": ["orders.parquet", "lineitem.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet", "lineitem": "lineitem.parquet"}, "sql_query": get_sql("q12.sql"), "substrait_query": get_substrait_plan("query_12_plan.json"), }, { "test_name": "test_tpch_sql_13", - "file_names": ["customer.parquet", "orders.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet", "orders": "orders.parquet"}, "sql_query": get_sql("q13.sql"), "substrait_query": get_substrait_plan("query_13_plan.json"), }, { "test_name": "test_tpch_sql_14", - "file_names": ["lineitem.parquet", "part.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet", "part": "part.parquet"}, "sql_query": get_sql("q14.sql"), "substrait_query": get_substrait_plan("query_14_plan.json"), }, { "test_name": "test_tpch_sql_15", - "file_names": [ - "supplier.parquet", - "lineitem.parquet", - "lineitem.parquet", - ], + "local_files": {}, + "named_tables": { + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q15.sql"), "substrait_query": get_substrait_plan("query_15_plan.json"), }, { "test_name": "test_tpch_sql_16", - "file_names": ["partsupp.parquet", "part.parquet", "supplier.parquet"], + "local_files": {}, + "named_tables": { + "partsupp": "partsupp.parquet", + "part": "part.parquet", + "supplier": "supplier.parquet", + }, "sql_query": get_sql("q16.sql"), "substrait_query": get_substrait_plan("query_16_plan.json"), }, { "test_name": "test_tpch_sql_17", - "file_names": ["lineitem.parquet", "part.parquet", "lineitem.parquet"], + "local_files": {}, + 
"named_tables": { + "lineitem": "lineitem.parquet", + "part": "part.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q17.sql"), "substrait_query": get_substrait_plan("query_17_plan.json"), }, { "test_name": "test_tpch_sql_18", - "file_names": [ - "customer.parquet", - "orders.parquet", - "lineitem.parquet", - "lineitem.parquet", - ], + "local_files": {}, + "named_tables": { + "customer": "customer.parquet", + "orders": "orders.parquet", + "lineitem": "lineitem.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q18.sql"), "substrait_query": get_substrait_plan("query_18_plan.json"), }, { "test_name": "test_tpch_sql_19", - "file_names": ["lineitem.parquet", "part.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet", "part": "part.parquet"}, "sql_query": get_sql("q19.sql"), "substrait_query": get_substrait_plan("query_19_plan.json"), }, { "test_name": "test_tpch_sql_20", - "file_names": [ - "supplier.parquet", - "nation.parquet", - "partsupp.parquet", - "part.parquet", - "lineitem.parquet", - ], + "local_files": {}, + "named_tables": { + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "partsupp": "partsupp.parquet", + "part": "part.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q20.sql"), "substrait_query": get_substrait_plan("query_20_plan.json"), }, { "test_name": "test_tpch_sql_21", - "file_names": [ - "supplier.parquet", - "lineitem.parquet", - "orders.parquet", - "nation.parquet", - "lineitem.parquet", - "lineitem.parquet", - ], + "local_files": {}, + "named_tables": { + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "orders": "orders.parquet", + "nation": "nation.parquet", + "lineitem": "lineitem.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q21.sql"), "substrait_query": get_substrait_plan("query_21_plan.json"), }, { "test_name": "test_tpch_sql_22", - "file_names": ["customer.parquet", "customer.parquet", 
"orders.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer.parquet", + "customer": "customer.parquet", + "orders": "orders.parquet", + }, "sql_query": get_sql("q22.sql"), "substrait_query": get_substrait_plan("query_22_plan.json"), }, diff --git a/substrait_consumer/tests/integration/test_acero_tpch.py b/substrait_consumer/tests/integration/test_acero_tpch.py index 7f512b4c..8c31d8d2 100644 --- a/substrait_consumer/tests/integration/test_acero_tpch.py +++ b/substrait_consumer/tests/integration/test_acero_tpch.py @@ -3,12 +3,11 @@ import duckdb import pyarrow as pa import pytest -from pyarrow import compute from substrait_consumer.common import SubstraitUtils from substrait_consumer.consumers.acero_consumer import AceroConsumer -from substrait_consumer.consumers.duckdb_consumer import DuckDBConsumer from substrait_consumer.parametrization import custom_parametrization +from substrait_consumer.producers.duckdb_producer import DuckDBProducer from substrait_consumer.verification import verify_equals from substrait_consumer.tests.integration.queries.tpch_test_cases import TPCH_QUERY_TESTS @@ -26,7 +25,7 @@ def setup_teardown_class(request): cls.db_connection = duckdb.connect() cls.db_connection.execute("INSTALL substrait") cls.db_connection.execute("LOAD substrait") - cls.duckdb_consumer = DuckDBConsumer(cls.db_connection) + cls.duckdb_producer = DuckDBProducer(cls.db_connection) cls.acero_consumer = AceroConsumer() cls.utils = SubstraitUtils() @@ -38,7 +37,8 @@ def setup_teardown_class(request): def test_isthmus_substrait_plan( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, @@ -56,8 +56,10 @@ def test_isthmus_substrait_plan( Parameters: test_name: Name of test. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local files paths. 
+ named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. substrait_query: @@ -68,7 +70,7 @@ def test_isthmus_substrait_plan( # Format the substrait query to include the parquet file paths. # Calculate the result of running the substrait query plan. consumer = AceroConsumer() - consumer.setup(self.db_connection, file_names) + consumer.setup(self.db_connection, local_files, named_tables) subtrait_query_result_tb = consumer.run_substrait_query( substrait_query @@ -77,8 +79,8 @@ def test_isthmus_substrait_plan( # Reformat the sql query to be used by duck db by inserting all the # parquet filepaths where the table names should be. # Calculate results to verify against by running the SQL query on DuckDB - sql_query = self.utils.format_sql_query(sql_query, file_names) - duckdb_query_result_tb = self.db_connection.query(f"{sql_query}").arrow() + sql_query = self.duckdb_producer.format_sql(sql_query) + duckdb_query_result_tb = self.duckdb_producer.run_sql_query(sql_query) col_names = [x.lower() for x in subtrait_query_result_tb.column_names] exp_col_names = [x.lower() for x in duckdb_query_result_tb.column_names] @@ -115,13 +117,14 @@ def test_isthmus_substrait_plan( def test_duckdb_substrait_plan( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, ) -> None: """ - 1. Load all the parquet files into DuckDB as separate tables. + 1. Load all the parquet files into DuckDB as separate named_tables. 2. Format the SQL query to work with DuckDB by inserting all the table names. 3. Execute the SQL on DuckDB. 4. Produce the substrait plan with duckdb @@ -132,28 +135,24 @@ def test_duckdb_substrait_plan( Parameters: test_name: Name of test. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local files paths. + named_tables: + A `dict` mapping table names to local file paths. 
sql_query: SQL query. """ - # Load the parquet files into DuckDB and return all the table names as a list - table_names = self.duckdb_consumer.load_tables_from_parquet( - file_names - ) - - # Format the sql query by inserting all the table names - sql_query = sql_query.format(*table_names) + self.duckdb_producer.setup(self.db_connection, local_files, named_tables) + self.acero_consumer.setup(self.db_connection, local_files, named_tables) # Convert the SQL into a substrait query plan - duckdb_substrait_plan = self.db_connection.get_substrait_json(sql_query) - proto_bytes = duckdb_substrait_plan.fetchone()[0] + proto_bytes = self.duckdb_producer.produce_substrait(sql_query) # Run the duckdb produced substrait plan against Acero subtrait_query_result_tb = self.acero_consumer.run_substrait_query(proto_bytes) # Calculate results to verify against by running the SQL query on DuckDB - duckdb_sql_result_tb = self.db_connection.query(f"{sql_query}").arrow() + duckdb_sql_result_tb = self.duckdb_producer.run_sql_query(sql_query) col_names = [x.lower() for x in subtrait_query_result_tb.column_names] exp_col_names = [x.lower() for x in duckdb_sql_result_tb.column_names] diff --git a/substrait_consumer/tests/integration/test_duckdb_tpch.py b/substrait_consumer/tests/integration/test_duckdb_tpch.py index 8d291cda..c143feaf 100644 --- a/substrait_consumer/tests/integration/test_duckdb_tpch.py +++ b/substrait_consumer/tests/integration/test_duckdb_tpch.py @@ -3,6 +3,7 @@ from substrait_consumer.consumers.duckdb_consumer import DuckDBConsumer from substrait_consumer.parametrization import custom_parametrization +from substrait_consumer.producers.duckdb_producer import DuckDBProducer from substrait_consumer.verification import verify_equals from .queries.tpch_test_cases import TPCH_QUERY_TESTS @@ -22,6 +23,7 @@ def setup_teardown_class(request): cls.db_connection.execute("INSTALL substrait") cls.db_connection.execute("LOAD substrait") cls.consumer = DuckDBConsumer(cls.db_connection) 
+ cls.producer = DuckDBProducer(cls.db_connection) yield @@ -31,13 +33,14 @@ def setup_teardown_class(request): def test_substrait_query( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, ) -> None: """ - 1. Load all the parquet files into DuckDB as separate tables. + 1. Load all the parquet files into DuckDB as separate named_tables. 2. Format the SQL query to work with DuckDB by inserting all the table names. 3. Execute the SQL on DuckDB. 4. Run the substrait query plan. @@ -47,28 +50,23 @@ def test_substrait_query( Parameters: test_name: Name of test. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local files paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. """ - - # Load the parquet files into DuckDB and return all the table names as a list - table_names = self.consumer.load_tables_from_parquet( - file_names - ) - - # Format the sql query by inserting all the table names - sql_query = sql_query.format(*table_names) + self.consumer.setup(self.db_connection, local_files, named_tables) + self.producer.setup(self.db_connection, local_files, named_tables) # Convert the SQL into a substrait query plan and run the plan. 
- substrait_plan = self.db_connection.get_substrait_json(sql_query) - proto_bytes = substrait_plan.fetchone()[0] + proto_bytes = self.producer.produce_substrait(sql_query) subtrait_query_result_tb = self.consumer.run_substrait_query(proto_bytes) # Calculate results to verify against by running the SQL query on DuckDB - duckdb_sql_result_tb = self.db_connection.query(f"{sql_query}").arrow() + duckdb_sql_result_tb = self.producer.run_sql_query(sql_query) col_names = [x.lower() for x in subtrait_query_result_tb.column_names] exp_col_names = [x.lower() for x in duckdb_sql_result_tb.column_names] diff --git a/substrait_consumer/tests/integration/test_tpch_plans_valid.py b/substrait_consumer/tests/integration/test_tpch_plans_valid.py index f2735593..224b1b57 100644 --- a/substrait_consumer/tests/integration/test_tpch_plans_valid.py +++ b/substrait_consumer/tests/integration/test_tpch_plans_valid.py @@ -6,6 +6,7 @@ from substrait_consumer.consumers.duckdb_consumer import DuckDBConsumer from substrait_consumer.parametrization import custom_parametrization +from substrait_consumer.producers.duckdb_producer import DuckDBProducer from substrait_consumer.producers.isthmus_producer import IsthmusProducer from .queries.tpch_test_cases import TPCH_QUERY_TESTS @@ -27,6 +28,7 @@ def setup_teardown_class(request): cls.db_connection.execute("INSTALL substrait") cls.db_connection.execute("LOAD substrait") cls.duckdb_consumer = DuckDBConsumer(cls.db_connection) + cls.duckdb_producer = DuckDBProducer(cls.db_connection) yield @@ -37,7 +39,8 @@ def test_isthmus_substrait_plan_generation( self, snapshot, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, @@ -46,8 +49,7 @@ def test_isthmus_substrait_plan_generation( Generate the substrait plans using Isthmus. 
""" producer = IsthmusProducer() - producer.set_db_connection(self.db_connection) - sql_query = producer.format_sql(sql_query, file_names) + producer.setup(self.db_connection, local_files, named_tables) substrait_query = producer.produce_substrait(sql_query) snapshot.snapshot_dir = PLAN_SNAPSHOT_DIR @@ -58,7 +60,8 @@ def test_isthmus_substrait_plan_generation( def test_isthmus_substrait_plans_valid( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, @@ -86,7 +89,8 @@ def test_isthmus_substrait_plans_valid( def test_duckdb_substrait_plans_valid( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, @@ -95,8 +99,10 @@ def test_duckdb_substrait_plans_valid( Run the Duckdb generated substrait plans through the substrait validator. Parameters: - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local files paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. """ @@ -112,14 +118,8 @@ def test_duckdb_substrait_plans_valid( # too few field names config.override_diagnostic_level(4003, "error", "info") - # Load the parquet files into DuckDB and return all the table names as a list - table_names = self.duckdb_consumer.load_tables_from_parquet( - file_names - ) - # Format the sql query by inserting all the table names - sql_query = sql_query.format(*table_names) + self.duckdb_producer.setup(self.db_connection, local_files, named_tables) - duckdb_substrait_plan = self.db_connection.get_substrait(sql_query) - proto_bytes = duckdb_substrait_plan.fetchone()[0] + proto_bytes = self.duckdb_producer.produce_substrait(sql_query) sv.check_plan_valid(proto_bytes, config)