From d4f2aa741ce2150adc7a7ca0571bccc580024d79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ingo=20M=C3=BCller?= Date: Tue, 12 Nov 2024 16:43:15 +0100 Subject: [PATCH] feat: overhaul SQL string reformatting and Producer/Consumer interfaces (#128) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR does two somewhat independent changes that make writing test cases simpler and more robust: an overhaul of the `Producer` and `Consumer` interfaces and a change in how the format arguments of named tables and local files in SQL strings are specified. The two changes have been tied together into one PR because changing the arguments alone proved to be difficult due to the previous brittleness of the two interfaces. The change related to the format arguments consists in the following: instead of specifying a list of `file_names` for each test case, each of which would then either be loaded into a table whose name is derived from the corresponding file name *or* would be processed as is if the SQL string contained the magic words `read_parquet`, local files and named tables are now specified independently of each other and both are specified as a dict: the key of each entry corresponds to the placeholder used in the format string (such as '{customer}') and the value consists of the local file path (such as `customer_small.parquet`). For named tables, the idea is that the corresponding system loads the local file into a table with the given name; local files are processed directly. Since the definition of test cases is used to create parametrized test fixtures, this change involves all test functions that use these parametrized fixtures. 
As another consequence of this change, some plan snapshots change: some table names now don't have the `_small` suffix anymore because the table name is specified explicitly rather than being derived from the file name and in one case the order of the input tables in the `FROM` clause has changed (the new order corresponds to the one in the official TPC-H query whereas the previous order didn't). The change related to the `Producer` and `Consumer` interfaces simplifies how consumers are created and used. First, both interfaces now have a `setup` method implemented by the interface which takes care of expanding the relative file paths into absolute ones. This removes the need to do that expansion in various other places. Similarly, `Producer.format_sql` takes care of replacing format arguments such that derived classes don't have to. Again in the same spirit, `Producer.produce_substrait` also takes care of formatting the SQL query such that call sites can directly call that function instead of having to remember to reformat the SQL string beforehand. The PR also replaces some direct usages of the DuckDB connection with more high-level usages of `DuckDBConsumer`, such that the encapsulated functionality described above can be used. To that aim, that class also gets a new method `run_sql_query`. Finally, the PR also removes some duplicate or unused code related to loading local files and formatting queries. I have manually checked and there are no tests that change their fail/pass status compared to the current `main`. 
Signed-off-by: Ingo Müller --- README.md | 59 ++--- substrait_consumer/common.py | 63 +---- .../consumers/acero_consumer.py | 25 +- substrait_consumer/consumers/consumer.py | 31 ++- .../consumers/datafusion_consumer.py | 24 +- .../consumers/duckdb_consumer.py | 41 +-- substrait_consumer/context.py | 8 +- .../functional/aggregate_relation_configs.py | 33 ++- .../functional/approximation_configs.py | 6 +- .../functional/arithmetic_configs.py | 90 ++++--- .../functional/arithmetic_decimal_configs.py | 27 +- .../functional/boolean_configs.py | 18 +- substrait_consumer/functional/common.py | 45 ++-- .../functional/comparison_configs.py | 42 ++-- .../functional/datetime_configs.py | 24 +- .../functional/ddl_relation_configs.py | 21 +- .../functional/fetch_relation_configs.py | 6 +- .../functional/filter_relation_configs.py | 36 ++- .../functional/join_relation_configs.py | 78 +++++- .../functional/logarithmic_configs.py | 12 +- .../functional/project_relation_configs.py | 24 +- .../sql/approximation_functions_sql.py | 4 +- .../sql/arithmetic_demical_functions_sql.py | 18 +- .../queries/sql/arithmetic_functions_sql.py | 56 ++--- .../queries/sql/comparison_functions_sql.py | 12 +- .../queries/sql/datetime_functions_sql.py | 14 +- .../queries/sql/logarithmic_functions_sql.py | 8 +- .../sql/relations/aggregate_relations.py | 22 +- .../queries/sql/relations/ddl_relations.py | 12 +- .../queries/sql/relations/fetch_relations.py | 4 +- .../queries/sql/relations/filter_relations.py | 24 +- .../queries/sql/relations/join_relations.py | 52 ++-- .../sql/relations/project_relations.py | 16 +- .../queries/sql/relations/read_relations.py | 4 +- .../queries/sql/relations/set_relations.py | 16 +- .../queries/sql/relations/sort_relations.py | 16 +- .../queries/sql/relations/write_relations.py | 6 +- .../queries/sql/rounding_functions_sql.py | 6 +- .../queries/sql/string_functions_sql.py | 50 ++-- .../functional/read_relation_configs.py | 15 +- .../functional/rounding_configs.py | 9 +- 
.../functional/set_relation_configs.py | 24 +- .../functional/sort_relation_configs.py | 24 +- .../functional/string_configs.py | 75 ++++-- .../functional/write_relation_configs.py | 9 +- .../producers/datafusion_producer.py | 35 ++- .../producers/duckdb_producer.py | 32 +-- substrait_consumer/producers/ibis_producer.py | 19 +- .../producers/isthmus_producer.py | 31 +-- substrait_consumer/producers/producer.py | 165 ++++++++++-- .../tests/adhoc/test_adhoc_expression.py | 11 +- .../test_approximation_functions.py | 20 +- .../test_arithmetic_decimal_functions.py | 20 +- .../test_arithmetic_functions.py | 20 +- .../test_boolean_functions.py | 20 +- .../test_comparison_functions.py | 20 +- .../test_datetime_functions.py | 20 +- .../test_logarithmic_functions.py | 20 +- .../test_rounding_functions.py | 20 +- .../test_string_functions.py | 20 +- .../test_substrait_function_names.py | 94 ++++--- .../aggregate_in_subquery_plan.json | 4 +- .../aggregate_with_computation_plan.json | 4 +- .../aggregate_with_group_by_plan.json | 2 +- .../aggregate_with_group_by_rollup_plan.json | 2 +- .../aggregate_with_grouping_set_plan.json | 4 +- .../computation_between_aggregates_plan.json | 4 +- .../compute_within_aggregate_plan.json | 4 +- .../multiple_measure_aggregate_plan.json | 8 +- .../single_measure_aggregate_plan.json | 4 +- .../aggregate_in_subquery_plan.json | 4 +- .../aggregate_with_computation_plan.json | 2 +- .../aggregate_with_group_by_cube_plan.json | 2 +- .../aggregate_with_group_by_plan.json | 2 +- .../aggregate_with_group_by_rollup_plan.json | 2 +- .../aggregate_with_grouping_set_plan.json | 2 +- .../computation_between_aggregates_plan.json | 2 +- .../compute_within_aggregate_plan.json | 2 +- .../multiple_measure_aggregate_plan.json | 2 +- .../single_measure_aggregate_plan.json | 2 +- .../aggregate_in_subquery_plan.json | 4 +- .../aggregate_with_computation_plan.json | 2 +- .../aggregate_with_group_by_cube_plan.json | 2 +- .../aggregate_with_group_by_plan.json | 2 +- 
.../aggregate_with_group_by_rollup_plan.json | 2 +- .../aggregate_with_grouping_set_plan.json | 2 +- .../computation_between_aggregates_plan.json | 2 +- .../compute_within_aggregate_plan.json | 2 +- .../multiple_measure_aggregate_plan.json | 2 +- .../single_measure_aggregate_plan.json | 2 +- .../DataFusionProducer/cross_join_plan.json | 4 +- .../DataFusionProducer/full_join_plan.json | 4 +- .../DataFusionProducer/inner_join_plan.json | 4 +- .../left_anti_join_plan.json | 4 +- .../DataFusionProducer/left_join_plan.json | 4 +- .../left_semi_join_plan.json | 4 +- .../right_anti_join_plan.json | 4 +- .../DataFusionProducer/right_join_plan.json | 4 +- .../right_semi_join_plan.json | 4 +- .../DuckDBProducer/cross_join_plan.json | 4 +- .../DuckDBProducer/full_join_plan.json | 4 +- .../DuckDBProducer/inner_join_plan.json | 4 +- .../DuckDBProducer/left_join_plan.json | 4 +- .../DuckDBProducer/left_semi_join_plan.json | 4 +- .../DuckDBProducer/right_join_plan.json | 4 +- .../DuckDBProducer/right_semi_join_plan.json | 4 +- .../IsthmusProducer/cross_join_plan.json | 4 +- .../IsthmusProducer/full_join_plan.json | 4 +- .../IsthmusProducer/inner_join_plan.json | 4 +- .../IsthmusProducer/left_anti_join_plan.json | 4 +- .../IsthmusProducer/left_join_plan.json | 4 +- .../IsthmusProducer/left_semi_join_plan.json | 4 +- .../IsthmusProducer/right_anti_join_plan.json | 4 +- .../IsthmusProducer/right_join_plan.json | 4 +- .../IsthmusProducer/right_semi_join_plan.json | 4 +- .../count_distinct_in_project_plan.json | 4 +- .../distinct_in_project_plan.json | 2 +- .../extended_project_plan.json | 2 +- .../project_all_col_plan.json | 2 +- .../project_multi_col_plan.json | 2 +- .../project_single_col_plan.json | 2 +- .../subquery_in_project_plan.json | 4 +- .../count_distinct_in_project_plan.json | 2 +- .../DuckDBProducer/extended_project_plan.json | 2 +- .../DuckDBProducer/project_all_col_plan.json | 2 +- .../project_multi_col_plan.json | 2 +- .../project_single_col_plan.json | 2 +- 
.../subquery_in_project_plan.json | 4 +- .../count_distinct_in_project_plan.json | 2 +- .../distinct_in_project_plan.json | 2 +- .../extended_project_plan.json | 2 +- .../IsthmusProducer/project_all_col_plan.json | 2 +- .../project_multi_col_plan.json | 2 +- .../project_single_col_plan.json | 2 +- .../read_named_table_plan.json | 2 +- .../DuckDBProducer/read_named_table_plan.json | 2 +- .../read_named_table_plan.json | 2 +- .../DataFusionProducer/except_plan.json | 4 +- .../DataFusionProducer/intersect_plan.json | 4 +- .../DataFusionProducer/union_all_plan.json | 4 +- .../union_distinct_plan.json | 4 +- .../DuckDBProducer/except_plan.json | 4 +- .../DuckDBProducer/intersect_plan.json | 4 +- .../DuckDBProducer/union_all_plan.json | 4 +- .../DuckDBProducer/union_distinct_plan.json | 4 +- .../IsthmusProducer/except_plan.json | 4 +- .../IsthmusProducer/intersect_plan.json | 4 +- .../IsthmusProducer/union_all_plan.json | 4 +- .../IsthmusProducer/union_distinct_plan.json | 4 +- .../relations/test_aggregate_relation.py | 20 +- .../functional/relations/test_ddl_relation.py | 14 +- .../relations/test_fetch_relation.py | 20 +- .../relations/test_filter_relation.py | 20 +- .../relations/test_join_relation.py | 20 +- .../relations/test_project_relation.py | 20 +- .../relations/test_read_relation.py | 20 +- .../functional/relations/test_set_relation.py | 20 +- .../relations/test_sort_relation.py | 20 +- .../relations/test_write_relation.py | 14 +- .../tests/integration/queries/tpch_sql/q1.sql | 2 +- .../integration/queries/tpch_sql/q10.sql | 2 +- .../integration/queries/tpch_sql/q11.sql | 12 +- .../integration/queries/tpch_sql/q12.sql | 2 +- .../integration/queries/tpch_sql/q13.sql | 4 +- .../integration/queries/tpch_sql/q14.sql | 2 +- .../integration/queries/tpch_sql/q15.sql | 6 +- .../integration/queries/tpch_sql/q16.sql | 4 +- .../integration/queries/tpch_sql/q17.sql | 4 +- .../integration/queries/tpch_sql/q18.sql | 4 +- .../integration/queries/tpch_sql/q19.sql | 2 +- 
.../tests/integration/queries/tpch_sql/q2.sql | 4 +- .../integration/queries/tpch_sql/q20.sql | 8 +- .../integration/queries/tpch_sql/q21.sql | 6 +- .../integration/queries/tpch_sql/q22.sql | 6 +- .../tests/integration/queries/tpch_sql/q3.sql | 2 +- .../tests/integration/queries/tpch_sql/q4.sql | 4 +- .../tests/integration/queries/tpch_sql/q5.sql | 2 +- .../tests/integration/queries/tpch_sql/q6.sql | 2 +- .../tests/integration/queries/tpch_sql/q7.sql | 2 +- .../tests/integration/queries/tpch_sql/q8.sql | 2 +- .../tests/integration/queries/tpch_sql/q9.sql | 2 +- .../tpch_substrait_plans/query_03_plan.json | 135 +++++----- .../integration/queries/tpch_test_cases.py | 234 ++++++++++-------- .../tests/integration/test_acero_tpch.py | 45 ++-- .../tests/integration/test_duckdb_tpch.py | 28 +-- .../integration/test_tpch_plans_valid.py | 30 +-- 186 files changed, 1622 insertions(+), 1176 deletions(-) diff --git a/README.md b/README.md index f41dc087..89a0fe3b 100644 --- a/README.md +++ b/README.md @@ -98,58 +98,38 @@ query_1.py TPCH_QUERY_TESTS = ( { "test_name": "test_tpch_sql_1", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": get_sql("q1.sql"), "substrait_query": get_substrait_plan("query_01_plan.json"), }, { "test_name": "test_tpch_sql_2", - "file_names": [ - "part.parquet", - "supplier.parquet", - "partsupp.parquet", - "nation.parquet", - "region.parquet", - "partsupp.parquet", - "supplier.parquet", - "nation.parquet", - "region.parquet", - ], + "local_files": {}, + "named_tables": { + "part": "part.parquet", + "supplier": "supplier.parquet", + "partsupp": "partsupp.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + "partsupp": "partsupp.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + }, "sql_query": get_sql("q2.sql"), "substrait_query": get_substrait_plan("query_02_plan.json"), }, -] +) ``` ## Substrait Plans 
Substrait query plans are located in `substrait_consumer/tests/integration/queries/tpch_substrait_plans`. -The substrait query plans have placeholder strings in the `local_files` objects in the json -structure. -```json -"local_files": { - "items": [ - { - "uri_file": "file://FILENAME_PLACEHOLDER_0", - "parquet": {} - } - ] -} -``` - - -When the tests are run, these placeholders are replaced by the parquet data listed -listed in `"file_names"` in the test case args file. The order of parquet file appearance in the -`"file_names"` list should be consistent with the ordering for the table names in the substrait -query plan. ## SQL Queries SQL queries are located in `substrait_consumer/tests/integration/queries/tpch_sql`. -The SQL queries have empty bracket placeholders (`'{}'`) where the table names will be inserted. -Table names are determined based on the `"file_names"` in the test case args file. The order of -parquet file appearance in the `"file_names"` list should be consistent with the ordering for the -table names in the SQL query. The actual format after replacement will depend on the consumer being -used. - +The SQL queries have named placeholders (`'{customer}'`) where the table names or file paths will be inserted. +Table names are determined based on the `"named_tables"` and `"local_files"` in the test case args file. # Function Tests The substrait function tests aim to test the functions available in Substrait. 
This is done @@ -182,7 +162,8 @@ arithmetic_tests.py SCALAR_FUNCTIONS = ( { "test_name": "add", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["add"], "ibis_expr": IBIS_SCALAR["add"], }, @@ -196,7 +177,7 @@ SQL_SCALAR = { "add": """ SELECT PS_PARTKEY, PS_SUPPKEY, add(PS_PARTKEY, PS_SUPPKEY) AS ADD_KEY - FROM '{}'; + FROM '{partsupp}'; """, ``` diff --git a/substrait_consumer/common.py b/substrait_consumer/common.py index 19da85ec..708bc5c7 100644 --- a/substrait_consumer/common.py +++ b/substrait_consumer/common.py @@ -44,67 +44,16 @@ class SubstraitUtils: """ @staticmethod - def get_full_path(file_names: Iterable[str]) -> list[str]: + def compute_full_paths(local_files: dict[str, str]) -> dict[str, str]: """ - Get full paths for the TPCH parquet data. + Get the full paths for the given local files. Parameters: - file_names: - List of TPCH parquet data file names provided by the test case. + local_files: + A `dict` mapping format argument names to local files paths. Returns: - List of full paths. + A `dict` where the paths are expanded to absolute paths. """ data_dir = CUR_DIR / "data" / "tpch_parquet" - full_paths_list = [f"{data_dir}/{dataset}" for dataset in file_names] - - return full_paths_list - - def format_sql_query(self, sql_query: str, file_names: list[str]) -> str: - """ - Replace the 'Table' Parameters from the SQL query with the relative - file paths of the parquet data. - - Parameters: - sql_query: - SQL query. - file_names: - List of file names. - - Returns: - SQL Query with file paths. 
- """ - sql_commands_list = [line.strip() for line in sql_query.strip().split("\n")] - sql_query = " ".join(sql_commands_list) - # Get full path for all datasets used in the query - parquet_file_paths = self.get_full_path(file_names) - - return sql_query.format(*parquet_file_paths) - - def format_substrait_query( - self, substrait_query: str, file_names: list[str] - ) -> str: - """ - Replace the 'local_files' path in the substrait query plan with - the full path of the parquet data. - - Parameters: - substrait_query: - Substrait query. - file_names: - List of file names. - - Returns: - Substrait query plan in byte format. - """ - # Get full path for all datasets used in the query - parquet_file_paths = self.get_full_path(file_names) - - # Replace the filename placeholder in the substrait query plan with - # the proper parquet data file paths. - for count, file_path in enumerate(parquet_file_paths): - substrait_query = substrait_query.replace( - f"FILENAME_PLACEHOLDER_{count}", file_path - ) - - return substrait_query + return {k: f"{data_dir}/{v}" for k, v in local_files.items()} diff --git a/substrait_consumer/consumers/acero_consumer.py b/substrait_consumer/consumers/acero_consumer.py index a154ba5e..26aaaa57 100644 --- a/substrait_consumer/consumers/acero_consumer.py +++ b/substrait_consumer/consumers/acero_consumer.py @@ -1,15 +1,9 @@ from __future__ import annotations -import string -from pathlib import Path -from typing import Iterable - import pyarrow as pa import pyarrow.parquet as pq import pyarrow.substrait as substrait -from substrait_consumer.common import SubstraitUtils - from .consumer import COLUMN_A, COLUMN_B, COLUMN_C, COLUMN_D, Consumer @@ -19,15 +13,14 @@ class AceroConsumer(Consumer): """ def __init__(self): - self.tables = {} - self.table_provider = lambda names, schema: self.tables[names[0].lower()] - - def setup(self, db_connection, file_names: Iterable[str]): - if len(file_names) > 0: - parquet_file_paths = 
SubstraitUtils.get_full_path(file_names) - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem - self.tables[table_name] = pq.read_table(file_path) + self.named_tables = {} + self.table_provider = lambda names, schema: self.named_tables[names[0].lower()] + + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): + for table_name, file_path in named_tables.items(): + self.named_tables[table_name] = pq.read_table(file_path) else: table = pa.table( { @@ -37,7 +30,7 @@ def setup(self, db_connection, file_names: Iterable[str]): "d": COLUMN_D, } ) - self.tables["t"] = table + self.named_tables["t"] = table def run_substrait_query(self, substrait_query: str) -> pa.Table: """ diff --git a/substrait_consumer/consumers/consumer.py b/substrait_consumer/consumers/consumer.py index 0a8a4a77..129fc04a 100644 --- a/substrait_consumer/consumers/consumer.py +++ b/substrait_consumer/consumers/consumer.py @@ -1,10 +1,12 @@ from __future__ import annotations from abc import ABC, abstractmethod -from typing import Iterable import pyarrow as pa +from substrait_consumer.common import SubstraitUtils + + COLUMN_A = [1, 2, 3, -4, 5, -6, 7, 8, 9, None] COLUMN_B = [1, 1, 1, 1, 1, 2, 2, 2, 2, 2] COLUMN_C = [ @@ -34,8 +36,33 @@ class Consumer(ABC): + + def setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): + """ + Initializes this `Consumer` instance. + + In particular, expands the paths in `local_files` and `named_tables` to + absolute paths and forwards the arguments to `self._setup` implemented + by classes inheriting from `Consumer`. + + Parameters: + db_connection: + DuckDB connection for this `Consumer`. + local_files: + A `dict` mapping format argument names to local files paths. + named_tables: + A `dict` mapping table names to local file paths. 
+ """ + local_files = SubstraitUtils.compute_full_paths(local_files) + named_tables = SubstraitUtils.compute_full_paths(named_tables) + self._setup(db_connection, local_files, named_tables) + @abstractmethod - def setup(self, db_connection, file_names: Iterable[str]): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): pass @abstractmethod diff --git a/substrait_consumer/consumers/datafusion_consumer.py b/substrait_consumer/consumers/datafusion_consumer.py index 89cb9873..1789dd18 100644 --- a/substrait_consumer/consumers/datafusion_consumer.py +++ b/substrait_consumer/consumers/datafusion_consumer.py @@ -1,9 +1,6 @@ from __future__ import annotations import json -import string -from pathlib import Path -from typing import Iterable import pyarrow as pa from datafusion import SessionContext @@ -11,8 +8,6 @@ from google.protobuf.json_format import Parse from substrait.gen.proto.plan_pb2 import Plan -from substrait_consumer.common import SubstraitUtils - from .consumer import COLUMN_A, COLUMN_B, COLUMN_C, COLUMN_D, Consumer @@ -24,17 +19,16 @@ class DataFusionConsumer(Consumer): def __init__(self): self._ctx = SessionContext() - def setup(self, db_connection, file_names: Iterable[str]): - if len(file_names) > 0: - parquet_file_paths = SubstraitUtils.get_full_path(file_names) - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem - if self._ctx.table_exist(table_name): - self._ctx.deregister_table(table_name) - self._ctx.register_parquet(table_name, file_path) + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): + for table_name, file_path in named_tables.items(): + if self._ctx.table_exist(table_name): + self._ctx.deregister_table(table_name) + self._ctx.register_parquet(table_name, file_path) else: if not self._ctx.table_exist("t"): - tables = pa.RecordBatch.from_arrays( + named_tables = pa.RecordBatch.from_arrays( [ 
pa.array(COLUMN_A), pa.array(COLUMN_B), @@ -44,7 +38,7 @@ def setup(self, db_connection, file_names: Iterable[str]): names=["a", "b", "c", "d"], ) - self._ctx.register_record_batches("t", [[tables]]) + self._ctx.register_record_batches("t", [[named_tables]]) def run_substrait_query(self, substrait_query: str) -> pa.Table: """ diff --git a/substrait_consumer/consumers/duckdb_consumer.py b/substrait_consumer/consumers/duckdb_consumer.py index 2c850527..7e0fa304 100644 --- a/substrait_consumer/consumers/duckdb_consumer.py +++ b/substrait_consumer/consumers/duckdb_consumer.py @@ -1,15 +1,10 @@ from __future__ import annotations -import string -from pathlib import Path -from typing import Iterable - import duckdb import pyarrow as pa -from substrait_consumer.common import SubstraitUtils - from .consumer import Consumer +from substrait_consumer.producers.producer import load_named_tables class DuckDBConsumer(Consumer): @@ -26,9 +21,11 @@ def __init__(self, db_connection=None): self.db_connection.execute("INSTALL substrait") self.db_connection.execute("LOAD substrait") - def setup(self, db_connection, file_names: Iterable[str]): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): self.db_connection = db_connection - self.load_tables_from_parquet(file_names) + load_named_tables(db_connection, named_tables) def run_substrait_query(self, substrait_query: str) -> pa.Table: """ @@ -42,31 +39,3 @@ def run_substrait_query(self, substrait_query: str) -> pa.Table: A pyarrow table resulting from running the substrait query plan. """ return self.db_connection.from_substrait_json(substrait_query).arrow() - - def load_tables_from_parquet( - self, - file_names: Iterable[str], - ) -> list: - """ - Load all the parquet files into separate tables in DuckDB. - - Parameters: - file_names: - Name of parquet files. - - Returns: - A list of the table names. 
- """ - parquet_file_paths = SubstraitUtils.get_full_path(file_names) - table_names = [] - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem - try: - self.db_connection.execute(f"DROP TABLE {table_name}") - except: - pass - create_table_sql = f"CREATE TABLE {table_name} AS SELECT * FROM read_parquet('{file_path}');" - self.db_connection.execute(create_table_sql) - table_names.append(table_name) - - return table_names diff --git a/substrait_consumer/context.py b/substrait_consumer/context.py index 2af74cc3..6a8f3e47 100644 --- a/substrait_consumer/context.py +++ b/substrait_consumer/context.py @@ -42,13 +42,13 @@ def produce_isthmus_substrait(sql_string, schema_list, validate=False): return json_plan -def get_schema(file_names): +def get_schema(local_files): """ Create the list of schemas based on the given file names. If there are no files give, a custom schema for the data is used. Parameters: - file_names: List of file names. + local_files: List of file names. Returns: List of all schemas as a java list. 
@@ -56,11 +56,11 @@ def get_schema(file_names): import substrait_consumer.java_definitions as java arr = java.ArrayListClass() - if file_names: + if local_files: text_schema_file = open(schema_file) schema_string = text_schema_file.read().replace("\n", " ").split(";")[:-1] for create_table in schema_string: - if "small" not in file_names[0]: + if "small" not in local_files[0]: create_table = create_table.replace("_small", "") java_obj = jpype.JObject @ jpype.JString(create_table) arr.add(java_obj) diff --git a/substrait_consumer/functional/aggregate_relation_configs.py b/substrait_consumer/functional/aggregate_relation_configs.py index 53644b4d..4cd81c93 100644 --- a/substrait_consumer/functional/aggregate_relation_configs.py +++ b/substrait_consumer/functional/aggregate_relation_configs.py @@ -4,61 +4,74 @@ AGGREGATE_RELATION_TESTS = ( { "test_name": "single_measure_aggregate", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["single_measure_aggregate"], "ibis_expr": None }, { "test_name": "multiple_measure_aggregate", - "file_names": ['orders_small.parquet'], + "local_files": {}, + "named_tables": {"orders": "orders_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["multiple_measure_aggregate"], "ibis_expr": None }, { "test_name": "aggregate_with_computation", - "file_names": ['orders_small.parquet'], + "local_files": {}, + "named_tables": {"orders": "orders_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_computation"], "ibis_expr": None }, { "test_name": "compute_within_aggregate", - "file_names": ['orders_small.parquet'], + "local_files": {}, + "named_tables": {"orders": "orders_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["compute_within_aggregate"], "ibis_expr": None }, { "test_name": "computation_between_aggregates", - "file_names": ['orders_small.parquet'], + "local_files": {}, + "named_tables": {"orders": 
"orders_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["computation_between_aggregates"], "ibis_expr": None }, { "test_name": "aggregate_in_subquery", - "file_names": ['orders_small.parquet', 'orders_small.parquet'], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": AGGREGATE_RELATIONS["aggregate_in_subquery"], "ibis_expr": None }, { "test_name": "aggregate_with_group_by", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_group_by"], "ibis_expr": None }, { "test_name": "aggregate_with_group_by_cube", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_group_by_cube"], "ibis_expr": None }, { "test_name": "aggregate_with_group_by_rollup", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_group_by_rollup"], "ibis_expr": None }, { "test_name": "aggregate_with_grouping_set", - "file_names": ['lineitem_small.parquet'], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": AGGREGATE_RELATIONS["aggregate_with_grouping_set"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/approximation_configs.py b/substrait_consumer/functional/approximation_configs.py index b804a80a..f382930d 100644 --- a/substrait_consumer/functional/approximation_configs.py +++ b/substrait_consumer/functional/approximation_configs.py @@ -3,13 +3,15 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "approx_count_distinct", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["approx_count_distinct"], "ibis_expr": None, }, { "test_name": 
"approx_distinct", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["approx_distinct"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/arithmetic_configs.py b/substrait_consumer/functional/arithmetic_configs.py index f723793b..fc933dcb 100644 --- a/substrait_consumer/functional/arithmetic_configs.py +++ b/substrait_consumer/functional/arithmetic_configs.py @@ -6,115 +6,134 @@ SCALAR_FUNCTIONS = ( { "test_name": "add", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["add"], "ibis_expr": IBIS_SCALAR["add"], }, { "test_name": "subtract", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["subtract"], "ibis_expr": IBIS_SCALAR["subtract"], }, { "test_name": "multiply", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["multiply"], "ibis_expr": IBIS_SCALAR["multiply"], }, { "test_name": "divide", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["divide"], "ibis_expr": IBIS_SCALAR["divide"], }, { "test_name": "modulus", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["modulus"], "ibis_expr": IBIS_SCALAR["modulus"], }, { "test_name": "factorial", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["factorial"], "ibis_expr": None, }, { "test_name": "power", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["power"], "ibis_expr": IBIS_SCALAR["power"], }, { "test_name": "sqrt", - 
"file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["sqrt"], "ibis_expr": IBIS_SCALAR["sqrt"], }, { "test_name": "exp", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["exp"], "ibis_expr": IBIS_SCALAR["exp"], }, { "test_name": "negate", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["negate"], "ibis_expr": IBIS_SCALAR["negate"], }, { "test_name": "cos", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["cos"], "ibis_expr": IBIS_SCALAR["cos"], }, { "test_name": "acos", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["acos"], "ibis_expr": IBIS_SCALAR["acos"], }, { "test_name": "sin", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["sin"], "ibis_expr": IBIS_SCALAR["sin"], }, { "test_name": "asin", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["asin"], "ibis_expr": IBIS_SCALAR["asin"], }, { "test_name": "tan", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["tan"], "ibis_expr": IBIS_SCALAR["tan"], }, { "test_name": "atan", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["atan"], "ibis_expr": IBIS_SCALAR["atan"], }, { "test_name": "atan2", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["atan2"], "ibis_expr": None, }, { 
"test_name": "abs", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["abs"], "ibis_expr": IBIS_SCALAR["abs"], }, { "test_name": "sign", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["sign"], "ibis_expr": IBIS_SCALAR["sign"], }, @@ -124,67 +143,78 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "sum", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["sum"], "ibis_expr": IBIS_AGGREGATE["sum"], }, { "test_name": "count", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["count"], "ibis_expr": None, }, { "test_name": "count_star", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["count_star"], "ibis_expr": None, }, { "test_name": "avg", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["avg"], "ibis_expr": IBIS_AGGREGATE["avg"], }, { "test_name": "min", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["min"], "ibis_expr": IBIS_AGGREGATE["min"], }, { "test_name": "max", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["max"], "ibis_expr": IBIS_AGGREGATE["max"], }, { "test_name": "median", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["median"], "ibis_expr": IBIS_AGGREGATE["median"], }, { "test_name": "mode", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["mode"], "ibis_expr": None, }, { 
"test_name": "product", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["product"], "ibis_expr": None, }, { "test_name": "std_dev", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["std_dev"], "ibis_expr": None, }, { "test_name": "variance", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_AGGREGATE["variance"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/arithmetic_decimal_configs.py b/substrait_consumer/functional/arithmetic_decimal_configs.py index b0fc14f9..abe4e14b 100644 --- a/substrait_consumer/functional/arithmetic_decimal_configs.py +++ b/substrait_consumer/functional/arithmetic_decimal_configs.py @@ -6,31 +6,36 @@ SCALAR_FUNCTIONS = ( { "test_name": "add", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["add"], "ibis_expr": None, }, { "test_name": "subtract", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["subtract"], "ibis_expr": None, }, { "test_name": "multiply", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["multiply"], "ibis_expr": None, }, { "test_name": "divide", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["divide"], "ibis_expr": None, }, { "test_name": "modulus", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["modulus"], "ibis_expr": None, }, @@ -39,25 +44,29 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "sum", - "file_names": 
["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["sum"], "ibis_expr": None, }, { "test_name": "avg", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["avg"], "ibis_expr": None, }, { "test_name": "min", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["min"], "ibis_expr": None, }, { "test_name": "max", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_AGGREGATE["max"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/boolean_configs.py b/substrait_consumer/functional/boolean_configs.py index e002ad54..9b686d6e 100644 --- a/substrait_consumer/functional/boolean_configs.py +++ b/substrait_consumer/functional/boolean_configs.py @@ -4,25 +4,29 @@ SCALAR_FUNCTIONS = ( { "test_name": "or", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["or"], "ibis_expr": IBIS_SCALAR["or"], }, { "test_name": "and", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["and"], "ibis_expr": IBIS_SCALAR["and"], }, { "test_name": "not", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["not"], "ibis_expr": None, }, { "test_name": "xor", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["xor"], "ibis_expr": None, }, @@ -31,13 +35,15 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "bool_and", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_AGGREGATE["bool_and"], "ibis_expr": None, }, { "test_name": "bool_or", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_AGGREGATE["bool_or"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/common.py 
b/substrait_consumer/functional/common.py index 685b18ef..9f88d627 100644 --- a/substrait_consumer/functional/common.py +++ b/substrait_consumer/functional/common.py @@ -1,7 +1,7 @@ from __future__ import annotations from pathlib import Path -from typing import TYPE_CHECKING, Callable, Iterable +from typing import TYPE_CHECKING, Callable import pytest from duckdb import DuckDBPyConnection @@ -51,7 +51,8 @@ def generate_snapshot_results( test_name: str, snapshot: Snapshot, db_con: DuckDBPyConnection, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ): """ @@ -65,17 +66,18 @@ def generate_snapshot_results( Pytest snapshot plugin used for verification. db_con: DuckDB connection for creating in memory tables. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. """ # Load the parquet files into DuckDB and return all the table names as a list producer = DuckDBProducer() - producer.set_db_connection(db_con) - sql_query = producer.format_sql(sql_query[0], file_names) + producer.setup(db_con, local_files, named_tables) - duckdb_result = db_con.query(f"{sql_query}").arrow() + duckdb_result = producer.run_sql_query(sql_query[0]) duckdb_result = duckdb_result.rename_columns( list(map(str.lower, duckdb_result.column_names)) ) @@ -96,7 +98,8 @@ def substrait_producer_sql_test( test_name: str, snapshot: Snapshot, db_con: DuckDBPyConnection, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -114,9 +117,11 @@ def substrait_producer_sql_test( snapshot: Pytest snapshot plugin used for verification. db_con: - DuckDB connection for creating in memory tables. - file_names: - List of parquet files. + DuckDB connection for creating in memory tables.
+ local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. ibis_expr: @@ -126,11 +131,8 @@ def substrait_producer_sql_test( *args: The data tables to be passed to the ibis expression. """ - producer.set_db_connection(db_con) - supported_producers = sql_query[1] - - # Load the parquet files into DuckDB and return all the table names as a list - sql_query = producer.format_sql(sql_query[0], file_names) + producer.setup(db_con, local_files, named_tables) + sql_query, supported_producers = sql_query # Convert the SQL/Ibis expression to a substrait query plan if type(producer).__name__ == "IbisProducer": @@ -159,7 +161,8 @@ def substrait_consumer_sql_test( test_name: str, snapshot: Snapshot, db_con: DuckDBPyConnection, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -177,8 +180,10 @@ def substrait_consumer_sql_test( Pytest snapshot plugin used for verification. db_con: DuckDB connection for creating in memory tables. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. ibis_expr: @@ -188,7 +193,7 @@ def substrait_consumer_sql_test( consumer: Substrait consumer class.
""" - consumer.setup(db_con, file_names) + consumer.setup(db_con, local_files, named_tables) group, name = test_name.split(":") snopshot_dir = RELATION_SNAPSHOT_DIR if "relation" in group else FUNCTION_SNAPSHOT_DIR diff --git a/substrait_consumer/functional/comparison_configs.py b/substrait_consumer/functional/comparison_configs.py index 5930b508..5c10ef88 100644 --- a/substrait_consumer/functional/comparison_configs.py +++ b/substrait_consumer/functional/comparison_configs.py @@ -6,85 +6,99 @@ SCALAR_FUNCTIONS = ( { "test_name": "not_equal", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["not_equal"], "ibis_expr": IBIS_SCALAR["not_equal"], }, { "test_name": "equal", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["equal"], "ibis_expr": IBIS_SCALAR["equal"], }, { "test_name": "is_not_distinct_from", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_not_distinct_from"], "ibis_expr": None, }, { "test_name": "lt", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["lt"], "ibis_expr": IBIS_SCALAR["lt"], }, { "test_name": "lte", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["lte"], "ibis_expr": IBIS_SCALAR["lte"], }, { "test_name": "gt", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["gt"], "ibis_expr": IBIS_SCALAR["gt"], }, { "test_name": "gte", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["gte"], "ibis_expr": IBIS_SCALAR["gte"], }, { "test_name": "is_not_null", - "file_names": [], + "local_files": {}, + 
"named_tables": {}, "sql_query": SQL_SCALAR["is_not_null"], "ibis_expr": None, }, { "test_name": "is_null", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_null"], "ibis_expr": None, }, { "test_name": "is_nan", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_nan"], "ibis_expr": None, }, { "test_name": "is_finite", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_finite"], "ibis_expr": None, }, { "test_name": "is_infinite", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["is_infinite"], "ibis_expr": None, }, { "test_name": "between", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["between"], "ibis_expr": None, }, { "test_name": "coalesce", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": SQL_SCALAR["coalesce"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/datetime_configs.py b/substrait_consumer/functional/datetime_configs.py index 2b9eb470..afd4ad08 100644 --- a/substrait_consumer/functional/datetime_configs.py +++ b/substrait_consumer/functional/datetime_configs.py @@ -3,49 +3,57 @@ SCALAR_FUNCTIONS = ( { "test_name": "extract", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["extract"], "ibis_expr": None, }, { "test_name": "add", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["add"], "ibis_expr": None, }, { "test_name": "subtract", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["subtract"], "ibis_expr": None, }, { "test_name": "lt", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": 
SQL_SCALAR["lt"], "ibis_expr": None, }, { "test_name": "lte", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["lte"], "ibis_expr": None, }, { "test_name": "gt", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["gt"], "ibis_expr": None, }, { "test_name": "gte", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["gte"], "ibis_expr": None, }, { "test_name": "add_intervals", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": SQL_SCALAR["add_intervals"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/ddl_relation_configs.py b/substrait_consumer/functional/ddl_relation_configs.py index 8f450533..c2afa23a 100644 --- a/substrait_consumer/functional/ddl_relation_configs.py +++ b/substrait_consumer/functional/ddl_relation_configs.py @@ -4,43 +4,50 @@ DDL_RELATION_TESTS = ( { "test_name": "create_table", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": DDL_RELATIONS["create_table"], "ibis_expr": None }, { "test_name": "drop_table", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["drop_table"], "ibis_expr": None }, { "test_name": "alter_table", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["alter_table"], "ibis_expr": None }, { "test_name": "alter_column", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["alter_column"], "ibis_expr": None }, { "test_name": "drop_column", - "file_names": ["customer.parquet"], + "local_files": {}, + 
"named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["drop_column"], "ibis_expr": None }, { "test_name": "create_view", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["create_view"], "ibis_expr": None }, { "test_name": "create_or_replace_view", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": DDL_RELATIONS["create_or_replace_view"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/fetch_relation_configs.py b/substrait_consumer/functional/fetch_relation_configs.py index 1d0ed311..d6f8950c 100644 --- a/substrait_consumer/functional/fetch_relation_configs.py +++ b/substrait_consumer/functional/fetch_relation_configs.py @@ -4,13 +4,15 @@ FETCH_RELATION_TESTS = ( { "test_name": "fetch", - "file_names": ["orders.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet"}, "sql_query": FETCH_RELATIONS["fetch"], "ibis_expr": None }, { "test_name": "fetch_with_offset", - "file_names": ["orders.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet"}, "sql_query": FETCH_RELATIONS["fetch_with_offset"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/filter_relation_configs.py b/substrait_consumer/functional/filter_relation_configs.py index e762dc05..91833a9c 100644 --- a/substrait_consumer/functional/filter_relation_configs.py +++ b/substrait_consumer/functional/filter_relation_configs.py @@ -4,73 +4,85 @@ FILTER_RELATION_TESTS = ( { "test_name": "where_equal_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_equal_multi_col"], "ibis_expr": None }, { "test_name": "where_not_equal_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, 
"sql_query": FILTER_RELATIONS["where_not_equal_multi_col"], "ibis_expr": None }, { "test_name": "where_gt_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_gt_multi_col"], "ibis_expr": None }, { "test_name": "where_gte_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_gte_multi_col"], "ibis_expr": None }, { "test_name": "where_lt_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_lt_multi_col"], "ibis_expr": None }, { "test_name": "where_lte_multi_col", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_lte_multi_col"], "ibis_expr": None }, { "test_name": "where_like", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_like"], "ibis_expr": None }, { "test_name": "where_between", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_between"], "ibis_expr": None }, { "test_name": "where_in", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_in"], "ibis_expr": None }, { "test_name": "where_or", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_or"], "ibis_expr": None }, { "test_name": "where_and", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["where_and"], "ibis_expr": None }, 
{ "test_name": "having", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": FILTER_RELATIONS["having"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/join_relation_configs.py b/substrait_consumer/functional/join_relation_configs.py index c1d82305..4fc21fea 100644 --- a/substrait_consumer/functional/join_relation_configs.py +++ b/substrait_consumer/functional/join_relation_configs.py @@ -4,79 +4,131 @@ JOIN_RELATION_TESTS = ( { "test_name": "inner_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["inner_join"], "ibis_expr": None }, { "test_name": "left_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_join"], "ibis_expr": None }, { "test_name": "right_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_join"], "ibis_expr": None }, { "test_name": "full_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["full_join"], "ibis_expr": None }, { "test_name": "cross_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["cross_join"], "ibis_expr": None }, { "test_name": "left_semi_join", - "file_names": 
["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_semi_join"], "ibis_expr": None }, { "test_name": "right_semi_join", - "file_names": ["orders_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_semi_join"], "ibis_expr": None }, { "test_name": "left_anti_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_anti_join"], "ibis_expr": None }, { "test_name": "right_anti_join", - "file_names": ["orders_small.parquet", "lineitem_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "lineitem": "lineitem_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_anti_join"], "ibis_expr": None }, { "test_name": "left_single_join", - "file_names": ["customer_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_single_join"], "ibis_expr": None }, { "test_name": "right_single_join", - "file_names": ["customer_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_single_join"], "ibis_expr": None }, { "test_name": "left_mark_join", - "file_names": ["orders_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": JOIN_RELATIONS["left_mark_join"], 
"ibis_expr": None }, { "test_name": "right_mark_join", - "file_names": ["customer_small.parquet", "orders_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "orders": "orders_small.parquet", + }, "sql_query": JOIN_RELATIONS["right_mark_join"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/logarithmic_configs.py b/substrait_consumer/functional/logarithmic_configs.py index f59ad9d9..1089648e 100644 --- a/substrait_consumer/functional/logarithmic_configs.py +++ b/substrait_consumer/functional/logarithmic_configs.py @@ -5,25 +5,29 @@ SCALAR_FUNCTIONS = ( { "test_name": "ln", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["ln"], "ibis_expr": IBIS_SCALAR["ln"], }, { "test_name": "log10", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["log10"], "ibis_expr": IBIS_SCALAR["log10"], }, { "test_name": "log2", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["log2"], "ibis_expr": IBIS_SCALAR["log2"], }, { "test_name": "logb", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["logb"], "ibis_expr": IBIS_SCALAR["logb"], }, diff --git a/substrait_consumer/functional/project_relation_configs.py b/substrait_consumer/functional/project_relation_configs.py index aa8d3602..a669f220 100644 --- a/substrait_consumer/functional/project_relation_configs.py +++ b/substrait_consumer/functional/project_relation_configs.py @@ -4,43 +4,53 @@ PROJECT_RELATION_TESTS = ( { "test_name": "project_single_col", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["project_single_col"], "ibis_expr": 
None }, { "test_name": "project_multi_col", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["project_multi_col"], "ibis_expr": None }, { "test_name": "project_all_col", - "file_names": ["region_small.parquet"], + "local_files": {}, + "named_tables": {"region": "region_small.parquet"}, "sql_query": PROJECT_RELATIONS["project_all_col"], "ibis_expr": None }, { "test_name": "extended_project", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["extended_project"], "ibis_expr": None }, { "test_name": "subquery_in_project", - "file_names": ["orders_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": PROJECT_RELATIONS["subquery_in_project"], "ibis_expr": None }, { "test_name": "distinct_in_project", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["distinct_in_project"], "ibis_expr": None }, { "test_name": "count_distinct_in_project", - "file_names": ["lineitem_small.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem_small.parquet"}, "sql_query": PROJECT_RELATIONS["count_distinct_in_project"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/queries/sql/approximation_functions_sql.py b/substrait_consumer/functional/queries/sql/approximation_functions_sql.py index e2af20ec..4937c22c 100644 --- a/substrait_consumer/functional/queries/sql/approximation_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/approximation_functions_sql.py @@ -5,14 +5,14 @@ "approx_count_distinct": ( """ SELECT approx_count_distinct(l_comment) - FROM '{}'; + FROM '{lineitem}'; """, [DuckDBProducer], ), 
"approx_distinct": ( """ SELECT approx_distinct(l_comment) - FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer], ), diff --git a/substrait_consumer/functional/queries/sql/arithmetic_demical_functions_sql.py b/substrait_consumer/functional/queries/sql/arithmetic_demical_functions_sql.py index d44875d0..60d8fd34 100644 --- a/substrait_consumer/functional/queries/sql/arithmetic_demical_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/arithmetic_demical_functions_sql.py @@ -5,7 +5,7 @@ "add": ( """ SELECT L_TAX, L_DISCOUNT, add(L_TAX, L_DISCOUNT) AS ADD_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -13,7 +13,7 @@ "subtract": ( """ SELECT L_TAX, L_DISCOUNT, subtract(L_TAX, L_DISCOUNT) AS SUBTRACT_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -21,7 +21,7 @@ "multiply": ( """ SELECT L_TAX, L_EXTENDEDPRICE, round(multiply(L_TAX, L_EXTENDEDPRICE), 2) AS MULTIPLY_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -29,7 +29,7 @@ "divide": ( """ SELECT L_TAX, L_EXTENDEDPRICE, round(divide(L_EXTENDEDPRICE, L_TAX), 2) AS DIVIDE_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -37,7 +37,7 @@ "modulus": ( """ SELECT L_EXTENDEDPRICE, L_TAX, round(mod(L_EXTENDEDPRICE, L_TAX), 2) AS MODULUS_KEY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -48,28 +48,28 @@ "sum": ( """ SELECT sum(L_EXTENDEDPRICE) AS SUM_EXTENDEDPRICE - FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer, DuckDBProducer], ), "avg": ( """ SELECT round(avg(L_EXTENDEDPRICE), 2) AS AVG_EXTENDEDPRICE - FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer, DuckDBProducer], ), "min": ( """ SELECT min(L_EXTENDEDPRICE) AS MIN_EXTENDEDPRICE - FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer, DuckDBProducer], ), "max": ( """ SELECT max(L_EXTENDEDPRICE) AS MAX_EXTENDEDPRICE - 
FROM '{}'; + FROM '{lineitem}'; """, [DataFusionProducer, DuckDBProducer], ), diff --git a/substrait_consumer/functional/queries/sql/arithmetic_functions_sql.py b/substrait_consumer/functional/queries/sql/arithmetic_functions_sql.py index d3d6000a..6243135c 100644 --- a/substrait_consumer/functional/queries/sql/arithmetic_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/arithmetic_functions_sql.py @@ -6,7 +6,7 @@ "add": ( """ SELECT PS_PARTKEY, PS_SUPPKEY, PS_PARTKEY + PS_SUPPKEY AS ADD_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -14,7 +14,7 @@ "subtract": ( """ SELECT PS_PARTKEY, PS_SUPPKEY, PS_PARTKEY - PS_SUPPKEY AS SUBTRACT_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -22,7 +22,7 @@ "multiply": ( """ SELECT PS_PARTKEY, PS_PARTKEY * 10 AS MULTIPLY_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -30,7 +30,7 @@ "divide": ( """ SELECT PS_PARTKEY, PS_PARTKEY / 10 AS DIVIDE_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -38,7 +38,7 @@ "modulus": ( """ SELECT PS_PARTKEY, mod(PS_PARTKEY, 10) AS MODULUS_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DuckDBProducer, IsthmusProducer], @@ -46,7 +46,7 @@ "factorial": ( """ SELECT N_NATIONKEY, factorial(N_NATIONKEY) AS FACTORIAL_KEY - FROM '{}' + FROM '{nation}' WHERE N_NATIONKEY <= 10 LIMIT 100; """, @@ -55,7 +55,7 @@ "power": ( """ SELECT PS_PARTKEY, power(PS_PARTKEY, 2) AS POWER_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -63,7 +63,7 @@ "sqrt": ( """ SELECT PS_PARTKEY, round(sqrt(CAST(PS_PARTKEY AS DOUBLE)), 2) AS SQRT_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -71,7 +71,7 @@ "exp": ( """ SELECT PS_PARTKEY, round(exp(CAST(PS_PARTKEY AS 
DOUBLE)), 2) AS EXP_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -79,7 +79,7 @@ "negate": ( """ SELECT PS_PARTKEY, negate(PS_PARTKEY) AS NEGATE_KEY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DuckDBProducer], @@ -87,7 +87,7 @@ "cos": ( """ SELECT round(cos(CAST(ps_supplycost AS DOUBLE)), 2) AS COS_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -95,7 +95,7 @@ "acos": ( """ SELECT round(acos(CAST(l_tax AS DOUBLE)), 2) AS ACOS_TAX - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -103,7 +103,7 @@ "sin": ( """ SELECT round(sin(CAST(ps_supplycost AS DOUBLE)), 2) AS SIN_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -111,7 +111,7 @@ "asin": ( """ SELECT round(asin(CAST(l_tax AS DOUBLE)), 2) AS ASIN_TAX - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -119,7 +119,7 @@ "tan": ( """ SELECT round(tan(CAST(ps_supplycost AS DOUBLE)), 2) AS TAN_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -127,7 +127,7 @@ "atan": ( """ SELECT round(atan(CAST(l_tax AS DOUBLE)), 2) AS ATAN_TAX - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -135,7 +135,7 @@ "atan2": ( """ SELECT round(atan2(CAST(l_tax AS DOUBLE), CAST(l_tax AS DOUBLE)), 2) AS ATAN2_TAX - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -162,77 +162,77 @@ "sum": ( """ SELECT sum(PS_SUPPLYCOST) AS SUM_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "count": ( """ SELECT count(PS_SUPPLYCOST) AS COUNT_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, 
IsthmusProducer], ), "count_star": ( """ SELECT count(*) - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "avg": ( """ SELECT round(avg(PS_SUPPLYCOST), 2) AS AVG_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer], ), "min": ( """ SELECT min(PS_SUPPLYCOST) AS MIN_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "max": ( """ SELECT max(PS_SUPPLYCOST) AS MAX_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "median": ( """ SELECT median(PS_SUPPLYCOST) AS MEDIAN_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DataFusionProducer, DuckDBProducer], ), "mode": ( """ SELECT mode(PS_SUPPLYCOST) AS MODE_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DuckDBProducer], ), "product": ( """ SELECT product(PS_SUPPLYCOST) AS PRODUCT_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DuckDBProducer], ), "std_dev": ( """ SELECT round(stddev(PS_SUPPLYCOST), 2) AS STDDEV_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DuckDBProducer], ), "variance": ( """ SELECT round(variance(PS_SUPPLYCOST), 2) AS VARIANCE_SUPPLYCOST - FROM '{}'; + FROM '{partsupp}'; """, [DuckDBProducer], ), diff --git a/substrait_consumer/functional/queries/sql/comparison_functions_sql.py b/substrait_consumer/functional/queries/sql/comparison_functions_sql.py index 4e786247..ba92b4a8 100644 --- a/substrait_consumer/functional/queries/sql/comparison_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/comparison_functions_sql.py @@ -6,7 +6,7 @@ "not_equal": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE NOT N_NAME = 'CANADA' """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -14,7 +14,7 @@ "equal": ( """ SELECT PS_AVAILQTY, PS_PARTKEY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY = PS_PARTKEY """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -30,7 +30,7 @@ "lt": 
( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY < 10 ORDER BY PS_AVAILQTY """, @@ -39,7 +39,7 @@ "lte": ( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY <= 10 ORDER BY PS_AVAILQTY """, @@ -48,7 +48,7 @@ "gt": ( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY > 9990 ORDER BY PS_AVAILQTY """, @@ -57,7 +57,7 @@ "gte": ( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' WHERE PS_AVAILQTY >= 9990 ORDER BY PS_AVAILQTY """, diff --git a/substrait_consumer/functional/queries/sql/datetime_functions_sql.py b/substrait_consumer/functional/queries/sql/datetime_functions_sql.py index 859b513c..4165e77d 100644 --- a/substrait_consumer/functional/queries/sql/datetime_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/datetime_functions_sql.py @@ -6,7 +6,7 @@ "extract": ( """ SELECT L_SHIPDATE, extract(year FROM L_SHIPDATE) - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -14,7 +14,7 @@ "add": ( """ SELECT L_SHIPDATE, L_SHIPDATE + INTERVAL 5 DAY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DuckDBProducer], @@ -22,7 +22,7 @@ "subtract": ( """ SELECT L_SHIPDATE, L_SHIPDATE - INTERVAL 5 DAY - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DuckDBProducer], @@ -30,7 +30,7 @@ "lt": ( """ SELECT L_COMMITDATE, L_RECEIPTDATE, L_COMMITDATE < L_RECEIPTDATE - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -38,7 +38,7 @@ "lte": ( """ SELECT L_COMMITDATE, L_RECEIPTDATE, L_COMMITDATE <= L_RECEIPTDATE - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -46,7 +46,7 @@ "gt": ( """ SELECT L_COMMITDATE, L_RECEIPTDATE, L_COMMITDATE > L_RECEIPTDATE - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -54,7 +54,7 @@ "gte": ( """ SELECT L_COMMITDATE, L_RECEIPTDATE, L_COMMITDATE >= L_RECEIPTDATE 
- FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], diff --git a/substrait_consumer/functional/queries/sql/logarithmic_functions_sql.py b/substrait_consumer/functional/queries/sql/logarithmic_functions_sql.py index 024f315a..73a199af 100644 --- a/substrait_consumer/functional/queries/sql/logarithmic_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/logarithmic_functions_sql.py @@ -5,7 +5,7 @@ "ln": ( """ SELECT PS_SUPPLYCOST, round(ln(PS_SUPPLYCOST), 2) AS LN_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -13,7 +13,7 @@ "log10": ( """ SELECT PS_SUPPLYCOST, round(log10(PS_SUPPLYCOST), 2) AS LOG10_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -21,7 +21,7 @@ "log2": ( """ SELECT PS_SUPPLYCOST, round(log2(PS_SUPPLYCOST), 2) AS LOG2_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], @@ -29,7 +29,7 @@ "logb": ( """ SELECT PS_SUPPLYCOST, round(logb(PS_SUPPLYCOST, 10), 2) AS LOGB_SUPPLY - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer], diff --git a/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py b/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py index 64128657..b42c4ce9 100644 --- a/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/aggregate_relations.py @@ -6,35 +6,35 @@ "single_measure_aggregate": ( """ SELECT COUNT(L_PARTKEY) - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "multiple_measure_aggregate": ( """ SELECT MIN(O_TOTALPRICE), MAX(O_TOTALPRICE), AVG(O_TOTALPRICE) - FROM '{}' + FROM '{orders}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "aggregate_with_computation": ( """ SELECT AVG(O_TOTALPRICE) * 10 - FROM '{}' + FROM '{orders}' 
""", [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "compute_within_aggregate": ( """ SELECT AVG(O_TOTALPRICE * 10) - FROM '{}' + FROM '{orders}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "computation_between_aggregates": ( """ SELECT AVG(O_TOTALPRICE) + MAX(O_TOTALPRICE) - FROM '{}' + FROM '{orders}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -42,15 +42,15 @@ """ SELECT O_TOTALPRICE - FROM '{}' - WHERE O_TOTALPRICE <= (SELECT AVG(O_TOTALPRICE) FROM '{}') + FROM '{orders}' + WHERE O_TOTALPRICE <= (SELECT AVG(O_TOTALPRICE) FROM '{orders}') """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "aggregate_with_group_by": ( """ SELECT L_ORDERKEY, L_LINENUMBER, count(*) - FROM '{}' + FROM '{lineitem}' GROUP BY L_ORDERKEY, L_LINENUMBER ORDER BY L_ORDERKEY, L_LINENUMBER """, @@ -59,7 +59,7 @@ "aggregate_with_group_by_cube": ( """ SELECT L_ORDERKEY, L_LINENUMBER, count(*) - FROM '{}' + FROM '{lineitem}' GROUP BY CUBE(L_ORDERKEY, L_LINENUMBER) ORDER BY L_ORDERKEY, L_LINENUMBER """, @@ -69,7 +69,7 @@ """ SELECT L_ORDERKEY, L_LINENUMBER, count(*) - FROM '{}' + FROM '{lineitem}' GROUP BY ROLLUP(L_ORDERKEY, L_LINENUMBER) ORDER BY L_ORDERKEY, L_LINENUMBER """, @@ -79,7 +79,7 @@ """ SELECT SUM(L_EXTENDEDPRICE), L_LINENUMBER, L_ORDERKEY - FROM '{}' + FROM '{lineitem}' GROUP BY GROUPING SETS ( (L_LINENUMBER), diff --git a/substrait_consumer/functional/queries/sql/relations/ddl_relations.py b/substrait_consumer/functional/queries/sql/relations/ddl_relations.py index a8161537..bd33f604 100644 --- a/substrait_consumer/functional/queries/sql/relations/ddl_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/ddl_relations.py @@ -15,27 +15,27 @@ ), "drop_table": ( """ - DROP TABLE '{}'; + DROP TABLE '{customer}'; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "alter_table": ( """ - ALTER TABLE '{}' + ALTER TABLE '{customer}' ADD email VARCHAR; """, [DuckDBProducer, DataFusionProducer, 
IsthmusProducer], ), "alter_column": ( """ - ALTER TABLE '{}' + ALTER TABLE '{customer}' RENAME COLUMN c_address TO c_street_address; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "drop_column": ( """ - ALTER TABLE '{}' + ALTER TABLE '{customer}' DROP COLUMN c_address; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], @@ -47,7 +47,7 @@ C_CUSTKEY, C_NAME, FROM - '{}'; + '{customer}'; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -58,7 +58,7 @@ C_CUSTKEY, C_NAME, FROM - '{}'; + '{customer}'; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), diff --git a/substrait_consumer/functional/queries/sql/relations/fetch_relations.py b/substrait_consumer/functional/queries/sql/relations/fetch_relations.py index ec4c7a15..01bb1f30 100644 --- a/substrait_consumer/functional/queries/sql/relations/fetch_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/fetch_relations.py @@ -5,14 +5,14 @@ FETCH_RELATIONS = { "fetch": ( """ - SELECT O_ORDERKEY FROM '{}' + SELECT O_ORDERKEY FROM '{orders}' FETCH NEXT 1 ROWS ONLY; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "fetch_with_offset": ( """ - SELECT O_ORDERKEY FROM '{}' + SELECT O_ORDERKEY FROM '{orders}' OFFSET 5 ROWS FETCH NEXT 5 ROWS ONLY; """, diff --git a/substrait_consumer/functional/queries/sql/relations/filter_relations.py b/substrait_consumer/functional/queries/sql/relations/filter_relations.py index 5d62d683..3b17b023 100644 --- a/substrait_consumer/functional/queries/sql/relations/filter_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/filter_relations.py @@ -6,7 +6,7 @@ "where_equal_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT = L_TAX ORDER BY L_DISCOUNT LIMIT 20; @@ -16,7 +16,7 @@ "where_not_equal_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT != L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -26,7 +26,7 @@ 
"where_gt_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT > L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -36,7 +36,7 @@ "where_gte_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT >= L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -46,7 +46,7 @@ "where_lt_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT < L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -56,7 +56,7 @@ "where_lte_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' WHERE L_DISCOUNT <= L_TAX ORDER BY L_DISCOUNT, L_TAX LIMIT 20; @@ -66,7 +66,7 @@ "where_like": ( """ SELECT L_SHIPINSTRUCT, L_ORDERKEY - FROM '{}' + FROM '{lineitem}' WHERE L_SHIPINSTRUCT LIKE '%DELIVER IN PERSON%' ORDER BY L_ORDERKEY LIMIT 20; @@ -76,7 +76,7 @@ "where_between": ( """ SELECT L_ORDERKEY - FROM '{}' + FROM '{lineitem}' WHERE L_ORDERKEY BETWEEN 20 AND 50 LIMIT 20; """, @@ -85,7 +85,7 @@ "where_in": ( """ SELECT L_ORDERKEY - FROM '{}' + FROM '{lineitem}' WHERE L_ORDERKEY IN (1, 2, 3) """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], @@ -93,7 +93,7 @@ "where_or": ( """ SELECT L_ORDERKEY, L_SHIPINSTRUCT - FROM '{}' + FROM '{lineitem}' WHERE L_ORDERKEY = 2 OR L_ORDERKEY = 3 """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], @@ -101,7 +101,7 @@ "where_and": ( """ SELECT L_ORDERKEY, L_SHIPINSTRUCT - FROM '{}' + FROM '{lineitem}' WHERE L_ORDERKEY = 2 AND L_SHIPINSTRUCT = 'TAKE BACK RETURN' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], @@ -109,7 +109,7 @@ "having": ( """ SELECT L_QUANTITY, COUNT(*) - FROM '{}' + FROM '{lineitem}' GROUP BY L_QUANTITY HAVING COUNT(*) > 12100 ORDER BY L_QUANTITY diff --git a/substrait_consumer/functional/queries/sql/relations/join_relations.py b/substrait_consumer/functional/queries/sql/relations/join_relations.py index 4e0e943b..5acbf3c9 100644 --- a/substrait_consumer/functional/queries/sql/relations/join_relations.py +++ 
b/substrait_consumer/functional/queries/sql/relations/join_relations.py @@ -10,9 +10,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c INNER JOIN - '{}' o + '{orders}' o ON c.C_CUSTKEY = o.O_CUSTKEY; """, @@ -25,9 +25,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c LEFT JOIN - '{}' o + '{orders}' o ON c.C_CUSTKEY = o.O_CUSTKEY; """, @@ -40,9 +40,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c RIGHT JOIN - '{}' o + '{orders}' o ON c.C_CUSTKEY = o.O_CUSTKEY; """, @@ -55,9 +55,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c FULL JOIN - '{}' o + '{orders}' o ON c.C_CUSTKEY = o.O_CUSTKEY; """, @@ -70,9 +70,9 @@ c.C_NAME, o.O_ORDERKEY FROM - '{}' c + '{customer}' c CROSS JOIN - '{}' o + '{orders}' o """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -82,11 +82,11 @@ c.C_CUSTKEY, c.C_NAME FROM - '{}' c + '{customer}' c WHERE EXISTS ( SELECT 1 - FROM '{}' o + FROM '{orders}' o WHERE o.O_CUSTKEY = c.C_CUSTKEY ); """, @@ -98,11 +98,11 @@ o.O_ORDERKEY, o.O_CUSTKEY FROM - '{}' o + '{orders}' o WHERE EXISTS ( SELECT 1 - FROM '{}' c + FROM '{customer}' c WHERE c.C_CUSTKEY = o.O_CUSTKEY ); """, @@ -114,11 +114,11 @@ c.C_CUSTKEY, c.C_NAME FROM - '{}' c + '{customer}' c WHERE NOT EXISTS ( SELECT 1 - FROM '{}' o + FROM '{orders}' o WHERE o.O_CUSTKEY = c.C_CUSTKEY ); """, @@ -130,11 +130,11 @@ o.O_ORDERKEY, o.O_CUSTKEY FROM - '{}' o + '{orders}' o WHERE NOT EXISTS ( SELECT 1 - FROM '{}' l + FROM '{lineitem}' l WHERE l.L_ORDERKEY = o.O_ORDERKEY ); """, @@ -150,9 +150,9 @@ c2.C_NAME AS c2name, c2.C_NATIONKEY AS c2nationakey FROM - '{}' c1 + '{customer}' c1 LEFT JOIN - '{}' c2 + '{customer}' c2 ON c1.C_NATIONKEY = c2.C_NATIONKEY AND c1.C_CUSTKEY <> c2.C_CUSTKEY; @@ -169,9 +169,9 @@ c2.C_NAME AS c2name, c2.C_NATIONKEY AS c2nationakey FROM - '{}' c1 + '{customer}' c1 RIGHT JOIN - '{}' c2 + '{customer}' c2 ON c1.C_NATIONKEY = c2.C_NATIONKEY AND c1.C_CUSTKEY <> c2.C_CUSTKEY; @@ -186,13 +186,13 @@ CASE WHEN EXISTS ( SELECT 1 - FROM 
'{}' o + FROM '{orders}' o WHERE o.O_CUSTKEY = c.C_CUSTKEY ) THEN 'Marked' ELSE 'Not Marked' END AS mark_status FROM - '{}' c; + '{customer}' c; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -204,13 +204,13 @@ CASE WHEN EXISTS ( SELECT 1 - FROM '{}' c + FROM '{customer}' c WHERE c.C_CUSTKEY = o.O_CUSTKEY ) THEN 'Marked' ELSE 'Not Marked' END AS mark_status FROM - '{}' o; + '{orders}' o; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), diff --git a/substrait_consumer/functional/queries/sql/relations/project_relations.py b/substrait_consumer/functional/queries/sql/relations/project_relations.py index 51c56b2a..230ebe10 100644 --- a/substrait_consumer/functional/queries/sql/relations/project_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/project_relations.py @@ -6,28 +6,28 @@ "project_single_col": ( """ SELECT * - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "project_multi_col": ( """ SELECT L_DISCOUNT, L_TAX - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "project_all_col": ( """ SELECT * - FROM '{}' + FROM '{region}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "extended_project": ( """ SELECT L_QUANTITY, L_EXTENDEDPRICE*10 AS MULTI_PRICE - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -35,24 +35,24 @@ """ SELECT C_CUSTKEY, (SELECT SUM(O_TOTALPRICE) - FROM {} + FROM '{orders}' WHERE C_CUSTKEY = O_CUSTKEY) AS total_price - FROM {} + FROM '{customer}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "distinct_in_project": ( """ SELECT DISTINCT L_LINESTATUS - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "count_distinct_in_project": ( """ SELECT COUNT(DISTINCT L_EXTENDEDPRICE) - FROM '{}' + FROM '{lineitem}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), diff --git 
a/substrait_consumer/functional/queries/sql/relations/read_relations.py b/substrait_consumer/functional/queries/sql/relations/read_relations.py index 6bcfbcf5..352f3f17 100644 --- a/substrait_consumer/functional/queries/sql/relations/read_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/read_relations.py @@ -5,7 +5,7 @@ READ_RELATIONS = { "read_named_table": ( """ - SELECT PS_PARTKEY FROM '{}' + SELECT PS_PARTKEY FROM '{partsupp}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), @@ -29,7 +29,7 @@ ), "duckdb_read_local_file": ( """ - SELECT * FROM read_parquet('{}'); + SELECT * FROM read_parquet('{customer_file_path}'); """, [DuckDBProducer], ), diff --git a/substrait_consumer/functional/queries/sql/relations/set_relations.py b/substrait_consumer/functional/queries/sql/relations/set_relations.py index bd9de966..695a86bb 100644 --- a/substrait_consumer/functional/queries/sql/relations/set_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/set_relations.py @@ -5,34 +5,34 @@ SET_RELATIONS = { "union_distinct": ( """ - SELECT C_NATIONKEY FROM '{}' + SELECT C_NATIONKEY FROM '{customer}' UNION - SELECT N_NATIONKEY FROM '{}' + SELECT N_NATIONKEY FROM '{nation}' ORDER BY C_NATIONKEY """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "union_all": ( """ - SELECT C_NATIONKEY FROM '{}' + SELECT C_NATIONKEY FROM '{customer}' UNION ALL - SELECT N_NATIONKEY FROM '{}' + SELECT N_NATIONKEY FROM '{nation}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "intersect": ( """ - SELECT C_NATIONKEY FROM '{}' + SELECT C_NATIONKEY FROM '{customer}' INTERSECT - SELECT N_NATIONKEY FROM '{}' + SELECT N_NATIONKEY FROM '{nation}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "except": ( """ - SELECT o_totalprice FROM '{}' + SELECT o_totalprice FROM '{orders}' EXCEPT - SELECT c_acctbal FROM '{}' + SELECT c_acctbal FROM '{customer}' """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), diff 
--git a/substrait_consumer/functional/queries/sql/relations/sort_relations.py b/substrait_consumer/functional/queries/sql/relations/sort_relations.py index 87886ab2..3ecfbb33 100644 --- a/substrait_consumer/functional/queries/sql/relations/sort_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/sort_relations.py @@ -6,7 +6,7 @@ "single_col_default_sort": ( """ SELECT PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_AVAILQTY LIMIT 10; """, @@ -15,7 +15,7 @@ "single_col_asc": ( """ SELECT PS_SUPPLYCOST - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST ASC LIMIT 10; """, @@ -24,7 +24,7 @@ "single_col_desc": ( """ SELECT PS_SUPPLYCOST - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST DESC LIMIT 10; """, @@ -33,7 +33,7 @@ "multi_col_asc": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST ASC, PS_AVAILQTY LIMIT 10; """, @@ -42,7 +42,7 @@ "multi_col_desc": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST DESC LIMIT 10; """, @@ -51,7 +51,7 @@ "multi_col_asc_desc": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST ASC, PS_AVAILQTY DESC LIMIT 10; """, @@ -60,7 +60,7 @@ "multi_col_desc_asc": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY PS_SUPPLYCOST DESC, PS_AVAILQTY ASC LIMIT 10; """, @@ -69,7 +69,7 @@ "order_by_col_number": ( """ SELECT PS_SUPPLYCOST, PS_AVAILQTY - FROM '{}' + FROM '{partsupp}' ORDER BY 1, 2 LIMIT 10; """, diff --git a/substrait_consumer/functional/queries/sql/relations/write_relations.py b/substrait_consumer/functional/queries/sql/relations/write_relations.py index 71af90cd..aa891d8a 100644 --- a/substrait_consumer/functional/queries/sql/relations/write_relations.py +++ b/substrait_consumer/functional/queries/sql/relations/write_relations.py @@ -5,14 +5,14 @@ WRITE_RELATIONS = { "insert": ( """ - INSERT INTO '{}' (r_regionkey, r_name, r_comment) + 
INSERT INTO '{region}' (r_regionkey, r_name, r_comment) VALUES (99999, 'region_name', 'region comment'); """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], ), "update": ( """ - UPDATE '{}' + UPDATE '{customer}' SET c_address = 'Substait Avenue', c_phone = '123-456-7890' WHERE c_custkey = 1; """, @@ -20,7 +20,7 @@ ), "delete": ( """ - DELETE FROM '{}' + DELETE FROM '{customer}' WHERE c_custkey = 1; """, [DuckDBProducer, DataFusionProducer, IsthmusProducer], diff --git a/substrait_consumer/functional/queries/sql/rounding_functions_sql.py b/substrait_consumer/functional/queries/sql/rounding_functions_sql.py index 8f2dcdd8..4c5e6d3b 100644 --- a/substrait_consumer/functional/queries/sql/rounding_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/rounding_functions_sql.py @@ -5,7 +5,7 @@ "ceil": ( """ SELECT PS_SUPPLYCOST, ceil(CAST(PS_SUPPLYCOST AS DOUBLE)) AS CEIL_SUPPLYCOST - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -13,7 +13,7 @@ "floor": ( """ SELECT PS_SUPPLYCOST, floor(CAST(PS_SUPPLYCOST AS DOUBLE)) AS FLOOR_SUPPLYCOST - FROM '{}' + FROM '{partsupp}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -21,7 +21,7 @@ "round": ( """ SELECT L_EXTENDEDPRICE, round(CAST(L_EXTENDEDPRICE AS DOUBLE), 1) AS ROUND_EXTENDEDPRICE - FROM '{}' + FROM '{lineitem}' LIMIT 10; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], diff --git a/substrait_consumer/functional/queries/sql/string_functions_sql.py b/substrait_consumer/functional/queries/sql/string_functions_sql.py index fecc6b45..bc27e1a6 100644 --- a/substrait_consumer/functional/queries/sql/string_functions_sql.py +++ b/substrait_consumer/functional/queries/sql/string_functions_sql.py @@ -5,21 +5,21 @@ "concat": ( """ SELECT N_NAME, concat(N_NAME, N_COMMENT) AS concat_nation - FROM '{}'; + FROM '{nation}'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], ), "concat_ws": ( """ SELECT concat_ws('.', 
N_NAME, N_COMMENT) - FROM '{}'; + FROM '{nation}'; """, [DataFusionProducer, DuckDBProducer], ), "like": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE N_NAME LIKE 'ALGERIA'; """, [DataFusionProducer, DuckDBProducer, IsthmusProducer], @@ -27,7 +27,7 @@ "starts_with_duckdb": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE prefix(N_NAME, 'A'); """, [DuckDBProducer], @@ -35,7 +35,7 @@ "starts_with": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE starts_with(N_NAME, 'A'); """, [DataFusionProducer], @@ -43,7 +43,7 @@ "ends_with": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE suffix(N_NAME, 'A'); """, [DuckDBProducer], @@ -51,21 +51,21 @@ "substring": ( """ SELECT N_NAME, substr(N_NAME, 1, 3) AS substr_name - FROM '{}'; + FROM '{nation}'; """, [DataFusionProducer, DuckDBProducer], ), "substring_isthmus": ( """ SELECT N_NAME, SUBSTRING(N_NAME FROM 1 FOR 3) AS substr_name - FROM '{}'; + FROM '{nation}'; """, [IsthmusProducer], ), "contains": ( """ SELECT N_NAME - FROM '{}' + FROM '{nation}' WHERE contains(N_NAME, 'IA'); """, [DataFusionProducer, DuckDBProducer], @@ -73,42 +73,42 @@ "strpos": ( """ SELECT N_NAME, strpos(N_NAME, 'A') AS strpos_name - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "replace": ( """ SELECT N_NAME, replace(N_NAME, 'A', 'a') AS replace_name - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "repeat": ( """ SELECT N_NAME, repeat(N_NAME, 2) AS repeated_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "reverse": ( """ SELECT N_NAME, reverse(N_NAME) AS reversed_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "lower": ( """ SELECT N_NAME, lower(N_NAME) AS lowercase_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "upper": ( """ SELECT O_COMMENT, upper(O_COMMENT) AS uppercase_O_COMMENT - FROM '{}' + FROM '{orders}' LIMIT 10; """, [DataFusionProducer, 
DuckDBProducer], @@ -116,63 +116,63 @@ "char_length": ( """ SELECT N_NAME, length(N_NAME) AS char_length_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "bit_length": ( """ SELECT N_NAME, bit_length(N_NAME) AS bit_length_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "ltrim": ( """ SELECT N_NAME, ltrim(N_NAME, 'A') AS ltrim_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "rtrim": ( """ SELECT N_NAME, rtrim(N_NAME, 'A') AS rtrim_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "trim": ( """ SELECT N_NAME, trim(N_NAME, 'A') AS trim_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "lpad": ( """ SELECT N_NAME, lpad(N_NAME, 10, ' ') AS lpad_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "rpad": ( """ SELECT N_NAME, rpad(N_NAME, 10, ' ') AS rpad_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "left": ( """ SELECT N_NAME, left(N_NAME, 2) AS left_extract_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), "right": ( """ SELECT N_NAME, right(N_NAME, 2) AS right_extract_N_NAME - FROM '{}' + FROM '{nation}' """, [DataFusionProducer, DuckDBProducer], ), @@ -182,7 +182,7 @@ "string_agg": ( """ SELECT N_NAME, string_agg(N_NAME, ',') - FROM '{}' + FROM '{nation}' GROUP BY N_NAME ORDER BY N_NAME """, diff --git a/substrait_consumer/functional/read_relation_configs.py b/substrait_consumer/functional/read_relation_configs.py index b09021c5..2b7447c8 100644 --- a/substrait_consumer/functional/read_relation_configs.py +++ b/substrait_consumer/functional/read_relation_configs.py @@ -4,31 +4,36 @@ READ_RELATION_TESTS = ( { "test_name": "read_named_table", - "file_names": ['partsupp_small.parquet'], + "local_files": {}, + "named_tables": {"partsupp": "partsupp_small.parquet"}, "sql_query": 
READ_RELATIONS["read_named_table"], "ibis_expr": None }, { "test_name": "isthmus_read_virtual_table", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": READ_RELATIONS["isthmus_read_virtual_table"], "ibis_expr": None }, { "test_name": "datafusion_read_virtual_table", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": READ_RELATIONS["datafusion_read_virtual_table"], "ibis_expr": None }, { "test_name": "duckdb_read_virtual_table", - "file_names": [], + "local_files": {}, + "named_tables": {}, "sql_query": READ_RELATIONS["duckdb_read_virtual_table"], "ibis_expr": None }, { "test_name": "duckdb_read_local_file", - "file_names": ['customer_small.parquet'], + "local_files": {"customer_file_path": "customer_small.parquet"}, + "named_tables": {}, "sql_query": READ_RELATIONS["duckdb_read_local_file"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/rounding_configs.py b/substrait_consumer/functional/rounding_configs.py index 7b69c8a3..084ffa69 100644 --- a/substrait_consumer/functional/rounding_configs.py +++ b/substrait_consumer/functional/rounding_configs.py @@ -4,19 +4,22 @@ SCALAR_FUNCTIONS = ( { "test_name": "ceil", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["ceil"], "ibis_expr": IBIS_SCALAR["ceil"], }, { "test_name": "floor", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SQL_SCALAR["floor"], "ibis_expr": IBIS_SCALAR["floor"], }, { "test_name": "round", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": SQL_SCALAR["round"], "ibis_expr": IBIS_SCALAR["round"], }, diff --git a/substrait_consumer/functional/set_relation_configs.py b/substrait_consumer/functional/set_relation_configs.py index 82726faa..7adc4ee6 100644 --- 
a/substrait_consumer/functional/set_relation_configs.py +++ b/substrait_consumer/functional/set_relation_configs.py @@ -4,25 +4,41 @@ SET_RELATION_TESTS = ( { "test_name": "union_distinct", - "file_names": ["customer_small.parquet", "nation_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "nation": "nation_small.parquet", + }, "sql_query": SET_RELATIONS["union_distinct"], "ibis_expr": None }, { "test_name": "union_all", - "file_names": ["customer_small.parquet", "nation_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "nation": "nation_small.parquet", + }, "sql_query": SET_RELATIONS["union_all"], "ibis_expr": None }, { "test_name": "intersect", - "file_names": ["customer_small.parquet", "nation_small.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer_small.parquet", + "nation": "nation_small.parquet", + }, "sql_query": SET_RELATIONS["intersect"], "ibis_expr": None }, { "test_name": "except", - "file_names": ["orders_small.parquet", "customer_small.parquet"], + "local_files": {}, + "named_tables": { + "orders": "orders_small.parquet", + "customer": "customer_small.parquet", + }, "sql_query": SET_RELATIONS["except"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/sort_relation_configs.py b/substrait_consumer/functional/sort_relation_configs.py index 0fbc4ecd..f7d970d6 100644 --- a/substrait_consumer/functional/sort_relation_configs.py +++ b/substrait_consumer/functional/sort_relation_configs.py @@ -4,49 +4,57 @@ SORT_RELATION_TESTS = ( { "test_name": "single_col_default_sort", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["single_col_default_sort"], "ibis_expr": None }, { "test_name": "single_col_asc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": 
SORT_RELATIONS["single_col_asc"], "ibis_expr": None }, { "test_name": "single_col_desc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["single_col_desc"], "ibis_expr": None }, { "test_name": "multi_col_asc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["multi_col_asc"], "ibis_expr": None }, { "test_name": "multi_col_desc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["single_col_desc"], "ibis_expr": None }, { "test_name": "multi_col_asc_desc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["multi_col_asc_desc"], "ibis_expr": None }, { "test_name": "multi_col_desc_asc", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["multi_col_desc_asc"], "ibis_expr": None }, { "test_name": "order_by_col_number", - "file_names": ["partsupp.parquet"], + "local_files": {}, + "named_tables": {"partsupp": "partsupp.parquet"}, "sql_query": SORT_RELATIONS["order_by_col_number"], "ibis_expr": None }, diff --git a/substrait_consumer/functional/string_configs.py b/substrait_consumer/functional/string_configs.py index d5389eec..4cf3dd3a 100644 --- a/substrait_consumer/functional/string_configs.py +++ b/substrait_consumer/functional/string_configs.py @@ -5,145 +5,169 @@ SCALAR_FUNCTIONS = ( { "test_name": "concat", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["concat"], "ibis_expr": IBIS_SCALAR["concat"], }, { "test_name": "concat_ws", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": 
SQL_SCALAR["concat_ws"], "ibis_expr": None, }, { "test_name": "like", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["like"], "ibis_expr": None, }, { "test_name": "starts_with", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["starts_with"], "ibis_expr": IBIS_SCALAR["starts_with"], }, { "test_name": "starts_with", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["starts_with_duckdb"], "ibis_expr": None, }, { "test_name": "ends_with", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["ends_with"], "ibis_expr": IBIS_SCALAR["ends_with"], }, { "test_name": "substring", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["substring"], "ibis_expr": IBIS_SCALAR["substr"], }, { "test_name": "substring", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["substring_isthmus"], "ibis_expr": None, }, { "test_name": "contains", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["contains"], "ibis_expr": IBIS_SCALAR["contains"], }, { "test_name": "strpos", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["strpos"], "ibis_expr": IBIS_SCALAR["strpos"], }, { "test_name": "replace", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["replace"], "ibis_expr": IBIS_SCALAR["replace"], }, { "test_name": "repeat", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": 
{"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["repeat"], "ibis_expr": IBIS_SCALAR["repeat"], }, { "test_name": "reverse", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["reverse"], "ibis_expr": IBIS_SCALAR["reverse"], }, { "test_name": "lower", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["lower"], "ibis_expr": IBIS_SCALAR["lower"], }, { "test_name": "upper", - "file_names": ["orders.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet"}, "sql_query": SQL_SCALAR["upper"], "ibis_expr": IBIS_SCALAR["upper"], }, { "test_name": "char_length", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["char_length"], "ibis_expr": IBIS_SCALAR["char_length"], }, { "test_name": "bit_length", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["bit_length"], "ibis_expr": None, }, { "test_name": "ltrim", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["ltrim"], "ibis_expr": IBIS_SCALAR["ltrim"], }, { "test_name": "rtrim", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["rtrim"], "ibis_expr": IBIS_SCALAR["rtrim"], }, { "test_name": "trim", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["trim"], "ibis_expr": IBIS_SCALAR["trim"], }, { "test_name": "lpad", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["lpad"], "ibis_expr": IBIS_SCALAR["lpad"], }, { "test_name": "rpad", - "file_names": ["nation.parquet"], + 
"local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["rpad"], "ibis_expr": IBIS_SCALAR["rpad"], }, { "test_name": "left", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["left"], "ibis_expr": IBIS_SCALAR["left"], }, { "test_name": "right", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_SCALAR["right"], "ibis_expr": IBIS_SCALAR["right"], }, @@ -152,7 +176,8 @@ AGGREGATE_FUNCTIONS = ( { "test_name": "string_agg", - "file_names": ["nation.parquet"], + "local_files": {}, + "named_tables": {"nation": "nation.parquet"}, "sql_query": SQL_AGGREGATE["string_agg"], "ibis_expr": None, }, diff --git a/substrait_consumer/functional/write_relation_configs.py b/substrait_consumer/functional/write_relation_configs.py index c531b2f5..fcd0f334 100644 --- a/substrait_consumer/functional/write_relation_configs.py +++ b/substrait_consumer/functional/write_relation_configs.py @@ -4,19 +4,22 @@ WRITE_RELATION_TESTS = ( { "test_name": "insert", - "file_names": ["region.parquet"], + "local_files": {}, + "named_tables": {"region": "region.parquet"}, "sql_query": WRITE_RELATIONS["insert"], "ibis_expr": None }, { "test_name": "update", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": WRITE_RELATIONS["update"], "ibis_expr": None }, { "test_name": "delete", - "file_names": ["customer.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet"}, "sql_query": WRITE_RELATIONS["delete"], "ibis_expr": None }, diff --git a/substrait_consumer/producers/datafusion_producer.py b/substrait_consumer/producers/datafusion_producer.py index a9f0b79d..abed684c 100644 --- a/substrait_consumer/producers/datafusion_producer.py +++ b/substrait_consumer/producers/datafusion_producer.py @@ -22,10 +22,15 @@ def __init__(self, 
db_connection=None): else: self._db_connection = db_connection - def set_db_connection(self, db_connection): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): self._db_connection = db_connection + self.register_named_tables(named_tables) - def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: """ Produce the DataFusion substrait plan using the given SQL query. @@ -55,27 +60,26 @@ def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = N return MessageToJson(substrait_proto) - def register_tables(self, file_names): + def register_named_tables(self, named_tables): """ - Register tables to the datafusion session context. + Register named_tables to the datafusion session context. Parameters: - file_names: - Name of parquet files. + named_tables: + A `dict` mapping table names to local file paths, which should + be loaded into the datafusion session context. 
Returns: None """ - if len(file_names) > 0: - parquet_file_paths = SubstraitUtils.get_full_path(file_names) - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem + if len(named_tables) > 0: + for table_name, file_path in named_tables.items(): if self._ctx.table_exist(table_name): self._ctx.deregister_table(table_name) self._ctx.register_parquet(table_name, file_path) assert self._ctx.table_exist(table_name) else: if not self._ctx.table_exist("t"): - tables = pa.RecordBatch.from_arrays( + named_tables = pa.RecordBatch.from_arrays( [ pa.array(COLUMN_A), pa.array(COLUMN_B), @@ -84,14 +88,7 @@ def register_tables(self, file_names): ], names=["a", "b", "c", "d"], ) - self._ctx.register_record_batches("t", [[tables]]) - - def format_sql(self, sql_query, file_names): - self.register_tables(file_names) - if len(file_names) > 0: - table_names = [Path(f).stem for f in file_names] - sql_query = sql_query.format(*table_names) - return sql_query + self._ctx.register_record_batches("t", [[named_tables]]) def name(self): return "DataFusionProducer" diff --git a/substrait_consumer/producers/duckdb_producer.py b/substrait_consumer/producers/duckdb_producer.py index aba4d435..06a1abfe 100644 --- a/substrait_consumer/producers/duckdb_producer.py +++ b/substrait_consumer/producers/duckdb_producer.py @@ -1,10 +1,11 @@ import json -import substrait_validator as sv -from .producer import Producer, load_tables_from_parquet -from substrait_consumer.common import SubstraitUtils +from typing import Optional import duckdb +import pyarrow as pa +import substrait_validator as sv +from .producer import Producer, load_named_tables class DuckDBProducer(Producer): """ @@ -19,10 +20,15 @@ def __init__(self, db_connection=None): self._db_connection.execute("INSTALL substrait") self._db_connection.execute("LOAD substrait") - def set_db_connection(self, db_connection): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: 
dict[str, str] + ): self._db_connection = db_connection + load_named_tables(self._db_connection, named_tables) - def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: """ Produce the DuckDB substrait plan using the given SQL query. @@ -47,17 +53,11 @@ def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = N python_json = json.loads(proto_bytes) return json.dumps(python_json, indent=2) - def format_sql(self, sql_query, file_names): - if len(file_names) > 0: - if "read_parquet" in sql_query: - parquet_file_path = SubstraitUtils.get_full_path(file_names) - sql_query = sql_query.format(parquet_file_path[0]) - else: - table_names = load_tables_from_parquet( - self._db_connection, file_names - ) - sql_query = sql_query.format(*table_names) - return sql_query + def run_sql_query(self, sql_query: str) -> Optional[pa.Table]: + sql_query = self.format_sql(sql_query) + result = self._db_connection.query(f"{sql_query}") + if result is not None: + return result.arrow() def name(self): return "DuckDBProducer" diff --git a/substrait_consumer/producers/ibis_producer.py b/substrait_consumer/producers/ibis_producer.py index 7ee1668e..ec8dc98f 100644 --- a/substrait_consumer/producers/ibis_producer.py +++ b/substrait_consumer/producers/ibis_producer.py @@ -1,5 +1,5 @@ -from .producer import Producer, load_tables_from_parquet +from .producer import Producer, load_named_tables import duckdb import pytest @@ -21,10 +21,15 @@ def __init__(self, db_connection=None): self._db_connection.execute("INSTALL substrait") self._db_connection.execute("LOAD substrait") - def set_db_connection(self, db_connection): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): self._db_connection = db_connection + load_named_tables(self._db_connection, named_tables) - def produce_substrait(self, 
sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: """ Produce the Ibis substrait plan using the given Ibis expression @@ -42,13 +47,5 @@ def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = N substrait_plan = json_format.MessageToJson(tpch_proto_bytes) return substrait_plan - def format_sql(self, sql_query, file_names): - if len(file_names) > 0: - table_names = load_tables_from_parquet( - self._db_connection, file_names - ) - sql_query = sql_query.format(*table_names) - return sql_query - def name(self): return "IbisProducer" diff --git a/substrait_consumer/producers/isthmus_producer.py b/substrait_consumer/producers/isthmus_producer.py index 94da68da..33cba152 100644 --- a/substrait_consumer/producers/isthmus_producer.py +++ b/substrait_consumer/producers/isthmus_producer.py @@ -1,5 +1,7 @@ +import re + import duckdb -from .producer import Producer, load_tables_from_parquet +from .producer import Producer, load_named_tables from ibis_substrait.compiler.core import SubstraitCompiler from substrait_consumer.context import get_schema, produce_isthmus_substrait @@ -18,12 +20,18 @@ def __init__(self, db_connection=None): self._db_connection.execute("INSTALL substrait") self._db_connection.execute("LOAD substrait") self.compiler = SubstraitCompiler() - self.file_names = None + self.table_names = None - def set_db_connection(self, db_connection): + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): self._db_connection = db_connection + self.table_names = list(named_tables.keys()) + load_named_tables(self._db_connection, named_tables) - def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: """ Produce the Isthmus substrait plan using the given SQL 
query. @@ -35,21 +43,14 @@ def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = N Returns: Substrait query plan in json format. """ - schema_list = get_schema(self.file_names) + schema_list = get_schema(self.table_names) substrait_plan_str = produce_isthmus_substrait(sql_query, schema_list, validate) return substrait_plan_str - def format_sql(self, sql_query, file_names): - sql_query = sql_query.replace("'{}'", "{}") - sql_query = sql_query.replace("'t'", "t") - if len(file_names) > 0: - self.file_names = file_names - table_names = load_tables_from_parquet( - self._db_connection, file_names - ) - sql_query = sql_query.format(*table_names) - return sql_query + def _format_sql(self, sql_query): + sql_query = re.sub(r"'(\{[0-9a-zA-Z_]+\})'", r"\1", sql_query) + return sql_query.replace("'t'", "t") def name(self): return "IsthmusProducer" diff --git a/substrait_consumer/producers/producer.py b/substrait_consumer/producers/producer.py index c5f56eea..cd017547 100644 --- a/substrait_consumer/producers/producer.py +++ b/substrait_consumer/producers/producer.py @@ -1,50 +1,173 @@ -import string from abc import ABC, abstractmethod -from pathlib import Path -from typing import Iterable +from typing import Optional + +from duckdb import DuckDBPyConnection from substrait_consumer.common import SubstraitUtils class Producer(ABC): - @abstractmethod - def set_db_connection(self, db_connection): - pass + def __init__( + self, + db_connection: Optional[DuckDBPyConnection] = None, + local_files: Optional[dict[str, str]] = None, + named_tables: Optional[dict[str, str]] = None, + ): + if db_connection is None: + db_connection = DuckDBPyConnection() + if local_files is None: + local_files = {} + if named_tables is None: + named_tables = {} + self.setup(db_connection, local_files, named_tables) + + def setup( + self, + db_connection: DuckDBPyConnection, + local_files: dict[str, str], + named_tables: dict[str, str], + ): + """ + Initializes this `Producer` 
instance. + + In particular, expands the paths in `local_files` and `named_tables` to + absolute paths and forwards the arguments to `self._setup` implemented + by classes inheriting from `Producer`. + + Parameters: + db_connection: + DuckDB connection for this `Producer`. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. + """ + self._db_connection = db_connection + self._local_files = SubstraitUtils.compute_full_paths(local_files) + self._named_tables = SubstraitUtils.compute_full_paths(named_tables) + self._setup(db_connection, self._local_files, self._named_tables) + + def produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: + """ + Produces a Substrait plan of the given query in JSON format. + + The query can be given either as sql_query or as Ibis expression. In + the first case, the function first formats the query using + `self.format_sql`. In either case, the function lets the concrete + class produce the substrait plan using `self._produce_substrait`. + + Parameters: + sql_query: + SQL query. + validate: + Whether the Substrait plan should be validated. + ibis_expr: + Ibis expression. + Returns: + Substrait query plan in JSON format. + """ + sql_query = self.format_sql(sql_query) + return self._produce_substrait(sql_query, validate, ibis_expr) + + def format_sql(self, sql_query: str) -> str: + """ + Formats the given SQL query. + + Formatting consists of calling `self._format_sql`, which is implemented by + concrete classes with producer-specific formatting logic as well as + substituting format arguments for named tables and local files. + + Parameters: + sql_query: + SQL query. + Returns: + Formatted SQL query. 
+ """ + sql_query = self._format_sql(sql_query) + named_tables = {k: k for k in self._named_tables.keys()} + return sql_query.format(**self._local_files, **named_tables) @abstractmethod - def produce_substrait(self, sql_query: str, validate = False, ibis_expr: str = None) -> str: + def _setup( + self, db_connection, local_files: dict[str, str], named_tables: dict[str, str] + ): + """ + Initializes this `Producer` instance with subclass-specific logic. + + This typically consists of loading the named tables into the producer + back-end such that they are available during subsequent calls to + `produce_substrait`. + + Parameters: + db_connection: + DuckDB connection for this `Producer`. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. + """ pass @abstractmethod - def format_sql(self, sql_query, file_names): + def _produce_substrait( + self, sql_query: str, validate=False, ibis_expr: str = None + ) -> str: + """ + Produces a Substrait plan of the given SQL query in JSON format. + + At this point, the SQL query has already been formatted by the base + class. + + Parameters: + sql_query: + SQL query. + validate: + Whether the Substrait plan should be validated. + ibis_expr: + Ibis expression. + Returns: + Substrait query plan in JSON format. + """ pass + def _format_sql(self, sql_query: str) -> str: + """ + Executes producer-specific reformatting of the given SQL query. + + This function may be overridden by concrete classes in order to change + (i.e., "reformat") the given SQL query such that it fits the syntax of + the producer. + + Parameters: + sql_query: + SQL query. + Returns: + Formatted SQL query. + """ + return sql_query -def load_tables_from_parquet( + +def load_named_tables( db_connection, - file_names: Iterable[str], -) -> list: + named_tables: dict[str, str], +) -> None: """ - Load all the parquet files into separate tables in DuckDB. 
+ Load all the parquet files into separate named_tables in DuckDB. Parameters: db_connection: DuckDB Connection. - file_names: - Name of parquet files. + named_tables: + A `dict` mapping table names to local file paths. Returns: A list of the table names. """ - parquet_file_paths = SubstraitUtils.get_full_path(file_names) - table_names = [] - for file_name, file_path in zip(file_names, parquet_file_paths): - table_name = Path(file_name).stem + for table_name, file_path in named_tables.items(): try: db_connection.execute(f"DROP TABLE {table_name}") except: pass create_table_sql = f"CREATE TABLE {table_name} AS SELECT * FROM read_parquet('{file_path}');" db_connection.execute(create_table_sql) - table_names.append(table_name) - - return table_names diff --git a/substrait_consumer/tests/adhoc/test_adhoc_expression.py b/substrait_consumer/tests/adhoc/test_adhoc_expression.py index f80bfadb..d1961595 100644 --- a/substrait_consumer/tests/adhoc/test_adhoc_expression.py +++ b/substrait_consumer/tests/adhoc/test_adhoc_expression.py @@ -63,15 +63,16 @@ def test_adhoc_expression( nation, region, ) -> None: - adhoc_producer.set_db_connection(self.db_connection) - consumer.setup(self.db_connection, FILE_NAMES) + local_files = FILE_NAMES + named_tables = dict() + producer.setup(self.db_connection, local_files, named_tables) + consumer.setup(self.db_connection, local_files, named_tables) with open(SQL_FILE_PATH, "r") as f: sql_query = f.read() if not sql_query: raise ValueError("No SQL query. 
Please write SQL into query.sql") - sql_query = adhoc_producer.format_sql(set(), sql_query, FILE_NAMES) substrait_plan = adhoc_producer.produce_substrait( sql_query, consumer, @@ -92,7 +93,9 @@ def test_adhoc_expression( ) actual_result = consumer.run_substrait_query(substrait_plan) - expected_result = self.db_connection.query(f"{sql_query}").arrow() + duckdb_producer = DuckDBProducer() + duckdb_producer.setup(self.db_connection, local_files, named_tables) + expected_result = duckdb_producer.run_substrait_query(sql_query) verify_equals( actual_result.columns, diff --git a/substrait_consumer/tests/functional/extension_functions/test_approximation_functions.py b/substrait_consumer/tests/functional/extension_functions/test_approximation_functions.py index e19bbe01..0454057f 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_approximation_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_approximation_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -55,7 +55,8 @@ def test_producer_approximation_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -66,7 +67,8 @@ def test_producer_approximation_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -80,7 +82,8 @@ def test_consumer_approximation_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -92,7 +95,8 @@ def test_consumer_approximation_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -105,7 +109,8 @@ 
def test_generate_approximation_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -114,6 +119,7 @@ def test_generate_approximation_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_arithmetic_decimal_functions.py b/substrait_consumer/tests/functional/extension_functions/test_arithmetic_decimal_functions.py index 92955b7a..6b31bf8b 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_arithmetic_decimal_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_arithmetic_decimal_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -66,7 +66,8 @@ def test_producer_arithmetic_decimal_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -77,7 +78,8 @@ def test_producer_arithmetic_decimal_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -91,7 +93,8 @@ def test_consumer_arithmetic_decimal_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -103,7 +106,8 @@ def test_consumer_arithmetic_decimal_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -116,7 +120,8 @@ def test_generate_arithmetic_decimal_functions_results( self, snapshot, test_name: str, - file_names: 
Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -125,6 +130,7 @@ def test_generate_arithmetic_decimal_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_arithmetic_functions.py b/substrait_consumer/tests/functional/extension_functions/test_arithmetic_functions.py index bcd6441a..fc3abbec 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_arithmetic_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_arithmetic_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -82,7 +82,8 @@ def test_producer_arithmetic_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -94,7 +95,8 @@ def test_producer_arithmetic_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -110,7 +112,8 @@ def test_consumer_arithmetic_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -121,7 +124,8 @@ def test_consumer_arithmetic_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -135,7 +139,8 @@ def test_generate_arithmetic_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -144,6 +149,7 @@ def 
test_generate_arithmetic_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py b/substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py index d4bd21ae..6ca0a151 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_boolean_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -56,7 +56,8 @@ def test_producer_boolean_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -66,7 +67,8 @@ def test_producer_boolean_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -80,7 +82,8 @@ def test_consumer_boolean_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -91,7 +94,8 @@ def test_consumer_boolean_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -104,7 +108,8 @@ def test_generate_boolean_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -113,6 +118,7 @@ def test_generate_boolean_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git 
a/substrait_consumer/tests/functional/extension_functions/test_comparison_functions.py b/substrait_consumer/tests/functional/extension_functions/test_comparison_functions.py index 8e392ee9..33839588 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_comparison_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_comparison_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -69,7 +69,8 @@ def test_producer_comparison_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -81,7 +82,8 @@ def test_producer_comparison_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -96,7 +98,8 @@ def test_consumer_comparison_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -109,7 +112,8 @@ def test_consumer_comparison_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -122,7 +126,8 @@ def test_generate_comparison_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -131,6 +136,7 @@ def test_generate_comparison_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_datetime_functions.py b/substrait_consumer/tests/functional/extension_functions/test_datetime_functions.py index 
8354cce1..37556835 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_datetime_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_datetime_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -66,7 +66,8 @@ def test_producer_datetime_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -77,7 +78,8 @@ def test_producer_datetime_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -91,7 +93,8 @@ def test_consumer_datetime_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -103,7 +106,8 @@ def test_consumer_datetime_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -116,7 +120,8 @@ def test_generate_datetime_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -125,6 +130,7 @@ def test_generate_datetime_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_logarithmic_functions.py b/substrait_consumer/tests/functional/extension_functions/test_logarithmic_functions.py index c79745d7..b0b9874e 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_logarithmic_functions.py +++ 
b/substrait_consumer/tests/functional/extension_functions/test_logarithmic_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -69,7 +69,8 @@ def test_producer_logarithmic_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -80,7 +81,8 @@ def test_producer_logarithmic_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -94,7 +96,8 @@ def test_consumer_logarithmic_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -106,7 +109,8 @@ def test_consumer_logarithmic_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -120,7 +124,8 @@ def test_generate_logarithmic_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -129,6 +134,7 @@ def test_generate_logarithmic_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_rounding_functions.py b/substrait_consumer/tests/functional/extension_functions/test_rounding_functions.py index ffc7cf3d..3df62f73 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_rounding_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_rounding_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb 
from ibis.expr.types.relations import Table @@ -53,7 +53,8 @@ def test_producer_rounding_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -65,7 +66,8 @@ def test_producer_rounding_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -80,7 +82,8 @@ def test_consumer_rounding_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -93,7 +96,8 @@ def test_consumer_rounding_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -106,7 +110,8 @@ def test_generate_rounding_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -115,6 +120,7 @@ def test_generate_rounding_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_string_functions.py b/substrait_consumer/tests/functional/extension_functions/test_string_functions.py index 3164eb39..11e6dc79 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_string_functions.py +++ b/substrait_consumer/tests/functional/extension_functions/test_string_functions.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -65,7 +65,8 @@ def test_producer_string_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, 
str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -77,7 +78,8 @@ def test_producer_string_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -92,7 +94,8 @@ def test_consumer_string_functions( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -105,7 +108,8 @@ def test_consumer_string_functions( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -118,7 +122,8 @@ def test_generate_string_functions_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -127,6 +132,7 @@ def test_generate_string_functions_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/extension_functions/test_substrait_function_names.py b/substrait_consumer/tests/functional/extension_functions/test_substrait_function_names.py index 9771513a..94f70821 100644 --- a/substrait_consumer/tests/functional/extension_functions/test_substrait_function_names.py +++ b/substrait_consumer/tests/functional/extension_functions/test_substrait_function_names.py @@ -1,16 +1,15 @@ import json -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table from ibis_substrait.tests.compiler.conftest import * -from substrait_consumer.consumers.duckdb_consumer import DuckDBConsumer from substrait_consumer.functional import ( arithmetic_configs, boolean_configs, comparison_configs, datetime_configs, logarithmic_configs, rounding_configs) from substrait_consumer.functional.common import 
check_subtrait_function_names, load_custom_duckdb_table from substrait_consumer.parametrization import custom_parametrization -from substrait_consumer.producers.producer import load_tables_from_parquet +from substrait_consumer.producers.duckdb_producer import DuckDBProducer @pytest.mark.usefixtures("prepare_tpch_parquet_data") @@ -44,7 +43,8 @@ def setup_teardown_function(request): def test_arithmetic_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -56,7 +56,8 @@ def test_arithmetic_function_names( """ self.run_function_name_test( test_name, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -71,7 +72,8 @@ def test_arithmetic_function_names( def test_boolean_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -80,14 +82,21 @@ def test_boolean_function_names( Verify the substrait function names for boolean functions. """ self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, self.table_t + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + self.table_t, ) @custom_parametrization(comparison_configs.SCALAR_FUNCTIONS) def test_comparison_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -98,14 +107,22 @@ def test_comparison_function_names( Verify the substrait function names for comparison functions. 
""" self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, partsupp, nation + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + partsupp, + nation, ) @custom_parametrization(datetime_configs.SCALAR_FUNCTIONS) def test_datetime_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -115,14 +132,21 @@ def test_datetime_function_names( Verify the substrait function names for datetime functions. """ self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, partsupp + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + partsupp, ) @custom_parametrization(logarithmic_configs.SCALAR_FUNCTIONS) def test_logarithmic_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, ibis_expr: Callable[[Table], Table], producer, @@ -132,14 +156,21 @@ def test_logarithmic_function_names( Verify the substrait function names for logarithmic functions. """ self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, partsupp + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + partsupp, ) @custom_parametrization(rounding_configs.SCALAR_FUNCTIONS) def test_rounding_function_names( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -149,13 +180,20 @@ def test_rounding_function_names( Verify the substrait function names for rounding functions. 
""" self.run_function_name_test( - test_name, file_names, sql_query, ibis_expr, producer, partsupp + test_name, + local_files, + named_tables, + sql_query, + ibis_expr, + producer, + partsupp, ) def run_function_name_test( self, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -169,8 +207,10 @@ def run_function_name_test( Parameters: test_name: Expected function name as defined by the substrait spec. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. ibis_expr: @@ -178,28 +218,24 @@ def run_function_name_test( producer: Substrait producer class. *args: - The data tables to be passed to the ibis expression. + The ibis tables to be passed to the ibis expression. """ - producer.set_db_connection(self.db_connection) - - # Load the parquet files into DuckDB and return all the table names as a list - sql_query = producer.format_sql(sql_query[0], file_names) + producer.setup(self.db_connection, local_files, named_tables) # Grab the json representation of the produced substrait plan to verify # the proper substrait function name. 
if type(producer).__name__ == "IbisProducer": if ibis_expr: - substrait_plan = producer.produce_substrait( + substrait_plan_json = producer.produce_substrait( sql_query, validate=False, ibis_expr=ibis_expr(*args) ) - substrait_plan = json.loads(substrait_plan) else: pytest.skip("ibis expression currently undefined") else: - load_tables_from_parquet(self.db_connection, file_names) - substrait_json = self.db_connection.get_substrait_json(sql_query) - proto = substrait_json.fetchone()[0] - substrait_plan = json.loads(proto) + duckdb_producer = DuckDBProducer(self.db_connection) + duckdb_producer.setup(self.db_connection, local_files, named_tables) + substrait_plan_json = duckdb_producer.produce_substrait(sql_query[0]) + substrait_plan = json.loads(substrait_plan_json) check_subtrait_function_names(substrait_plan, test_name) diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_in_subquery_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_in_subquery_plan.json index f1cdf092..e925b072 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_in_subquery_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_in_subquery_plan.json @@ -47,7 +47,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -80,7 +80,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_computation_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_computation_plan.json index 84e6853b..1ab22c01 100644 --- 
a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_computation_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_computation_plan.json @@ -47,7 +47,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -106,7 +106,7 @@ } }, "names": [ - "avg(orders_small.o_totalprice) * Int64(10)" + "avg(orders.o_totalprice) * Int64(10)" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json index dc2b1b1b..4f016b93 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_plan.json @@ -48,7 +48,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_rollup_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_rollup_plan.json index efc4ced6..79700375 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_rollup_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_group_by_rollup_plan.json @@ -48,7 +48,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json 
b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json index 5f165a36..dd37730b 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/aggregate_with_grouping_set_plan.json @@ -53,7 +53,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } @@ -152,7 +152,7 @@ } }, "names": [ - "sum(lineitem_small.l_extendedprice)", + "sum(lineitem.l_extendedprice)", "l_linenumber", "l_orderkey" ] diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/computation_between_aggregates_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/computation_between_aggregates_plan.json index 978e8be8..5a6fe23b 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/computation_between_aggregates_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/computation_between_aggregates_plan.json @@ -54,7 +54,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -131,7 +131,7 @@ } }, "names": [ - "avg(orders_small.o_totalprice) + MAX(orders_small.o_totalprice)" + "avg(orders.o_totalprice) + MAX(orders.o_totalprice)" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/compute_within_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/compute_within_aggregate_plan.json index a0a0fcc6..b1f4d1ad 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/compute_within_aggregate_plan.json +++ 
b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/compute_within_aggregate_plan.json @@ -45,7 +45,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -93,7 +93,7 @@ } }, "names": [ - "avg(orders_small.o_totalprice * Int64(10))" + "avg(orders.o_totalprice * Int64(10))" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/multiple_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/multiple_measure_aggregate_plan.json index 2f91ca78..20a81062 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/multiple_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/multiple_measure_aggregate_plan.json @@ -52,7 +52,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -115,9 +115,9 @@ } }, "names": [ - "MIN(orders_small.o_totalprice)", - "MAX(orders_small.o_totalprice)", - "avg(orders_small.o_totalprice)" + "MIN(orders.o_totalprice)", + "MAX(orders.o_totalprice)", + "avg(orders.o_totalprice)" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/single_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/single_measure_aggregate_plan.json index 7ff59292..6c35e688 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/single_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DataFusionProducer/single_measure_aggregate_plan.json @@ -45,7 +45,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } @@ -74,7 +74,7 @@ } }, "names": [ - "count(lineitem_small.l_partkey)" + 
"count(lineitem.l_partkey)" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_in_subquery_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_in_subquery_plan.json index 5a8b3f9c..a7b5379c 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_in_subquery_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_in_subquery_plan.json @@ -116,7 +116,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -206,7 +206,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_computation_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_computation_plan.json index 36e7d69b..be85886b 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_computation_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_computation_plan.json @@ -111,7 +111,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_cube_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_cube_plan.json index 15729190..4f656d74 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_cube_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_cube_plan.json @@ -151,7 +151,7 @@ }, 
"namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json index 15729190..4f656d74 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_plan.json @@ -151,7 +151,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_rollup_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_rollup_plan.json index 15729190..4f656d74 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_rollup_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_group_by_rollup_plan.json @@ -151,7 +151,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json index bb67434e..a013bde1 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/aggregate_with_grouping_set_plan.json @@ -154,7 +154,7 @@ }, "namedTable": { "names": [ 
- "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/computation_between_aggregates_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/computation_between_aggregates_plan.json index be2bb472..89d0f06c 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/computation_between_aggregates_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/computation_between_aggregates_plan.json @@ -118,7 +118,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/compute_within_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/compute_within_aggregate_plan.json index dc8f8c85..5f8c6194 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/compute_within_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/compute_within_aggregate_plan.json @@ -111,7 +111,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/multiple_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/multiple_measure_aggregate_plan.json index 10d97ccd..8d9677b6 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/multiple_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/multiple_measure_aggregate_plan.json @@ -114,7 +114,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } 
diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/single_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/single_measure_aggregate_plan.json index 10fae6b9..354b2bc5 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/single_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/DuckDBProducer/single_measure_aggregate_plan.json @@ -148,7 +148,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_in_subquery_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_in_subquery_plan.json index f79b3838..c3ab0ab9 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_in_subquery_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_in_subquery_plan.json @@ -85,7 +85,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, @@ -177,7 +177,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_computation_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_computation_plan.json index e4213325..206a9ecb 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_computation_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_computation_plan.json @@ -89,7 +89,7 @@ } }, "namedTable": 
{ - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_cube_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_cube_plan.json index e6ad8f39..66516320 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_cube_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_cube_plan.json @@ -116,7 +116,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json index e83e47d1..e9b498b6 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_plan.json @@ -116,7 +116,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_rollup_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_rollup_plan.json index 7a94fbd0..85b334f8 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_rollup_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_group_by_rollup_plan.json @@ -116,7 
+116,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json index 5e0bc21a..c3bd95ba 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/aggregate_with_grouping_set_plan.json @@ -123,7 +123,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/computation_between_aggregates_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/computation_between_aggregates_plan.json index 98f919e1..e109fa6c 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/computation_between_aggregates_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/computation_between_aggregates_plan.json @@ -95,7 +95,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/compute_within_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/compute_within_aggregate_plan.json index 01b844f2..7bdad7f1 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/compute_within_aggregate_plan.json +++ 
b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/compute_within_aggregate_plan.json @@ -82,7 +82,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/multiple_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/multiple_measure_aggregate_plan.json index 316780b1..3bdf7341 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/multiple_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/multiple_measure_aggregate_plan.json @@ -88,7 +88,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/single_measure_aggregate_plan.json b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/single_measure_aggregate_plan.json index b4827b54..74654709 100644 --- a/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/single_measure_aggregate_plan.json +++ b/substrait_consumer/tests/functional/relations/aggregate_relation_snapshots/IsthmusProducer/single_measure_aggregate_plan.json @@ -103,7 +103,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/cross_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/cross_join_plan.json index bddfdf51..f096985f 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/cross_join_plan.json +++ 
b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/cross_join_plan.json @@ -30,7 +30,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -59,7 +59,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/full_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/full_join_plan.json index e64bb2ab..36851268 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/full_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/full_join_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -72,7 +72,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/inner_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/inner_join_plan.json index b2b24a1f..60582706 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/inner_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/inner_join_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -72,7 +72,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_anti_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_anti_join_plan.json index 36b71de4..c04445cf 100644 --- 
a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_anti_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_anti_join_plan.json @@ -38,7 +38,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -69,7 +69,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_join_plan.json index c1c0cae8..bc3ff9cf 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_join_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -72,7 +72,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_semi_join_plan.json index 0d427fdd..809d6966 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/left_semi_join_plan.json @@ -38,7 +38,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -69,7 +69,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_anti_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_anti_join_plan.json 
index daabccc9..ca43b71b 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_anti_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_anti_join_plan.json @@ -39,7 +39,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -75,7 +75,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_join_plan.json index b64e04fc..a80b3f0a 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_join_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -72,7 +72,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_semi_join_plan.json index f629f0de..bcd21bce 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DataFusionProducer/right_semi_join_plan.json @@ -39,7 +39,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -67,7 +67,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/cross_join_plan.json 
b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/cross_join_plan.json index 3f4b8028..42429620 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/cross_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/cross_join_plan.json @@ -80,7 +80,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -162,7 +162,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/full_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/full_join_plan.json index 92079b20..c2f0c315 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/full_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/full_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -182,7 +182,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/inner_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/inner_join_plan.json index e7182699..f8e972f8 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/inner_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/inner_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -182,7 +182,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_join_plan.json 
b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_join_plan.json index 65d685c3..b6cd32da 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -182,7 +182,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_semi_join_plan.json index 1153a98a..07445ad7 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/left_semi_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -183,7 +183,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_join_plan.json index d07d72b1..a72d44fe 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_join_plan.json @@ -97,7 +97,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -182,7 +182,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_semi_join_plan.json 
b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_semi_join_plan.json index 03386ece..ce318c03 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/DuckDBProducer/right_semi_join_plan.json @@ -103,7 +103,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -181,7 +181,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/cross_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/cross_join_plan.json index d9e5b2f1..96600cdd 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/cross_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/cross_join_plan.json @@ -62,7 +62,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -118,7 +118,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } } diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/full_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/full_join_plan.json index dee70c25..5b0d1942 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/full_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/full_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git 
a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/inner_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/inner_join_plan.json index c28d77a6..ff9ff74a 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/inner_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/inner_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_anti_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_anti_join_plan.json index 985f630a..18832797 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_anti_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_anti_join_plan.json @@ -81,7 +81,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -155,7 +155,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_join_plan.json index bbc8406e..2049dcd8 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { 
- "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_semi_join_plan.json index a1a0fe0c..d497bf5b 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/left_semi_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -138,7 +138,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_anti_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_anti_join_plan.json index e22e13bc..fff610d0 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_anti_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_anti_join_plan.json @@ -85,7 +85,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, @@ -193,7 +193,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_join_plan.json index 74b26e2e..049d8732 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_join_plan.json @@ -72,7 +72,7 @@ } }, "namedTable": { - "names": 
["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, diff --git a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_semi_join_plan.json b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_semi_join_plan.json index e1750dd6..84b59063 100644 --- a/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_semi_join_plan.json +++ b/substrait_consumer/tests/functional/relations/join_relation_snapshots/IsthmusProducer/right_semi_join_plan.json @@ -76,7 +76,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, @@ -138,7 +138,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/count_distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/count_distinct_in_project_plan.json index dbe862e8..61c3e462 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/count_distinct_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/count_distinct_in_project_plan.json @@ -49,7 +49,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } @@ -104,7 +104,7 @@ } }, "names": [ - "count(DISTINCT lineitem_small.l_extendedprice)" + "count(DISTINCT lineitem.l_extendedprice)" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/distinct_in_project_plan.json index e2bca902..fd81e428 100644 --- 
a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/distinct_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/distinct_in_project_plan.json @@ -37,7 +37,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/extended_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/extended_project_plan.json index 4c37b5f6..7b35ee2c 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/extended_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/extended_project_plan.json @@ -48,7 +48,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_all_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_all_col_plan.json index 9c678af9..822e8b61 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_all_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_all_col_plan.json @@ -26,7 +26,7 @@ }, "namedTable": { "names": [ - "region_small" + "region" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_multi_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_multi_col_plan.json index e7bd69eb..f2193df7 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_multi_col_plan.json +++ 
b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_multi_col_plan.json @@ -38,7 +38,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_single_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_single_col_plan.json index 3ad6d75f..88668823 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_single_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/project_single_col_plan.json @@ -78,7 +78,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/subquery_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/subquery_in_project_plan.json index 38fc05d4..5ada1287 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/subquery_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DataFusionProducer/subquery_in_project_plan.json @@ -44,7 +44,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -82,7 +82,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/count_distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/count_distinct_in_project_plan.json index eec4e67e..08e55c32 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/count_distinct_in_project_plan.json +++ 
b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/count_distinct_in_project_plan.json @@ -148,7 +148,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/extended_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/extended_project_plan.json index eeff096e..16ab4aff 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/extended_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/extended_project_plan.json @@ -149,7 +149,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_all_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_all_col_plan.json index 6be7845c..25668846 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_all_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_all_col_plan.json @@ -49,7 +49,7 @@ }, "namedTable": { "names": [ - "region_small" + "region" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_multi_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_multi_col_plan.json index 4b13193b..21b956dd 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_multi_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_multi_col_plan.json @@ -134,7 +134,7 @@ }, "namedTable": { "names": [ - 
"lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_single_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_single_col_plan.json index d8f9317c..a4663d38 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_single_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/project_single_col_plan.json @@ -174,7 +174,7 @@ }, "namedTable": { "names": [ - "lineitem_small" + "lineitem" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/subquery_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/subquery_in_project_plan.json index f05f3485..dccf7af8 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/subquery_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/DuckDBProducer/subquery_in_project_plan.json @@ -101,7 +101,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -192,7 +192,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/count_distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/count_distinct_in_project_plan.json index fcd14182..27fd40ab 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/count_distinct_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/count_distinct_in_project_plan.json @@ -110,7 +110,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": 
["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/distinct_in_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/distinct_in_project_plan.json index 290f79f3..17768b11 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/distinct_in_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/distinct_in_project_plan.json @@ -100,7 +100,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/extended_project_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/extended_project_plan.json index 4a5f4ee9..1ee39d4c 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/extended_project_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/extended_project_plan.json @@ -104,7 +104,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_all_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_all_col_plan.json index dc2f71e7..6d8393ce 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_all_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_all_col_plan.json @@ -34,7 +34,7 @@ } }, "namedTable": { - "names": ["REGION_SMALL"] + "names": ["REGION"] } } }, diff --git 
a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_multi_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_multi_col_plan.json index 9b963d28..0e45d3a9 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_multi_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_multi_col_plan.json @@ -94,7 +94,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_single_col_plan.json b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_single_col_plan.json index 7f9cc411..aa146e52 100644 --- a/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_single_col_plan.json +++ b/substrait_consumer/tests/functional/relations/project_relation_snapshots/IsthmusProducer/project_single_col_plan.json @@ -94,7 +94,7 @@ } }, "namedTable": { - "names": ["LINEITEM_SMALL"] + "names": ["LINEITEM"] } } }, diff --git a/substrait_consumer/tests/functional/relations/read_relation_snapshots/DataFusionProducer/read_named_table_plan.json b/substrait_consumer/tests/functional/relations/read_relation_snapshots/DataFusionProducer/read_named_table_plan.json index c4890931..744d735b 100644 --- a/substrait_consumer/tests/functional/relations/read_relation_snapshots/DataFusionProducer/read_named_table_plan.json +++ b/substrait_consumer/tests/functional/relations/read_relation_snapshots/DataFusionProducer/read_named_table_plan.json @@ -22,7 +22,7 @@ }, "namedTable": { "names": [ - "partsupp_small" + "partsupp" ] } } diff --git a/substrait_consumer/tests/functional/relations/read_relation_snapshots/DuckDBProducer/read_named_table_plan.json 
b/substrait_consumer/tests/functional/relations/read_relation_snapshots/DuckDBProducer/read_named_table_plan.json index f921e268..cd5ec310 100644 --- a/substrait_consumer/tests/functional/relations/read_relation_snapshots/DuckDBProducer/read_named_table_plan.json +++ b/substrait_consumer/tests/functional/relations/read_relation_snapshots/DuckDBProducer/read_named_table_plan.json @@ -57,7 +57,7 @@ }, "namedTable": { "names": [ - "partsupp_small" + "partsupp" ] } } diff --git a/substrait_consumer/tests/functional/relations/read_relation_snapshots/IsthmusProducer/read_named_table_plan.json b/substrait_consumer/tests/functional/relations/read_relation_snapshots/IsthmusProducer/read_named_table_plan.json index 0ec89a82..8c6af347 100644 --- a/substrait_consumer/tests/functional/relations/read_relation_snapshots/IsthmusProducer/read_named_table_plan.json +++ b/substrait_consumer/tests/functional/relations/read_relation_snapshots/IsthmusProducer/read_named_table_plan.json @@ -44,7 +44,7 @@ } }, "namedTable": { - "names": ["PARTSUPP_SMALL"] + "names": ["PARTSUPP"] } } }, diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/except_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/except_plan.json index ba116058..2d7c0a1e 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/except_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/except_plan.json @@ -40,7 +40,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -85,7 +85,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/intersect_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/intersect_plan.json index 27768735..613cdb57 100644 --- 
a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/intersect_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/intersect_plan.json @@ -39,7 +39,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -78,7 +78,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_all_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_all_plan.json index 18bf2fd6..73d8c6bf 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_all_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_all_plan.json @@ -30,7 +30,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -56,7 +56,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_distinct_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_distinct_plan.json index 29c12bc1..223098b9 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_distinct_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DataFusionProducer/union_distinct_plan.json @@ -34,7 +34,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -60,7 +60,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/except_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/except_plan.json index 7c7dc3c6..7993fcbd 100644 --- 
a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/except_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/except_plan.json @@ -86,7 +86,7 @@ }, "namedTable": { "names": [ - "orders_small" + "orders" ] } } @@ -178,7 +178,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/intersect_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/intersect_plan.json index 1811bef1..31f434a2 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/intersect_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/intersect_plan.json @@ -80,7 +80,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -144,7 +144,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_all_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_all_plan.json index 0fb4f4e7..3ff3cca2 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_all_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_all_plan.json @@ -80,7 +80,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -144,7 +144,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_distinct_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_distinct_plan.json index cfe14d77..fd50a229 100644 --- 
a/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_distinct_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/DuckDBProducer/union_distinct_plan.json @@ -82,7 +82,7 @@ }, "namedTable": { "names": [ - "customer_small" + "customer" ] } } @@ -146,7 +146,7 @@ }, "namedTable": { "names": [ - "nation_small" + "nation" ] } } diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/except_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/except_plan.json index e14f4412..a836a4f9 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/except_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/except_plan.json @@ -66,7 +66,7 @@ } }, "namedTable": { - "names": ["ORDERS_SMALL"] + "names": ["ORDERS"] } } }, @@ -137,7 +137,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/intersect_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/intersect_plan.json index 1a9392b4..1da990a8 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/intersect_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/intersect_plan.json @@ -62,7 +62,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -115,7 +115,7 @@ } }, "namedTable": { - "names": ["NATION_SMALL"] + "names": ["NATION"] } } }, diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_all_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_all_plan.json index 078609c3..2020ecc6 100644 --- 
a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_all_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_all_plan.json @@ -62,7 +62,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -115,7 +115,7 @@ } }, "namedTable": { - "names": ["NATION_SMALL"] + "names": ["NATION"] } } }, diff --git a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_distinct_plan.json b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_distinct_plan.json index 46284bb1..3a6f9194 100644 --- a/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_distinct_plan.json +++ b/substrait_consumer/tests/functional/relations/set_relation_snapshots/IsthmusProducer/union_distinct_plan.json @@ -75,7 +75,7 @@ } }, "namedTable": { - "names": ["CUSTOMER_SMALL"] + "names": ["CUSTOMER"] } } }, @@ -128,7 +128,7 @@ } }, "namedTable": { - "names": ["NATION_SMALL"] + "names": ["NATION"] } } }, diff --git a/substrait_consumer/tests/functional/relations/test_aggregate_relation.py b/substrait_consumer/tests/functional/relations/test_aggregate_relation.py index 6e0a325f..fa987d7d 100644 --- a/substrait_consumer/tests/functional/relations/test_aggregate_relation.py +++ b/substrait_consumer/tests/functional/relations/test_aggregate_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_aggregate_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_aggregate_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, 
sql_query, ibis_expr, producer, @@ -77,7 +79,8 @@ def test_consumer_aggregate_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def test_consumer_aggregate_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_aggregate_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_aggregate_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_ddl_relation.py b/substrait_consumer/tests/functional/relations/test_ddl_relation.py index 62a42ebc..c1bb3301 100644 --- a/substrait_consumer/tests/functional/relations/test_ddl_relation.py +++ b/substrait_consumer/tests/functional/relations/test_ddl_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -57,7 +57,8 @@ def test_producer_ddl_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -68,7 +69,8 @@ def test_producer_ddl_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -83,7 +85,8 @@ def test_consumer_ddl_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ 
-94,7 +97,8 @@ def test_consumer_ddl_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, diff --git a/substrait_consumer/tests/functional/relations/test_fetch_relation.py b/substrait_consumer/tests/functional/relations/test_fetch_relation.py index df500570..e4e76a99 100644 --- a/substrait_consumer/tests/functional/relations/test_fetch_relation.py +++ b/substrait_consumer/tests/functional/relations/test_fetch_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_fetch_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_fetch_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -77,7 +79,8 @@ def test_consumer_fetch_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def test_consumer_fetch_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_fetch_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_fetch_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_filter_relation.py 
b/substrait_consumer/tests/functional/relations/test_filter_relation.py index 8828dc50..2c141bcb 100644 --- a/substrait_consumer/tests/functional/relations/test_filter_relation.py +++ b/substrait_consumer/tests/functional/relations/test_filter_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -54,7 +54,8 @@ def test_producer_filter_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -65,7 +66,8 @@ def test_producer_filter_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -80,7 +82,8 @@ def test_consumer_filter_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -91,7 +94,8 @@ def test_consumer_filter_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -104,7 +108,8 @@ def test_generate_filter_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -113,6 +118,7 @@ def test_generate_filter_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_join_relation.py b/substrait_consumer/tests/functional/relations/test_join_relation.py index ab95b9a8..14ed6827 100644 --- a/substrait_consumer/tests/functional/relations/test_join_relation.py +++ b/substrait_consumer/tests/functional/relations/test_join_relation.py @@ -1,4 +1,4 @@ -from 
typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -73,7 +73,8 @@ def test_producer_join_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -84,7 +85,8 @@ def test_producer_join_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -99,7 +101,8 @@ def test_consumer_join_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -110,7 +113,8 @@ def test_consumer_join_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -123,7 +127,8 @@ def test_generate_join_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -132,6 +137,7 @@ def test_generate_join_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_project_relation.py b/substrait_consumer/tests/functional/relations/test_project_relation.py index b3a4354d..21f91bf0 100644 --- a/substrait_consumer/tests/functional/relations/test_project_relation.py +++ b/substrait_consumer/tests/functional/relations/test_project_relation.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -69,7 +69,8 @@ def test_producer_project_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: 
dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -80,7 +81,8 @@ def test_producer_project_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -95,7 +97,8 @@ def test_consumer_project_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -106,7 +109,8 @@ def test_consumer_project_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -119,7 +123,8 @@ def test_generate_project_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -128,6 +133,7 @@ def test_generate_project_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_read_relation.py b/substrait_consumer/tests/functional/relations/test_read_relation.py index d9a90cff..dfc8ccee 100644 --- a/substrait_consumer/tests/functional/relations/test_read_relation.py +++ b/substrait_consumer/tests/functional/relations/test_read_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_read_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_read_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, 
producer, @@ -77,7 +79,8 @@ def test_consumer_read_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def test_consumer_read_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_read_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_read_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_set_relation.py b/substrait_consumer/tests/functional/relations/test_set_relation.py index 9d9ac695..0eafc20e 100644 --- a/substrait_consumer/tests/functional/relations/test_set_relation.py +++ b/substrait_consumer/tests/functional/relations/test_set_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_set_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_set_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -77,7 +79,8 @@ def test_consumer_set_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def 
test_consumer_set_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_set_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_set_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_sort_relation.py b/substrait_consumer/tests/functional/relations/test_sort_relation.py index 3309e0b3..cb36a891 100644 --- a/substrait_consumer/tests/functional/relations/test_sort_relation.py +++ b/substrait_consumer/tests/functional/relations/test_sort_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -51,7 +51,8 @@ def test_producer_sort_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -62,7 +63,8 @@ def test_producer_sort_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -77,7 +79,8 @@ def test_consumer_sort_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -88,7 +91,8 @@ def test_consumer_sort_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -101,7 +105,8 @@ def test_generate_sort_relation_results( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, 
str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], ) -> None: @@ -110,6 +115,7 @@ def test_generate_sort_relation_results( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ) diff --git a/substrait_consumer/tests/functional/relations/test_write_relation.py b/substrait_consumer/tests/functional/relations/test_write_relation.py index 6df19481..23daa8a4 100644 --- a/substrait_consumer/tests/functional/relations/test_write_relation.py +++ b/substrait_consumer/tests/functional/relations/test_write_relation.py @@ -1,4 +1,4 @@ -from typing import Callable, Iterable +from typing import Callable import duckdb from ibis.expr.types.relations import Table @@ -55,7 +55,8 @@ def test_producer_write_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -66,7 +67,8 @@ def test_producer_write_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, @@ -81,7 +83,8 @@ def test_consumer_write_relations( self, snapshot, test_name: str, - file_names: Iterable[str], + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: tuple, ibis_expr: Callable[[Table], Table], producer, @@ -92,7 +95,8 @@ def test_consumer_write_relations( test_name, snapshot, self.db_connection, - file_names, + local_files, + named_tables, sql_query, ibis_expr, producer, diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q1.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q1.sql index 72c85fd7..ad2c2ee4 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q1.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q1.sql @@ -10,7 +10,7 @@ SELECT avg(l_discount) AS avg_disc, count(*) AS count_order FROM - '{}' + '{lineitem}' WHERE l_shipdate <= date 
'1998-12-01' - interval '120' day GROUP BY diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q10.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q10.sql index 0e03d7ba..6cfd4374 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q10.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q10.sql @@ -8,7 +8,7 @@ SELECT c_phone, c_comment FROM - '{}', '{}', '{}', '{}' + '{customer}', '{orders}', '{lineitem}', '{nation}' WHERE c_custkey = o_custkey AND l_orderkey = o_orderkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q11.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q11.sql index 8cc37a37..7480e9dd 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q11.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q11.sql @@ -2,9 +2,9 @@ SELECT ps.ps_partkey, sum(ps.ps_supplycost * ps.ps_availqty) AS "value" FROM - '{}' ps, - '{}' s, - '{}' n + '{partsupp}' ps, + '{supplier}' s, + '{nation}' n WHERE ps.ps_suppkey = s.s_suppkey AND s.s_nationkey = n.n_nationkey @@ -15,9 +15,9 @@ GROUP BY SELECT sum(ps.ps_supplycost * ps.ps_availqty) * 0.0001000000 FROM - '{}' ps, - '{}' s, - '{}' n + '{partsupp}' ps, + '{supplier}' s, + '{nation}' n WHERE ps.ps_suppkey = s.s_suppkey AND s.s_nationkey = n.n_nationkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q12.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q12.sql index 4e0fb885..35f00d96 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q12.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q12.sql @@ -15,7 +15,7 @@ SELECT 0 END) AS low_line_count FROM - '{}', '{}' + '{orders}', '{lineitem}' WHERE o_orderkey = l_orderkey AND l_shipmode IN ('MAIL', 'SHIP') diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q13.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q13.sql index 314d0189..e69d6192 100644 --- 
a/substrait_consumer/tests/integration/queries/tpch_sql/q13.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q13.sql @@ -6,8 +6,8 @@ FROM ( c_custkey, count(o_orderkey) FROM - '{}' - LEFT OUTER JOIN '{}' ON c_custkey = o_custkey + '{customer}' + LEFT OUTER JOIN '{orders}' ON c_custkey = o_custkey AND o_comment NOT LIKE '%special%requests%' GROUP BY c_custkey) AS c_orders (c_custkey, diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q14.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q14.sql index 50339498..e8074e0d 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q14.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q14.sql @@ -6,7 +6,7 @@ SELECT 0 END) / sum(l_extendedprice * (1 - l_discount)) AS promo_revenue FROM - '{}', '{}' + '{lineitem}', '{part}' WHERE l_partkey = p_partkey AND l_shipdate >= date '1995-09-01' diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q15.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q15.sql index b463ec3b..1fae40c6 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q15.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q15.sql @@ -5,13 +5,13 @@ SELECT s_phone, total_revenue FROM - '{}', + '{supplier}', ( SELECT l_suppkey AS supplier_no, sum(l_extendedprice * (1 - l_discount)) AS total_revenue FROM - '{}' + '{lineitem}' WHERE l_shipdate >= CAST('1996-01-01' AS date) AND l_shipdate < CAST('1996-04-01' AS date) @@ -27,7 +27,7 @@ WHERE l_suppkey AS supplier_no, sum(l_extendedprice * (1 - l_discount)) AS total_revenue FROM - '{}' + '{lineitem}' WHERE l_shipdate >= CAST('1996-01-01' AS date) AND l_shipdate < CAST('1996-04-01' AS date) diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q16.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q16.sql index 0fe504fb..0ad1acc3 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q16.sql +++ 
b/substrait_consumer/tests/integration/queries/tpch_sql/q16.sql @@ -4,7 +4,7 @@ SELECT p_size, count(DISTINCT ps_suppkey) AS supplier_cnt FROM - '{}', '{}' + '{partsupp}', '{part}' WHERE p_partkey = ps_partkey AND p_brand <> 'Brand#45' @@ -14,7 +14,7 @@ WHERE SELECT s_suppkey FROM - '{}' + '{supplier}' WHERE s_comment LIKE '%Customer%Complaints%') GROUP BY diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q17.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q17.sql index 0ad02bb3..497fa221 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q17.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q17.sql @@ -1,7 +1,7 @@ SELECT sum(l_extendedprice) / 7.0 AS avg_yearly FROM - '{}', '{}' + '{lineitem}', '{part}' WHERE p_partkey = l_partkey AND p_brand = 'Brand#23' @@ -10,6 +10,6 @@ WHERE SELECT 0.2 * avg(l_quantity) FROM - '{}' + '{lineitem}' WHERE l_partkey = p_partkey); diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q18.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q18.sql index 86a6727d..84f4f354 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q18.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q18.sql @@ -6,13 +6,13 @@ SELECT o_totalprice, sum(l_quantity) FROM - '{}', '{}', '{}' + '{customer}', '{orders}', '{lineitem}' WHERE o_orderkey IN ( SELECT l_orderkey FROM - '{}' + '{lineitem}' GROUP BY l_orderkey HAVING diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q19.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q19.sql index d5fc6f2b..bf3342fb 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q19.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q19.sql @@ -1,7 +1,7 @@ SELECT sum(l_extendedprice * (1 - l_discount)) AS revenue FROM - '{}', '{}' + '{lineitem}', '{part}' WHERE (p_partkey = l_partkey AND p_brand = 'Brand#12' AND p_container IN ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG') 
diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q2.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q2.sql index 1fa1ff23..7fd14096 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q2.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q2.sql @@ -8,7 +8,7 @@ SELECT s_phone, s_comment FROM - '{}', '{}', '{}', '{}', '{}' + '{part}', '{supplier}', '{partsupp}', '{nation}', '{region}' WHERE p_partkey = ps_partkey AND s_suppkey = ps_suppkey @@ -21,7 +21,7 @@ WHERE SELECT min(ps_supplycost) FROM - '{}', '{}', '{}', '{}' + '{partsupp}', '{supplier}', '{nation}', '{region}' WHERE p_partkey = ps_partkey AND s_suppkey = ps_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q20.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q20.sql index 34cc465e..cbd45d2d 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q20.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q20.sql @@ -2,26 +2,26 @@ SELECT s_name, s_address FROM - '{}', '{}' + '{supplier}', '{nation}' WHERE s_suppkey IN ( SELECT ps_suppkey FROM - '{}' + '{partsupp}' WHERE ps_partkey IN ( SELECT p_partkey FROM - '{}' + '{part}' WHERE p_name LIKE 'forest%') AND ps_availqty > ( SELECT 0.5 * sum(l_quantity) FROM - '{}' + '{lineitem}' WHERE l_partkey = ps_partkey AND l_suppkey = ps_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q21.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q21.sql index cb767db3..e8672169 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q21.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q21.sql @@ -2,7 +2,7 @@ SELECT s_name, count(*) AS numwait FROM - '{}', '{}' l1, '{}', '{}' + '{supplier}', '{lineitem}' l1, '{orders}', '{nation}' WHERE s_suppkey = l1.l_suppkey AND o_orderkey = l1.l_orderkey @@ -12,7 +12,7 @@ WHERE SELECT * FROM - '{}' l2 + '{lineitem}' l2 WHERE l2.l_orderkey = l1.l_orderkey AND 
l2.l_suppkey <> l1.l_suppkey) @@ -20,7 +20,7 @@ WHERE SELECT * FROM - '{}' l3 + '{lineitem}' l3 WHERE l3.l_orderkey = l1.l_orderkey AND l3.l_suppkey <> l1.l_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q22.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q22.sql index c9e31690..e0fe3f10 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q22.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q22.sql @@ -7,14 +7,14 @@ FROM ( substring(c_phone FROM 1 FOR 2) AS cntrycode, c_acctbal FROM - '{}' + '{customer}' WHERE substring(c_phone FROM 1 FOR 2) IN ('13', '31', '23', '29', '30', '18', '17') AND c_acctbal > ( SELECT avg(c_acctbal) FROM - '{}' + '{customer}' WHERE c_acctbal > 0.00 AND substring(c_phone FROM 1 FOR 2) IN ('13', '31', '23', '29', '30', '18', '17')) @@ -22,7 +22,7 @@ FROM ( SELECT * FROM - '{}' + '{orders}' WHERE o_custkey = c_custkey)) AS custsale GROUP BY diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q3.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q3.sql index 84877309..3e998043 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q3.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q3.sql @@ -4,7 +4,7 @@ SELECT o_orderdate, o_shippriority FROM - '{}', '{}', '{}' + '{customer}', '{orders}', '{lineitem}' WHERE c_mktsegment = 'BUILDING' AND c_custkey = o_custkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q4.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q4.sql index 376e8d49..ea04a9b5 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q4.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q4.sql @@ -2,7 +2,7 @@ SELECT o_orderpriority, count(*) AS order_count FROM - '{}' + '{orders}' WHERE o_orderdate >= CAST('1993-07-01' AS date) AND o_orderdate < CAST('1993-10-01' AS date) @@ -10,7 +10,7 @@ WHERE SELECT * FROM - '{}' + '{lineitem}' WHERE l_orderkey = o_orderkey 
AND l_commitdate < l_receiptdate) diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q5.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q5.sql index 22bb50c7..d12f46cb 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q5.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q5.sql @@ -2,7 +2,7 @@ SELECT n_name, sum(l_extendedprice * (1 - l_discount)) AS revenue FROM - '{}', '{}', '{}', '{}', '{}', '{}' + '{customer}', '{orders}', '{lineitem}', '{supplier}', '{nation}', '{region}' WHERE c_custkey = o_custkey AND l_orderkey = o_orderkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q6.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q6.sql index abe1b225..dc195aa6 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q6.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q6.sql @@ -1,7 +1,7 @@ SELECT sum(l_extendedprice * l_discount) AS revenue FROM - '{}' + '{lineitem}' WHERE l_shipdate >= CAST('1994-01-01' AS date) AND l_shipdate < CAST('1995-01-01' AS date) diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q7.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q7.sql index a48faa50..54c7f8aa 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q7.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q7.sql @@ -10,7 +10,7 @@ FROM ( extract(year FROM l_shipdate) AS l_year, l_extendedprice * (1 - l_discount) AS volume FROM - '{}', '{}', '{}', '{}', '{}' n1, '{}' n2 + '{supplier}', '{lineitem}', '{orders}', '{customer}', '{nation}' n1, '{nation}' n2 WHERE s_suppkey = l_suppkey AND o_orderkey = l_orderkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q8.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q8.sql index 6a38bf07..f241b6e9 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q8.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q8.sql @@ -12,7 
+12,7 @@ FROM ( l_extendedprice * (1 - l_discount) AS volume, n2.n_name AS nation FROM - '{}', '{}', '{}', '{}', '{}', '{}' n1, '{}' n2, '{}' + '{part}', '{supplier}', '{lineitem}', '{orders}', '{customer}', '{nation}' n1, '{nation}' n2, '{region}' WHERE p_partkey = l_partkey AND s_suppkey = l_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_sql/q9.sql b/substrait_consumer/tests/integration/queries/tpch_sql/q9.sql index 8586f5b9..7093a748 100644 --- a/substrait_consumer/tests/integration/queries/tpch_sql/q9.sql +++ b/substrait_consumer/tests/integration/queries/tpch_sql/q9.sql @@ -8,7 +8,7 @@ FROM ( extract(year FROM o_orderdate) AS o_year, l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity AS amount FROM - '{}', '{}', '{}', '{}', '{}', '{}' + '{part}', '{supplier}', '{lineitem}', '{partsupp}', '{orders}', '{nation}' WHERE s_suppkey = l_suppkey AND ps_suppkey = l_suppkey diff --git a/substrait_consumer/tests/integration/queries/tpch_substrait_plans/query_03_plan.json b/substrait_consumer/tests/integration/queries/tpch_substrait_plans/query_03_plan.json index d4dea1d0..9ed211fc 100644 --- a/substrait_consumer/tests/integration/queries/tpch_substrait_plans/query_03_plan.json +++ b/substrait_consumer/tests/integration/queries/tpch_substrait_plans/query_03_plan.json @@ -113,40 +113,26 @@ } }, "baseSchema": { - "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], + "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], "struct": { "types": [{ "i64": { "nullability": "NULLABILITY_REQUIRED" } }, { - "i64": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "i64": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "i64": { + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { 
- "decimal": { - "scale": 2, - "precision": 15, + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { - "decimal": { - "scale": 2, - "precision": 15, + "i32": { "nullability": "NULLABILITY_REQUIRED" } }, { - "decimal": { - "scale": 2, - "precision": 15, + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { @@ -163,36 +149,12 @@ "string": { "nullability": "NULLABILITY_REQUIRED" } - }, { - "date": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "date": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "date": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "string": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "string": { - "nullability": "NULLABILITY_REQUIRED" - } - }, { - "string": { - "nullability": "NULLABILITY_REQUIRED" - } }], "nullability": "NULLABILITY_REQUIRED" } }, "namedTable": { - "names": ["LINEITEM"] + "names": ["CUSTOMER"] } } }, @@ -203,14 +165,14 @@ } }, "baseSchema": { - "names": ["C_CUSTKEY", "C_NAME", "C_ADDRESS", "C_NATIONKEY", "C_PHONE", "C_ACCTBAL", "C_MKTSEGMENT", "C_COMMENT"], + "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], "struct": { "types": [{ "i64": { "nullability": "NULLABILITY_REQUIRED" } }, { - "string": { + "i64": { "nullability": "NULLABILITY_REQUIRED" } }, { @@ -218,23 +180,27 @@ "nullability": "NULLABILITY_REQUIRED" } }, { - "i32": { + "decimal": { + "scale": 2, + "precision": 15, "nullability": "NULLABILITY_REQUIRED" } }, { - "string": { + "date": { "nullability": "NULLABILITY_REQUIRED" } }, { - "decimal": { - "scale": 2, - "precision": 15, + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { "string": { "nullability": "NULLABILITY_REQUIRED" } + }, { + "i32": { + "nullability": "NULLABILITY_REQUIRED" + } }, { "string": { "nullability": "NULLABILITY_REQUIRED" @@ -244,7 +210,7 @@ } }, "namedTable": { - "names": ["CUSTOMER"] + "names": ["ORDERS"] } } } @@ -257,7 +223,7 @@ } }, 
"baseSchema": { - "names": ["O_ORDERKEY", "O_CUSTKEY", "O_ORDERSTATUS", "O_TOTALPRICE", "O_ORDERDATE", "O_ORDERPRIORITY", "O_CLERK", "O_SHIPPRIORITY", "O_COMMENT"], + "names": ["L_ORDERKEY", "L_PARTKEY", "L_SUPPKEY", "L_LINENUMBER", "L_QUANTITY", "L_EXTENDEDPRICE", "L_DISCOUNT", "L_TAX", "L_RETURNFLAG", "L_LINESTATUS", "L_SHIPDATE", "L_COMMITDATE", "L_RECEIPTDATE", "L_SHIPINSTRUCT", "L_SHIPMODE", "L_COMMENT"], "struct": { "types": [{ "i64": { @@ -268,7 +234,11 @@ "nullability": "NULLABILITY_REQUIRED" } }, { - "string": { + "i64": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "i64": { "nullability": "NULLABILITY_REQUIRED" } }, { @@ -278,19 +248,49 @@ "nullability": "NULLABILITY_REQUIRED" } }, { - "date": { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "decimal": { + "scale": 2, + "precision": 15, + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { "string": { "nullability": "NULLABILITY_REQUIRED" } + }, { + "date": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "date": { + "nullability": "NULLABILITY_REQUIRED" + } + }, { + "date": { + "nullability": "NULLABILITY_REQUIRED" + } }, { "string": { "nullability": "NULLABILITY_REQUIRED" } }, { - "i32": { + "string": { "nullability": "NULLABILITY_REQUIRED" } }, { @@ -302,7 +302,7 @@ } }, "namedTable": { - "names": ["ORDERS"] + "names": ["LINEITEM"] } } } @@ -329,7 +329,7 @@ "selection": { "directReference": { "structField": { - "field": 22 + "field": 6 } }, "rootReference": { @@ -359,7 +359,6 @@ "selection": { "directReference": { "structField": { - "field": 16 } }, "rootReference": { @@ -371,7 +370,7 @@ "selection": { "directReference": { "structField": { - "field": 25 + "field": 9 } }, "rootReference": { @@ -395,6 +394,7 @@ "selection": { "directReference": { "structField": { + "field": 17 } }, 
"rootReference": { @@ -406,7 +406,7 @@ "selection": { "directReference": { "structField": { - "field": 24 + "field": 8 } }, "rootReference": { @@ -430,7 +430,7 @@ "selection": { "directReference": { "structField": { - "field": 28 + "field": 12 } }, "rootReference": { @@ -470,7 +470,7 @@ "selection": { "directReference": { "structField": { - "field": 10 + "field": 27 } }, "rootReference": { @@ -505,6 +505,7 @@ "selection": { "directReference": { "structField": { + "field": 17 } }, "rootReference": { @@ -514,7 +515,7 @@ "selection": { "directReference": { "structField": { - "field": 28 + "field": 12 } }, "rootReference": { @@ -524,7 +525,7 @@ "selection": { "directReference": { "structField": { - "field": 31 + "field": 15 } }, "rootReference": { @@ -545,7 +546,7 @@ "selection": { "directReference": { "structField": { - "field": 5 + "field": 22 } }, "rootReference": { @@ -586,7 +587,7 @@ "selection": { "directReference": { "structField": { - "field": 6 + "field": 23 } }, "rootReference": { diff --git a/substrait_consumer/tests/integration/queries/tpch_test_cases.py b/substrait_consumer/tests/integration/queries/tpch_test_cases.py index 6579054e..5065702d 100644 --- a/substrait_consumer/tests/integration/queries/tpch_test_cases.py +++ b/substrait_consumer/tests/integration/queries/tpch_test_cases.py @@ -3,211 +3,245 @@ TPCH_QUERY_TESTS = ( { "test_name": "test_tpch_sql_1", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": get_sql("q1.sql"), "substrait_query": get_substrait_plan("query_01_plan.json"), }, { "test_name": "test_tpch_sql_2", - "file_names": [ - "part.parquet", - "supplier.parquet", - "partsupp.parquet", - "nation.parquet", - "region.parquet", - "partsupp.parquet", - "supplier.parquet", - "nation.parquet", - "region.parquet", - ], + "local_files": {}, + "named_tables": { + "part": "part.parquet", + "supplier": "supplier.parquet", + "partsupp": "partsupp.parquet", + "nation": 
"nation.parquet", + "region": "region.parquet", + "partsupp": "partsupp.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + }, "sql_query": get_sql("q2.sql"), "substrait_query": get_substrait_plan("query_02_plan.json"), }, { "test_name": "test_tpch_sql_3", - "file_names": [ - "lineitem.parquet", - "customer.parquet", - "orders.parquet", - ], + "local_files": {}, + "named_tables": { + "lineitem": "lineitem.parquet", + "customer": "customer.parquet", + "orders": "orders.parquet", + }, "sql_query": get_sql("q3.sql"), "substrait_query": get_substrait_plan("query_03_plan.json"), }, { "test_name": "test_tpch_sql_4", - "file_names": ["orders.parquet", "lineitem.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet", "lineitem": "lineitem.parquet"}, "sql_query": get_sql("q4.sql"), "substrait_query": get_substrait_plan("query_04_plan.json"), }, { "test_name": "test_tpch_sql_5", - "file_names": [ - "customer.parquet", - "orders.parquet", - "lineitem.parquet", - "supplier.parquet", - "nation.parquet", - "region.parquet", - ], + "local_files": {}, + "named_tables": { + "customer": "customer.parquet", + "orders": "orders.parquet", + "lineitem": "lineitem.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + }, "sql_query": get_sql("q5.sql"), "substrait_query": get_substrait_plan("query_05_plan.json"), }, { "test_name": "test_tpch_sql_6", - "file_names": ["lineitem.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet"}, "sql_query": get_sql("q6.sql"), "substrait_query": get_substrait_plan("query_06_plan.json"), }, { "test_name": "test_tpch_sql_7", - "file_names": [ - "supplier.parquet", - "lineitem.parquet", - "orders.parquet", - "customer.parquet", - "nation.parquet", - "nation.parquet", - ], + "local_files": {}, + "named_tables": { + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "orders": 
"orders.parquet", + "customer": "customer.parquet", + "nation": "nation.parquet", + "nation": "nation.parquet", + }, "sql_query": get_sql("q7.sql"), "substrait_query": get_substrait_plan("query_07_plan.json"), }, { "test_name": "test_tpch_sql_8", - "file_names": [ - "part.parquet", - "supplier.parquet", - "lineitem.parquet", - "orders.parquet", - "customer.parquet", - "nation.parquet", - "nation.parquet", - "region.parquet", - ], + "local_files": {}, + "named_tables": { + "part": "part.parquet", + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "orders": "orders.parquet", + "customer": "customer.parquet", + "nation": "nation.parquet", + "nation": "nation.parquet", + "region": "region.parquet", + }, "sql_query": get_sql("q8.sql"), "substrait_query": get_substrait_plan("query_08_plan.json"), }, { "test_name": "test_tpch_sql_9", - "file_names": [ - "part.parquet", - "supplier.parquet", - "lineitem.parquet", - "partsupp.parquet", - "orders.parquet", - "nation.parquet", - ], + "local_files": {}, + "named_tables": { + "part": "part.parquet", + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "partsupp": "partsupp.parquet", + "orders": "orders.parquet", + "nation": "nation.parquet", + }, "sql_query": get_sql("q9.sql"), "substrait_query": get_substrait_plan("query_09_plan.json"), }, { "test_name": "test_tpch_sql_10", - "file_names": [ - "customer.parquet", - "orders.parquet", - "lineitem.parquet", - "nation.parquet", - ], + "local_files": {}, + "named_tables": { + "customer": "customer.parquet", + "orders": "orders.parquet", + "lineitem": "lineitem.parquet", + "nation": "nation.parquet", + }, "sql_query": get_sql("q10.sql"), "substrait_query": get_substrait_plan("query_10_plan.json"), }, { "test_name": "test_tpch_sql_11", - "file_names": [ - "partsupp.parquet", - "supplier.parquet", - "nation.parquet", - "partsupp.parquet", - "supplier.parquet", - "nation.parquet", - ], + "local_files": {}, + "named_tables": { + "partsupp": 
"partsupp.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "partsupp": "partsupp.parquet", + "supplier": "supplier.parquet", + "nation": "nation.parquet", + }, "sql_query": get_sql("q11.sql"), "substrait_query": get_substrait_plan("query_11_plan.json"), }, { "test_name": "test_tpch_sql_12", - "file_names": ["orders.parquet", "lineitem.parquet"], + "local_files": {}, + "named_tables": {"orders": "orders.parquet", "lineitem": "lineitem.parquet"}, "sql_query": get_sql("q12.sql"), "substrait_query": get_substrait_plan("query_12_plan.json"), }, { "test_name": "test_tpch_sql_13", - "file_names": ["customer.parquet", "orders.parquet"], + "local_files": {}, + "named_tables": {"customer": "customer.parquet", "orders": "orders.parquet"}, "sql_query": get_sql("q13.sql"), "substrait_query": get_substrait_plan("query_13_plan.json"), }, { "test_name": "test_tpch_sql_14", - "file_names": ["lineitem.parquet", "part.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet", "part": "part.parquet"}, "sql_query": get_sql("q14.sql"), "substrait_query": get_substrait_plan("query_14_plan.json"), }, { "test_name": "test_tpch_sql_15", - "file_names": [ - "supplier.parquet", - "lineitem.parquet", - "lineitem.parquet", - ], + "local_files": {}, + "named_tables": { + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q15.sql"), "substrait_query": get_substrait_plan("query_15_plan.json"), }, { "test_name": "test_tpch_sql_16", - "file_names": ["partsupp.parquet", "part.parquet", "supplier.parquet"], + "local_files": {}, + "named_tables": { + "partsupp": "partsupp.parquet", + "part": "part.parquet", + "supplier": "supplier.parquet", + }, "sql_query": get_sql("q16.sql"), "substrait_query": get_substrait_plan("query_16_plan.json"), }, { "test_name": "test_tpch_sql_17", - "file_names": ["lineitem.parquet", "part.parquet", "lineitem.parquet"], + "local_files": {}, + 
"named_tables": { + "lineitem": "lineitem.parquet", + "part": "part.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q17.sql"), "substrait_query": get_substrait_plan("query_17_plan.json"), }, { "test_name": "test_tpch_sql_18", - "file_names": [ - "customer.parquet", - "orders.parquet", - "lineitem.parquet", - "lineitem.parquet", - ], + "local_files": {}, + "named_tables": { + "customer": "customer.parquet", + "orders": "orders.parquet", + "lineitem": "lineitem.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q18.sql"), "substrait_query": get_substrait_plan("query_18_plan.json"), }, { "test_name": "test_tpch_sql_19", - "file_names": ["lineitem.parquet", "part.parquet"], + "local_files": {}, + "named_tables": {"lineitem": "lineitem.parquet", "part": "part.parquet"}, "sql_query": get_sql("q19.sql"), "substrait_query": get_substrait_plan("query_19_plan.json"), }, { "test_name": "test_tpch_sql_20", - "file_names": [ - "supplier.parquet", - "nation.parquet", - "partsupp.parquet", - "part.parquet", - "lineitem.parquet", - ], + "local_files": {}, + "named_tables": { + "supplier": "supplier.parquet", + "nation": "nation.parquet", + "partsupp": "partsupp.parquet", + "part": "part.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q20.sql"), "substrait_query": get_substrait_plan("query_20_plan.json"), }, { "test_name": "test_tpch_sql_21", - "file_names": [ - "supplier.parquet", - "lineitem.parquet", - "orders.parquet", - "nation.parquet", - "lineitem.parquet", - "lineitem.parquet", - ], + "local_files": {}, + "named_tables": { + "supplier": "supplier.parquet", + "lineitem": "lineitem.parquet", + "orders": "orders.parquet", + "nation": "nation.parquet", + "lineitem": "lineitem.parquet", + "lineitem": "lineitem.parquet", + }, "sql_query": get_sql("q21.sql"), "substrait_query": get_substrait_plan("query_21_plan.json"), }, { "test_name": "test_tpch_sql_22", - "file_names": ["customer.parquet", "customer.parquet", 
"orders.parquet"], + "local_files": {}, + "named_tables": { + "customer": "customer.parquet", + "customer": "customer.parquet", + "orders": "orders.parquet", + }, "sql_query": get_sql("q22.sql"), "substrait_query": get_substrait_plan("query_22_plan.json"), }, diff --git a/substrait_consumer/tests/integration/test_acero_tpch.py b/substrait_consumer/tests/integration/test_acero_tpch.py index 7f512b4c..8c31d8d2 100644 --- a/substrait_consumer/tests/integration/test_acero_tpch.py +++ b/substrait_consumer/tests/integration/test_acero_tpch.py @@ -3,12 +3,11 @@ import duckdb import pyarrow as pa import pytest -from pyarrow import compute from substrait_consumer.common import SubstraitUtils from substrait_consumer.consumers.acero_consumer import AceroConsumer -from substrait_consumer.consumers.duckdb_consumer import DuckDBConsumer from substrait_consumer.parametrization import custom_parametrization +from substrait_consumer.producers.duckdb_producer import DuckDBProducer from substrait_consumer.verification import verify_equals from substrait_consumer.tests.integration.queries.tpch_test_cases import TPCH_QUERY_TESTS @@ -26,7 +25,7 @@ def setup_teardown_class(request): cls.db_connection = duckdb.connect() cls.db_connection.execute("INSTALL substrait") cls.db_connection.execute("LOAD substrait") - cls.duckdb_consumer = DuckDBConsumer(cls.db_connection) + cls.duckdb_producer = DuckDBProducer(cls.db_connection) cls.acero_consumer = AceroConsumer() cls.utils = SubstraitUtils() @@ -38,7 +37,8 @@ def setup_teardown_class(request): def test_isthmus_substrait_plan( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, @@ -56,8 +56,10 @@ def test_isthmus_substrait_plan( Parameters: test_name: Name of test. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local files paths. 
+ named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. substrait_query: @@ -68,7 +70,7 @@ def test_isthmus_substrait_plan( # Format the substrait query to include the parquet file paths. # Calculate the result of running the substrait query plan. consumer = AceroConsumer() - consumer.setup(self.db_connection, file_names) + consumer.setup(self.db_connection, local_files, named_tables) subtrait_query_result_tb = consumer.run_substrait_query( substrait_query @@ -77,8 +79,8 @@ def test_isthmus_substrait_plan( # Reformat the sql query to be used by duck db by inserting all the # parquet filepaths where the table names should be. # Calculate results to verify against by running the SQL query on DuckDB - sql_query = self.utils.format_sql_query(sql_query, file_names) - duckdb_query_result_tb = self.db_connection.query(f"{sql_query}").arrow() + sql_query = self.duckdb_producer.format_sql(sql_query) + duckdb_query_result_tb = self.duckdb_producer.run_sql_query(sql_query) col_names = [x.lower() for x in subtrait_query_result_tb.column_names] exp_col_names = [x.lower() for x in duckdb_query_result_tb.column_names] @@ -115,13 +117,14 @@ def test_isthmus_substrait_plan( def test_duckdb_substrait_plan( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, ) -> None: """ - 1. Load all the parquet files into DuckDB as separate tables. + 1. Load all the parquet files into DuckDB as separate named tables. 2. Format the SQL query to work with DuckDB by inserting all the table names. 3. Execute the SQL on DuckDB. 4. Produce the substrait plan with duckdb @@ -132,28 +135,24 @@ def test_duckdb_substrait_plan( Parameters: test_name: Name of test. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. 
sql_query: SQL query. """ - # Load the parquet files into DuckDB and return all the table names as a list - table_names = self.duckdb_consumer.load_tables_from_parquet( - file_names - ) - - # Format the sql query by inserting all the table names - sql_query = sql_query.format(*table_names) + self.duckdb_producer.setup(self.db_connection, local_files, named_tables) + self.acero_consumer.setup(self.db_connection, local_files, named_tables) # Convert the SQL into a substrait query plan - duckdb_substrait_plan = self.db_connection.get_substrait_json(sql_query) - proto_bytes = duckdb_substrait_plan.fetchone()[0] + proto_bytes = self.duckdb_producer.produce_substrait(sql_query) # Run the duckdb produced substrait plan against Acero subtrait_query_result_tb = self.acero_consumer.run_substrait_query(proto_bytes) # Calculate results to verify against by running the SQL query on DuckDB - duckdb_sql_result_tb = self.db_connection.query(f"{sql_query}").arrow() + duckdb_sql_result_tb = self.duckdb_producer.run_sql_query(sql_query) col_names = [x.lower() for x in subtrait_query_result_tb.column_names] exp_col_names = [x.lower() for x in duckdb_sql_result_tb.column_names] diff --git a/substrait_consumer/tests/integration/test_duckdb_tpch.py b/substrait_consumer/tests/integration/test_duckdb_tpch.py index 8d291cda..c143feaf 100644 --- a/substrait_consumer/tests/integration/test_duckdb_tpch.py +++ b/substrait_consumer/tests/integration/test_duckdb_tpch.py @@ -3,6 +3,7 @@ from substrait_consumer.consumers.duckdb_consumer import DuckDBConsumer from substrait_consumer.parametrization import custom_parametrization +from substrait_consumer.producers.duckdb_producer import DuckDBProducer from substrait_consumer.verification import verify_equals from .queries.tpch_test_cases import TPCH_QUERY_TESTS @@ -22,6 +23,7 @@ def setup_teardown_class(request): cls.db_connection.execute("INSTALL substrait") cls.db_connection.execute("LOAD substrait") cls.consumer = DuckDBConsumer(cls.db_connection) 
+ cls.producer = DuckDBProducer(cls.db_connection) yield @@ -31,13 +33,14 @@ def setup_teardown_class(request): def test_substrait_query( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, ) -> None: """ - 1. Load all the parquet files into DuckDB as separate tables. + 1. Load all the parquet files into DuckDB as separate named tables. 2. Format the SQL query to work with DuckDB by inserting all the table names. 3. Execute the SQL on DuckDB. 4. Run the substrait query plan. @@ -47,28 +50,23 @@ def test_substrait_query( Parameters: test_name: Name of test. - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local file paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. """ - - # Load the parquet files into DuckDB and return all the table names as a list - table_names = self.consumer.load_tables_from_parquet( - file_names - ) - - # Format the sql query by inserting all the table names - sql_query = sql_query.format(*table_names) + self.consumer.setup(self.db_connection, local_files, named_tables) + self.producer.setup(self.db_connection, local_files, named_tables) # Convert the SQL into a substrait query plan and run the plan. 
- substrait_plan = self.db_connection.get_substrait_json(sql_query) - proto_bytes = substrait_plan.fetchone()[0] + proto_bytes = self.producer.produce_substrait(sql_query) subtrait_query_result_tb = self.consumer.run_substrait_query(proto_bytes) # Calculate results to verify against by running the SQL query on DuckDB - duckdb_sql_result_tb = self.db_connection.query(f"{sql_query}").arrow() + duckdb_sql_result_tb = self.producer.run_sql_query(sql_query) col_names = [x.lower() for x in subtrait_query_result_tb.column_names] exp_col_names = [x.lower() for x in duckdb_sql_result_tb.column_names] diff --git a/substrait_consumer/tests/integration/test_tpch_plans_valid.py b/substrait_consumer/tests/integration/test_tpch_plans_valid.py index f2735593..224b1b57 100644 --- a/substrait_consumer/tests/integration/test_tpch_plans_valid.py +++ b/substrait_consumer/tests/integration/test_tpch_plans_valid.py @@ -6,6 +6,7 @@ from substrait_consumer.consumers.duckdb_consumer import DuckDBConsumer from substrait_consumer.parametrization import custom_parametrization +from substrait_consumer.producers.duckdb_producer import DuckDBProducer from substrait_consumer.producers.isthmus_producer import IsthmusProducer from .queries.tpch_test_cases import TPCH_QUERY_TESTS @@ -27,6 +28,7 @@ def setup_teardown_class(request): cls.db_connection.execute("INSTALL substrait") cls.db_connection.execute("LOAD substrait") cls.duckdb_consumer = DuckDBConsumer(cls.db_connection) + cls.duckdb_producer = DuckDBProducer(cls.db_connection) yield @@ -37,7 +39,8 @@ def test_isthmus_substrait_plan_generation( self, snapshot, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, @@ -46,8 +49,7 @@ def test_isthmus_substrait_plan_generation( Generate the substrait plans using Isthmus. 
""" producer = IsthmusProducer() - producer.set_db_connection(self.db_connection) - sql_query = producer.format_sql(sql_query, file_names) + producer.setup(self.db_connection, local_files, named_tables) substrait_query = producer.produce_substrait(sql_query) snapshot.snapshot_dir = PLAN_SNAPSHOT_DIR @@ -58,7 +60,8 @@ def test_isthmus_substrait_plan_generation( def test_isthmus_substrait_plans_valid( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, @@ -86,7 +89,8 @@ def test_isthmus_substrait_plans_valid( def test_duckdb_substrait_plans_valid( self, test_name: str, - file_names: list, + local_files: dict[str, str], + named_tables: dict[str, str], sql_query: str, substrait_query: str, sort_results: bool = False, @@ -95,8 +99,10 @@ def test_duckdb_substrait_plans_valid( Run the Duckdb generated substrait plans through the substrait validator. Parameters: - file_names: - List of parquet files. + local_files: + A `dict` mapping format argument names to local files paths. + named_tables: + A `dict` mapping table names to local file paths. sql_query: SQL query. """ @@ -112,14 +118,8 @@ def test_duckdb_substrait_plans_valid( # too few field names config.override_diagnostic_level(4003, "error", "info") - # Load the parquet files into DuckDB and return all the table names as a list - table_names = self.duckdb_consumer.load_tables_from_parquet( - file_names - ) - # Format the sql query by inserting all the table names - sql_query = sql_query.format(*table_names) + self.duckdb_producer.setup(self.db_connection, local_files, named_tables) - duckdb_substrait_plan = self.db_connection.get_substrait(sql_query) - proto_bytes = duckdb_substrait_plan.fetchone()[0] + proto_bytes = self.duckdb_producer.produce_substrait(sql_query) sv.check_plan_valid(proto_bytes, config)