From 4bd53af927054dcc1ac073a6bae5998d6ff3d482 Mon Sep 17 00:00:00 2001
From: Lukas Plank
Date: Thu, 23 Jan 2025 12:44:46 +0100
Subject: [PATCH] feat: implement query checking

The change introduces a check_query callable which runs an extensible
compose pipeline of query checkers.

Note regarding QueryParseException:
This custom exception is intended to be a thin wrapper around a
pyparsing ParseException that RDFLib raises. This avoids introducing
pyparsing as a dependency just to be able to test against this
exception. I feel like RDFLib should not raise a pyparsing exception
but provide a thin wrapper itself. See
https://github.com/RDFLib/rdflib/issues/3057.

The check_query function runs in SPARQLModelAdapter to enable fast
failures on inapplicable queries. Note that this somewhat couples
QueryConstructor to SPARQLModelAdapter; QueryConstructor should be
marked private for this reason.

Closes #116.
Closes #126.
---
 rdfproxy/adapter.py                      |  3 +-
 rdfproxy/constructor.py                  |  1 +
 rdfproxy/utils/_exceptions.py            | 15 ++++++
 rdfproxy/utils/checkers/query_checker.py | 64 ++++++++++++++++++++++++
 rdfproxy/utils/sparql_utils.py           | 11 ++++
 5 files changed, 93 insertions(+), 1 deletion(-)
 create mode 100644 rdfproxy/utils/checkers/query_checker.py

diff --git a/rdfproxy/adapter.py b/rdfproxy/adapter.py
index c45252f..65ff13a 100644
--- a/rdfproxy/adapter.py
+++ b/rdfproxy/adapter.py
@@ -9,6 +9,7 @@
 from rdfproxy.mapper import _ModelBindingsMapper
 from rdfproxy.sparql_strategies import HttpxStrategy, SPARQLStrategy
 from rdfproxy.utils._types import _TModelInstance
+from rdfproxy.utils.checkers.query_checker import check_query
 from rdfproxy.utils.models import Page, QueryParameters
 
 
@@ -40,7 +41,7 @@ def __init__(
         sparql_strategy: type[SPARQLStrategy] = HttpxStrategy,
     ) -> None:
         self._target = target
-        self._query = query
+        self._query = check_query(query)
         self._model = model
         self.sparql_strategy = sparql_strategy(self._target)
 
diff --git a/rdfproxy/constructor.py b/rdfproxy/constructor.py
index 8054bd3..9845444 100644
--- a/rdfproxy/constructor.py
+++ b/rdfproxy/constructor.py
@@ -1,4 +1,5 @@
 from rdfproxy.utils._types import _TModelInstance
+from rdfproxy.utils.checkers.query_checker import check_query
 from rdfproxy.utils.models import QueryParameters
 from rdfproxy.utils.sparql_utils import (
     add_solution_modifier,
diff --git a/rdfproxy/utils/_exceptions.py b/rdfproxy/utils/_exceptions.py
index f72db45..eb3b436 100644
--- a/rdfproxy/utils/_exceptions.py
+++ b/rdfproxy/utils/_exceptions.py
@@ -11,3 +11,18 @@ class InvalidGroupingKeyException(Exception):
 
 class QueryConstructionException(Exception):
     """Exception for indicating failed SPARQL query construction."""
+
+
+class UnsupportedQueryException(Exception):
+    """Exception for indicating that a given SPARQL query is not supported."""
+
+
+class QueryParseException(Exception):
+    """Exception for indicating that a given SPARQL query raised a parse error.
+
+    This exception is intended to wrap and re-raise all exceptions
+    raised from parsing a SPARQL query with RDFLib's parseQuery function.
+
+    parseQuery raises a pyparsing.exceptions.ParseException,
+    which would require introducing pyparsing as a dependency just for testing.
+    """
diff --git a/rdfproxy/utils/checkers/query_checker.py b/rdfproxy/utils/checkers/query_checker.py
new file mode 100644
index 0000000..2f83083
--- /dev/null
+++ b/rdfproxy/utils/checkers/query_checker.py
@@ -0,0 +1,64 @@
+"""Functionality for performing checks on SPARQL queries."""
+
+import logging
+from typing import TypeVar
+
+from rdflib.plugins.sparql.parser import parseQuery
+from rdfproxy.utils._exceptions import QueryParseException, UnsupportedQueryException
+from rdfproxy.utils.sparql_utils import query_has_outer_solution_modifier
+from rdfproxy.utils.utils import compose_left
+
+
+logger = logging.getLogger(__name__)
+
+_TQuery = TypeVar("_TQuery", bound=str)
+
+
+def _check_select_query(query: _TQuery) -> _TQuery:
+    """Check if a query is parsable and a SELECT query.
+
+    Returns the query unchanged; raises QueryParseException if parsing
+    fails and UnsupportedQueryException for any non-SELECT query.
+    """
+    logger.debug("Running parsable SELECT check on '%s'", query)
+
+    try:
+        parsed = parseQuery(query)
+    except Exception as e:
+        # Deliberately broad: every parser error is wrapped and re-raised.
+        raise QueryParseException(e) from e
+    else:
+        _, query_type = parsed
+        if query_type.name != "SelectQuery":
+            raise UnsupportedQueryException("Only SELECT queries are applicable.")
+
+    return query
+
+
+def _check_solution_modifiers(query: _TQuery) -> _TQuery:
+    """Check if a query has solution modifiers.
+
+    SPARQL queries with solution modifiers are currently not supported.
+    See https://github.com/acdh-oeaw/rdfproxy/issues/126.
+
+    Returns the query unchanged; raises UnsupportedQueryException if a
+    solution modifier is found in the outer query.
+    """
+    logger.debug("Running solution modifier check on '%s'", query)
+
+    if (modifier := query_has_outer_solution_modifier(query)) is not None:
+        # Log the matched keyword itself, not the repr of the re.Match object.
+        logger.critical(
+            "Detected solution modifier '%s' in outer query.", modifier.group()
+        )
+
+        raise UnsupportedQueryException(
+            "Solution modifiers for top-level queries are currently not supported."
+        )
+    return query
+
+
+def check_query(query: _TQuery) -> _TQuery:
+    """Run a series of checks on a query."""
+    logger.debug("Running query checks on '%s'", query)
+    return compose_left(_check_select_query, _check_solution_modifiers)(query)
diff --git a/rdfproxy/utils/sparql_utils.py b/rdfproxy/utils/sparql_utils.py
index 4eafea3..8910287 100644
--- a/rdfproxy/utils/sparql_utils.py
+++ b/rdfproxy/utils/sparql_utils.py
@@ -116,3 +116,14 @@ def get_query_projection(query: str) -> list[Variable]:
             return var
         case _:  # pragma: no cover
             raise Exception("Unable to obtain query projection.")
+
+
+def query_has_outer_solution_modifier(query: str) -> re.Match | None:
+    """Search a query for a solution modifier in its outermost scope.
+
+    Returns the re.Match of the first matching keyword or None.
+    """
+    # The lookahead only accepts a keyword that is not followed by any
+    # brace before the end of the query; \b on both sides skips identifiers.
+    pattern = r"\b(DISTINCT|ORDER\s+BY|LIMIT|OFFSET|GROUP\s+BY)\b(?=[^{}]*$)"
+    return re.search(pattern, query, re.I)