Skip to content

Commit

Permalink
feat: implement query checking
Browse files Browse the repository at this point in the history
The change introduces a check_query callable, which runs an extensible,
composed pipeline of query checkers.

Note regarding QueryParseException: This custom exception is intended
to be a thin wrapper around a pyparsing ParseException that RDFLib
raises.
This avoids introducing pyparsing as a dependency just to be able to
test against this exception. I feel like RDFLib should not raise a
pyparsing exception but provide a thin wrapper itself.
See RDFLib/rdflib#3057.

The check_query function runs in SPARQLModelAdapter to enable fast
failures on inapplicable queries. Note that this somewhat couples
QueryConstructor to SPARQLModelAdapter; QueryConstructor should be
marked private for this reason.

Closes #116. Closes #126.
  • Loading branch information
lu-pl committed Jan 27, 2025
1 parent b1ea529 commit e7c8a13
Show file tree
Hide file tree
Showing 5 changed files with 84 additions and 1 deletion.
3 changes: 2 additions & 1 deletion rdfproxy/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from rdfproxy.mapper import _ModelBindingsMapper
from rdfproxy.sparql_strategies import HttpxStrategy, SPARQLStrategy
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.checkers.query_checker import check_query
from rdfproxy.utils.models import Page, QueryParameters


Expand Down Expand Up @@ -40,7 +41,7 @@ def __init__(
sparql_strategy: type[SPARQLStrategy] = HttpxStrategy,
) -> None:
self._target = target
self._query = query
self._query = check_query(query)
self._model = model

self.sparql_strategy = sparql_strategy(self._target)
Expand Down
1 change: 1 addition & 0 deletions rdfproxy/constructor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.checkers.query_checker import check_query
from rdfproxy.utils.models import QueryParameters
from rdfproxy.utils.sparql_utils import (
add_solution_modifier,
Expand Down
15 changes: 15 additions & 0 deletions rdfproxy/utils/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,18 @@ class InvalidGroupingKeyException(Exception):

class QueryConstructionException(Exception):
    """Exception indicating that constructing a SPARQL query failed."""


class UnsupportedQueryException(Exception):
    """Exception indicating that a given SPARQL query is not supported.

    Raised by the query checkers, e.g. for queries that are not SELECT
    queries or that carry solution modifiers in the top-level query.
    """


class QueryParseException(Exception):
    """Exception indicating that a given SPARQL query raised a parse error.

    This exception is intended to wrap and re-raise all exceptions
    raised from parsing a SPARQL query with RDFLib's parseQuery function.

    parseQuery raises a pyparsing.exceptions.ParseException; catching or
    testing against that type directly would require introducing pyparsing
    as a dependency just for testing, which this thin wrapper avoids.
    """
53 changes: 53 additions & 0 deletions rdfproxy/utils/checkers/query_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Functionality for performing checks on SPARQL queries."""

import logging
from typing import TypeVar

from rdflib.plugins.sparql.parser import parseQuery
from rdfproxy.utils._exceptions import QueryParseException, UnsupportedQueryException
from rdfproxy.utils.sparql_utils import query_has_outer_solution_modifier
from rdfproxy.utils.utils import compose_left


logger = logging.getLogger(__name__)

_TQuery = TypeVar("_TQuery", bound=str)


def _check_select_query(query: _TQuery) -> _TQuery:
    """Ensure that a query is parsable and is a SELECT query.

    The query is parsed with RDFLib's parseQuery. Any exception raised
    while parsing is re-raised as a QueryParseException (chained to the
    original error). A query that parses but is not a SELECT query raises
    an UnsupportedQueryException. On success the query is returned unchanged.
    """
    logger.debug("Running parsable SELECT check on '%s'", query)

    try:
        parse_result = parseQuery(query)
    except Exception as e:
        # Deliberately broad: every parse failure is funneled into one
        # package-level exception type.
        raise QueryParseException(e) from e

    # The parse result unpacks into two parts; the second one carries
    # the name identifying the query type.
    _leading_part, parsed_query = parse_result

    if parsed_query.name == "SelectQuery":
        return query

    raise UnsupportedQueryException("Only SELECT queries are applicable.")


def _check_solution_modifiers(query: _TQuery) -> _TQuery:
    """Reject queries that have solution modifiers in the outer query.

    SPARQL queries with solution modifiers are currently not supported.
    See https://github.com/acdh-oeaw/rdfproxy/issues/126.

    Returns the query unchanged if no outer solution modifier is detected;
    raises UnsupportedQueryException otherwise.
    """
    logger.debug("Running solution modifier check on '%s'", query)

    if (modifier := query_has_outer_solution_modifier(query)) is not None:
        # The helper returns an re.Match; log the matched keyword text
        # instead of the Match object's repr.
        logger.critical(
            "Detected solution modifier '%s' in outer query.", modifier.group()
        )

        raise UnsupportedQueryException(
            "Solution modifiers for top-level queries are currently not supported."
        )
    return query


def check_query(query: _TQuery) -> _TQuery:
    """Run the pipeline of query checkers on a query.

    The pipeline is built with compose_left from the SELECT check and the
    solution modifier check; its result for the given query is returned.
    Each checker returns the query it was given or raises.
    """
    logger.debug("Running query checks on '%s'", query)

    # Further checkers can be appended to this composition.
    run_checks = compose_left(_check_select_query, _check_solution_modifiers)
    return run_checks(query)
13 changes: 13 additions & 0 deletions rdfproxy/utils/sparql_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,16 @@ def get_query_projection(query: str) -> list[Variable]:
return var
case _: # pragma: no cover
raise Exception("Unable to obtain query projection.")


def query_has_outer_solution_modifier(query: str) -> re.Match | None:
"""Check if a query has an outer solution modifier.
Returns the matched solution modifier keyword or None.
The regex does not look for HAVING because it is tied to GROUP BY.
See https://www.w3.org/TR/sparql11-query/#rSolutionModifier.
"""
pattern = r"(ORDER\s+BY|LIMIT|OFFSET|GROUP\s+BY)\b(?=[^{}]*$)"
result = re.search(pattern, query, re.I)
return result

0 comments on commit e7c8a13

Please sign in to comment.