Skip to content

Commit

Permalink
feat: implement query checking
Browse files Browse the repository at this point in the history
The change introduces a check_query callable which runs an extensible
compose pipeline of query checkers.

Note regarding QueryParseException: This custom exception is intended
to be a thin wrapper around a pyparsing ParseException that RDFLib
raises. This avoids introducing pyparsing as a dependency just to be able to
test against this exception. I feel like RDFLib should not raise a
pyparsing exception but provide a thin wrapper itself.
See RDFLib/rdflib#3057.

The check_query function runs in SPARQLModelAdapter to enable fast
failures on inapplicable queries. Note that this somewhat couples
QueryConstructor to SPARQLModelAdapter; QueryConstructor should be
marked private for this reason.

Closes #116. Closes #126.
  • Loading branch information
lu-pl committed Jan 28, 2025
1 parent b1ea529 commit 6d41840
Show file tree
Hide file tree
Showing 4 changed files with 116 additions and 1 deletion.
3 changes: 2 additions & 1 deletion rdfproxy/adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from rdfproxy.mapper import _ModelBindingsMapper
from rdfproxy.sparql_strategies import HttpxStrategy, SPARQLStrategy
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.checkers.query_checker import check_query
from rdfproxy.utils.models import Page, QueryParameters


Expand Down Expand Up @@ -40,7 +41,7 @@ def __init__(
sparql_strategy: type[SPARQLStrategy] = HttpxStrategy,
) -> None:
self._target = target
self._query = query
self._query = check_query(query)
self._model = model

self.sparql_strategy = sparql_strategy(self._target)
Expand Down
1 change: 1 addition & 0 deletions rdfproxy/constructor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from rdfproxy.utils._types import _TModelInstance
from rdfproxy.utils.checkers.query_checker import check_query
from rdfproxy.utils.models import QueryParameters
from rdfproxy.utils.sparql_utils import (
add_solution_modifier,
Expand Down
15 changes: 15 additions & 0 deletions rdfproxy/utils/_exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,18 @@ class InvalidGroupingKeyException(Exception):

class QueryConstructionException(Exception):
    """Raised when building a SPARQL query fails."""


class UnsupportedQueryException(Exception):
    """Raised when a given SPARQL query is of a kind that is not supported."""


class QueryParseException(Exception):
    """Raised when a given SPARQL query cannot be parsed.

    This exception is meant to wrap and re-raise every exception that
    comes out of parsing a SPARQL query with RDFLib's parseQuery function.

    parseQuery raises a pyparsing.exceptions.ParseException; testing
    against that type directly would mean adding pyparsing as a
    dependency for the tests alone.
    """
98 changes: 98 additions & 0 deletions rdfproxy/utils/checkers/query_checker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
"""Functionality for performing checks on SPARQL queries."""

import logging
from typing import TypeVar
from typing import no_type_check

from rdflib.plugins.sparql.parser import parseQuery
from rdflib.plugins.sparql.parserutils import CompValue
from rdfproxy.utils._exceptions import QueryParseException, UnsupportedQueryException
from rdfproxy.utils.utils import compose_left


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Type variable bound to str; the checkers below use it to annotate both
# the query they receive and the query they hand back.
_TQuery = TypeVar("_TQuery", bound=str)


def _parse_query(query: _TQuery) -> tuple[_TQuery, CompValue]:
    """Parse a SPARQL query and pair it with its parse object.

    Intended as the opening stage of the check_query pipeline.

    Returns:
        A tuple of the unchanged query and the second item of the
        two-item result produced by parseQuery.

    Raises:
        QueryParseException: If parseQuery raises any exception; the
            original error is passed as the argument and chained as
            the cause.
    """
    logger.debug("Running parse check.")

    try:
        parse_result = parseQuery(query)
    except Exception as error:
        raise QueryParseException(error) from error

    # Only the second item of the parse result is handed to later stages.
    _first_item, parse_object = parse_result
    return query, parse_object


def _check_select_query(
    data: tuple[_TQuery, CompValue],
) -> tuple[_TQuery, CompValue]:
    """Check that a SPARQL query is a SELECT query.

    This is meant to run as a component in check_query.

    Args:
        data: A tuple of the query and its parse object.

    Returns:
        The input tuple, unchanged, so the next stage can consume it.

    Raises:
        UnsupportedQueryException: If the name of the parse object
            is anything other than "SelectQuery".
    """
    logger.debug("Running SELECT query check.")

    _, parse_object = data
    if parse_object.name != "SelectQuery":
        raise UnsupportedQueryException("Only SELECT queries are applicable.")
    return data


def _check_solution_modifiers(
    data: tuple[_TQuery, CompValue],
) -> tuple[_TQuery, CompValue]:
    """Reject a SPARQL query that has a top-level solution modifier.

    This is meant to run as a component in check_query.

    The parse object is probed for the attributes "limitoffset",
    "groupby", "having" and "orderby", in that order; the first one
    whose value is not None causes the query to be rejected.

    Args:
        data: A tuple of the query and its parse object.

    Returns:
        The input tuple, unchanged, if no modifier is found.

    Raises:
        UnsupportedQueryException: If any of the probed attributes
            holds a value other than None.
    """
    logger.debug("Running solution modifier check.")

    _, parse_object = data

    for mod_name in ("limitoffset", "groupby", "having", "orderby"):
        modifier = getattr(parse_object, mod_name, None)
        # Compare against None explicitly: a modifier value may be present
        # yet falsy, and a truthiness test would let it slip through.
        if modifier is not None:
            logger.critical(
                "Detected solution modifier '%s' in outer query.", modifier
            )
            raise UnsupportedQueryException(
                "Solution modifiers for top-level queries are currently not supported."
            )

    return data


def _get_query_string(data: tuple[_TQuery, CompValue]) -> _TQuery:
    """Extract the query from a query/parse_object tuple.

    Intended as the closing stage of the check_query pipeline.
    """
    (query_string, _parse_object) = data
    return query_string


@no_type_check
def check_query(query: _TQuery) -> _TQuery:
    """Run a SPARQL query through the composed pipeline of checks.

    The pipeline takes a SPARQL query string and gives that same
    string back when every check passes.

    _parse_query is meant to be the first component and
    _get_query_string is meant to be the last component; further
    checkers belong in between.
    """
    logger.debug("Running query check pipeline on '%s'", query)

    run_checks = compose_left(
        _parse_query,
        _check_select_query,
        _check_solution_modifiers,
        _get_query_string,
    )
    return run_checks(query)

0 comments on commit 6d41840

Please sign in to comment.