From 75ab0467c1b10bc1bc04c10521953d1f0088589d Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Mon, 23 Dec 2024 14:55:33 -0500 Subject: [PATCH 1/2] [ENG-6284] render tsv/csv (#834) allow rendering search responses as lines of tab-separated or comma-separated values main point: - add simple_tsv and simple_csv renderers in trove.render - - can be seen with query param acceptMediatype=text/tab-separated-values or acceptMediatype=text/csv - - get default columns from static DEFAULT_TABULAR_SEARCH_COLUMN_PATHS in trove.vocab.osfmap - allow "download" responses -- add withFileName=foo query param to get a response with Content-Disposition: attachment and a filename based on "foo" - allow absurd page sizes changes made along the way: - introduce ProtoRendering as renderer output type, to better decouple rendering from view logic - - include StreamableRendering for responses that might be streamed, like csv/tsv (though it's not currently handled any differently from SimpleRendering) - - reshape BaseRenderer (and each existing renderer) to have a consistent call signature (and return ProtoRendering) - - - replace trove.render.get_renderer with trove.render.get_renderer_type -- instantiate the renderer with response data - add trove.views._responder with common logic for building a django HttpResponse for a ProtoRendering - - consistently handles withFileName/Content-Disposition - move some osf-specific constants to trove.vocab.osfmap for easier reuse - pull out some abstractable logic: - - from existing trove.render.simple_json into trove.render._simple_trovesearch (for renderers that include only the list of search results) - - from existing tests.trove.derive._base into tests.trove._input_output_tests (for tests following the same simple input/output pattern as deriver and renderer tests) - add tests.trove.render to cover the new renderers simple_tsv and simple_csv, as well as the existing renderers jsonapi, simple_json, jsonld, and turtle - - minimally update 
existing renderers to create consistent output --- requirements.txt | 2 +- share/search/index_strategy/_base.py | 10 +- .../index_strategy/_trovesearch_util.py | 22 +- .../index_strategy/trove_indexcard_flats.py | 54 ++- .../index_strategy/trovesearch_denorm.py | 56 +-- .../_common_trovesearch_tests.py | 39 +- tests/trove/_input_output_tests.py | 69 ++++ tests/trove/derive/_base.py | 74 +--- tests/trove/derive/test_osfmap_json.py | 2 +- tests/trove/derive/test_sharev2_elastic.py | 9 +- tests/trove/render/__init__.py | 0 tests/trove/render/_base.py | 82 +++++ tests/trove/render/_inputs.py | 114 ++++++ tests/trove/render/test_jsonapi_renderer.py | 249 +++++++++++++ tests/trove/render/test_jsonld_renderer.py | 233 ++++++++++++ .../trove/render/test_simple_csv_renderer.py | 24 ++ .../trove/render/test_simple_json_renderer.py | 61 ++++ .../trove/render/test_simple_tsv_renderer.py | 24 ++ tests/trove/render/test_turtle_renderer.py | 116 ++++++ trove/exceptions.py | 4 + trove/extract/legacy_sharev2.py | 5 +- trove/render/__init__.py | 34 +- trove/render/_base.py | 101 +++--- trove/render/_rendering.py | 47 +++ trove/render/_simple_trovesearch.py | 109 ++++++ trove/render/html_browse.py | 112 +++--- trove/render/jsonapi.py | 53 +-- trove/render/jsonld.py | 19 +- trove/render/simple_csv.py | 160 ++++++++ trove/render/simple_json.py | 88 ++--- trove/render/simple_tsv.py | 10 + trove/render/turtle.py | 8 +- trove/trovesearch/page_cursor.py | 75 +++- .../{search_response.py => search_handle.py} | 125 ++++--- trove/trovesearch/search_params.py | 93 ++++- trove/trovesearch/trovesearch_gathering.py | 342 ++++++++++++------ trove/views/_responder.py | 72 ++++ trove/views/browse.py | 16 +- trove/views/indexcard.py | 30 +- trove/views/search.py | 162 +++++---- trove/views/vocab.py | 28 +- trove/vocab/mediatypes.py | 20 + trove/vocab/osfmap.py | 25 ++ trove/vocab/trove.py | 24 +- 44 files changed, 2366 insertions(+), 636 deletions(-) create mode 100644 
tests/trove/_input_output_tests.py create mode 100644 tests/trove/render/__init__.py create mode 100644 tests/trove/render/_base.py create mode 100644 tests/trove/render/_inputs.py create mode 100644 tests/trove/render/test_jsonapi_renderer.py create mode 100644 tests/trove/render/test_jsonld_renderer.py create mode 100644 tests/trove/render/test_simple_csv_renderer.py create mode 100644 tests/trove/render/test_simple_json_renderer.py create mode 100644 tests/trove/render/test_simple_tsv_renderer.py create mode 100644 tests/trove/render/test_turtle_renderer.py create mode 100644 trove/render/_rendering.py create mode 100644 trove/render/_simple_trovesearch.py create mode 100644 trove/render/simple_csv.py create mode 100644 trove/render/simple_tsv.py rename trove/trovesearch/{search_response.py => search_handle.py} (51%) create mode 100644 trove/views/_responder.py diff --git a/requirements.txt b/requirements.txt index ec6cf1faf..bee50f6de 100644 --- a/requirements.txt +++ b/requirements.txt @@ -43,4 +43,4 @@ xmltodict==0.12.0 # MIT # Allows custom-rendered IDs, hiding null values, and including data in error responses git+https://github.com/cos-forks/django-rest-framework-json-api.git@v4.2.1+cos0 -git+https://github.com/aaxelb/primitive_metadata.git@0.2024.09 +git+https://github.com/aaxelb/primitive_metadata.git@0.2024.14 diff --git a/share/search/index_strategy/_base.py b/share/search/index_strategy/_base.py index 92ce83c8f..bafec1fa4 100644 --- a/share/search/index_strategy/_base.py +++ b/share/search/index_strategy/_base.py @@ -12,9 +12,9 @@ CardsearchParams, ValuesearchParams, ) -from trove.trovesearch.search_response import ( - CardsearchResponse, - ValuesearchResponse, +from trove.trovesearch.search_handle import ( + CardsearchHandle, + ValuesearchHandle, ) @@ -219,10 +219,10 @@ def pls_stop_keeping_live(self): def pls_handle_search__sharev2_backcompat(self, request_body=None, request_queryparams=None) -> dict: raise 
NotImplementedError(f'{self.__class__.__name__} does not implement pls_handle_search__sharev2_backcompat (either implement it or don\'t use this strategy for backcompat)') - def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> CardsearchResponse: + def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> CardsearchHandle: raise NotImplementedError - def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchResponse: + def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchHandle: raise NotImplementedError def pls_get_mappings(self) -> dict: diff --git a/share/search/index_strategy/_trovesearch_util.py b/share/search/index_strategy/_trovesearch_util.py index b14422b21..fbc09c8f3 100644 --- a/share/search/index_strategy/_trovesearch_util.py +++ b/share/search/index_strategy/_trovesearch_util.py @@ -19,17 +19,15 @@ ) from trove.util.iris import get_sufficiently_unique_iri, is_worthwhile_iri from trove.vocab.namespaces import ( - DCTERMS, - FOAF, - OSFMAP, OWL, RDF, - RDFS, - SKOS, TROVE, XSD, ) -from trove.vocab.osfmap import is_date_property +from trove.vocab.osfmap import ( + is_date_property, + SKIPPABLE_PROPERTIES, +) _logger = logging.getLogger(__name__) @@ -38,16 +36,6 @@ ### # constants -SKIPPABLE_PROPERTIES = ( - OSFMAP.contains, # too much, not helpful - OWL.sameAs, # handled special -) - -TITLE_PROPERTIES = (DCTERMS.title,) -NAME_PROPERTIES = (FOAF.name, OSFMAP.fileName) -LABEL_PROPERTIES = (RDFS.label, SKOS.prefLabel, SKOS.altLabel) -NAMELIKE_PROPERTIES = (*TITLE_PROPERTIES, *NAME_PROPERTIES, *LABEL_PROPERTIES) - KEYWORD_LENGTH_MAX = 8191 # skip keyword terms that might exceed lucene's internal limit # (see https://www.elastic.co/guide/en/elasticsearch/reference/current/ignore-above.html) KEYWORD_MAPPING = {'type': 'keyword', 'ignore_above': KEYWORD_LENGTH_MAX} @@ -160,7 +148,7 @@ def __post_init__(self): self.integer_values[_walk_path].add(_walk_obj) elif 
isinstance(_walk_obj, rdf.Literal): if XSD.integer in _walk_obj.datatype_iris: - self.integer_values[_walk_path].add(_walk_obj) + self.integer_values[_walk_path].add(int(_walk_obj.unicode_value)) if {RDF.string, RDF.langString}.intersection(_walk_obj.datatype_iris): self.text_values[_walk_path].add(_walk_obj) # try for date in a date property, regardless of the above diff --git a/share/search/index_strategy/trove_indexcard_flats.py b/share/search/index_strategy/trove_indexcard_flats.py index b9bfbd33b..dceb272df 100644 --- a/share/search/index_strategy/trove_indexcard_flats.py +++ b/share/search/index_strategy/trove_indexcard_flats.py @@ -31,24 +31,20 @@ SortParam, GLOB_PATHSTEP, ) -from trove.trovesearch.search_response import ( - CardsearchResponse, - ValuesearchResponse, +from trove.trovesearch.search_handle import ( + CardsearchHandle, + ValuesearchHandle, TextMatchEvidence, CardsearchResult, ValuesearchResult, PropertypathUsage, ) from trove.util.iris import get_sufficiently_unique_iri, is_worthwhile_iri, iri_path_as_keyword -from trove.vocab.osfmap import is_date_property +from trove.vocab import osfmap from trove.vocab.namespaces import RDF, OWL from ._trovesearch_util import ( latest_rdf_for_indexcard_pks, GraphWalk, - TITLE_PROPERTIES, - NAME_PROPERTIES, - LABEL_PROPERTIES, - NAMELIKE_PROPERTIES, KEYWORD_LENGTH_MAX, ) @@ -288,7 +284,7 @@ def pls_handle_search__sharev2_backcompat(self, request_body=None, request_query params=(request_queryparams or {}), ) - def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> CardsearchResponse: + def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> CardsearchHandle: _cursor = self._cardsearch_cursor(cardsearch_params) _sort = self._cardsearch_sort(cardsearch_params.sort_list) _query = self._cardsearch_query( @@ -306,7 +302,7 @@ def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> Cardsear aggs=self._cardsearch_aggs(cardsearch_params), sort=_sort, from_=_from_offset, 
- size=_cursor.page_size, + size=_cursor.bounded_page_size, source=False, # no need to get _source; _id is enough ) if settings.DEBUG: @@ -318,11 +314,11 @@ def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> Cardsear ) except elasticsearch8.TransportError as error: raise exceptions.IndexStrategyError() from error # TODO: error messaging - return self._cardsearch_response(cardsearch_params, _es8_response, _cursor) + return self._cardsearch_handle(cardsearch_params, _es8_response, _cursor) - def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchResponse: + def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchHandle: _cursor = OffsetCursor.from_cursor(valuesearch_params.page_cursor) - _is_date_search = is_date_property(valuesearch_params.valuesearch_propertypath[-1]) + _is_date_search = osfmap.is_date_property(valuesearch_params.valuesearch_propertypath[-1]) _search_kwargs = dict( query=self._cardsearch_query( valuesearch_params.cardsearch_filter_set, @@ -347,7 +343,7 @@ def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> Value ) except elasticsearch8.TransportError as error: raise exceptions.IndexStrategyError() from error # TODO: error messaging - return self._valuesearch_response(valuesearch_params, _es8_response, _cursor) + return self._valuesearch_handle(valuesearch_params, _es8_response, _cursor) ### # query implementation @@ -449,7 +445,7 @@ def _valuesearch_iri_aggs(self, valuesearch_params: ValuesearchParams, cursor: O _nested_terms_agg = { 'field': 'nested_iri.iri_value', # WARNING: terribly inefficient pagination (part one) - 'size': cursor.start_offset + cursor.page_size + 1, + 'size': cursor.start_offset + cursor.bounded_page_size + 1, } _iris = list(valuesearch_params.valuesearch_iris()) if _iris: @@ -526,7 +522,7 @@ def _valuesearch_date_aggs(self, valuesearch_params: ValuesearchParams): } return _aggs - def _valuesearch_response( + def 
_valuesearch_handle( self, valuesearch_params: ValuesearchParams, es8_response: dict, @@ -537,31 +533,33 @@ def _valuesearch_response( _buckets = _iri_aggs['value_at_propertypath']['iri_values']['buckets'] _bucket_count = len(_buckets) # WARNING: terribly inefficient pagination (part two) - _page_end_index = cursor.start_offset + cursor.page_size + _page_end_index = cursor.start_offset + cursor.bounded_page_size _bucket_page = _buckets[cursor.start_offset:_page_end_index] # discard prior pages cursor.total_count = ( MANY_MORE if (_bucket_count > _page_end_index) # agg includes one more, if there else _bucket_count ) - return ValuesearchResponse( + return ValuesearchHandle( cursor=cursor, search_result_page=[ self._valuesearch_iri_result(_iri_bucket) for _iri_bucket in _bucket_page ], + search_params=valuesearch_params, ) else: # assume date _year_buckets = ( es8_response['aggregations']['in_nested_date'] ['value_at_propertypath']['count_by_year']['buckets'] ) - return ValuesearchResponse( + return ValuesearchHandle( cursor=PageCursor(len(_year_buckets)), search_result_page=[ self._valuesearch_date_result(_year_bucket) for _year_bucket in _year_buckets ], + search_params=valuesearch_params, ) def _valuesearch_iri_result(self, iri_bucket): @@ -664,7 +662,7 @@ def _iter_nested_date_filters(self, search_filter) -> Iterator[dict]: else: raise ValueError(f'invalid date filter operator (got {search_filter.operator})') - def _cardsearch_sort(self, sort_list: tuple[SortParam]): + def _cardsearch_sort(self, sort_list: tuple[SortParam, ...]): if not sort_list: return None return [ @@ -683,12 +681,12 @@ def _cardsearch_sort(self, sort_list: tuple[SortParam]): for _sortparam in sort_list ] - def _cardsearch_response( + def _cardsearch_handle( self, cardsearch_params: CardsearchParams, es8_response: dict, cursor: OffsetCursor, - ) -> CardsearchResponse: + ) -> CardsearchHandle: _es8_total = es8_response['hits']['total'] if _es8_total['relation'] != 'eq': cursor.total_count = 
MANY_MORE @@ -717,11 +715,11 @@ def _cardsearch_response( for _bucket in es8_response['aggregations']['related_propertypath_usage']['buckets']: _path = tuple(json.loads(_bucket['key'])) _relatedproperty_by_path[_path].usage_count += _bucket['doc_count'] - return CardsearchResponse( + return CardsearchHandle( cursor=cursor, search_result_page=_results, related_propertypath_results=_relatedproperty_list, - cardsearch_params=cardsearch_params, + search_params=cardsearch_params, ) def _gather_textmatch_evidence(self, es8_hit) -> Iterable[TextMatchEvidence]: @@ -833,7 +831,7 @@ def _inner_hits(self, *, highlight_query=None) -> dict: def _should_skip_card(indexcard_rdf, rdfdoc): # skip cards without some value for name/title/label - return not any(rdfdoc.q(indexcard_rdf.focus_iri, NAMELIKE_PROPERTIES)) + return not any(rdfdoc.q(indexcard_rdf.focus_iri, osfmap.NAMELIKE_PROPERTIES)) def _bucketlist(agg_result: dict) -> list[str]: @@ -911,17 +909,17 @@ def for_iri_at_path(cls, path: tuple[str, ...], iri: str, rdfdoc): # TODO: don't discard language for name/title/label name_text=frozenset( _text.unicode_value - for _text in rdfdoc.q(iri, NAME_PROPERTIES) + for _text in rdfdoc.q(iri, osfmap.NAME_PROPERTIES) if isinstance(_text, primitive_rdf.Literal) ), title_text=frozenset( _text.unicode_value - for _text in rdfdoc.q(iri, TITLE_PROPERTIES) + for _text in rdfdoc.q(iri, osfmap.TITLE_PROPERTIES) if isinstance(_text, primitive_rdf.Literal) ), label_text=frozenset( _text.unicode_value - for _text in rdfdoc.q(iri, LABEL_PROPERTIES) + for _text in rdfdoc.q(iri, osfmap.LABEL_PROPERTIES) if isinstance(_text, primitive_rdf.Literal) ), ) diff --git a/share/search/index_strategy/trovesearch_denorm.py b/share/search/index_strategy/trovesearch_denorm.py index 83402272e..a40242112 100644 --- a/share/search/index_strategy/trovesearch_denorm.py +++ b/share/search/index_strategy/trovesearch_denorm.py @@ -36,15 +36,15 @@ ValuesearchParams, is_globpath, ) -from 
trove.trovesearch.search_response import ( - CardsearchResponse, +from trove.trovesearch.search_handle import ( + CardsearchHandle, CardsearchResult, PropertypathUsage, TextMatchEvidence, - ValuesearchResponse, + ValuesearchHandle, ValuesearchResult, ) -from trove.vocab.osfmap import is_date_property +from trove.vocab import osfmap from trove.vocab.namespaces import OWL, RDF from . import _trovesearch_util as ts @@ -202,7 +202,15 @@ def pls_handle_search__sharev2_backcompat(self, request_body=None, request_query ) # abstract method from IndexStrategy.SpecificIndex - def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> CardsearchResponse: + def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> CardsearchHandle: + # cases to handle: + # - sort by field value (streamable) + # - sort by relevance to text (non-streamable) + # - random sort (...non-streamable?) + # - first page (full random) + # - subsequent page (reproducibly random) + # (for streaming pages, skip aggs and such on subsequent pages) + # maybe start with a "header" request (no hits, minimal aggs) _querybuilder = _CardsearchQueryBuilder(cardsearch_params) _search_kwargs = _querybuilder.build() if settings.DEBUG: @@ -220,17 +228,17 @@ def pls_handle_cardsearch(self, cardsearch_params: CardsearchParams) -> Cardsear ) except elasticsearch8.TransportError as error: raise exceptions.IndexStrategyError() from error # TODO: error messaging - return self.index_strategy._cardsearch_response( + return self.index_strategy._cardsearch_handle( cardsearch_params, _es8_response, _querybuilder.response_cursor, ) # abstract method from IndexStrategy.SpecificIndex - def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchResponse: + def pls_handle_valuesearch(self, valuesearch_params: ValuesearchParams) -> ValuesearchHandle: _path = valuesearch_params.valuesearch_propertypath _cursor = OffsetCursor.from_cursor(valuesearch_params.page_cursor) - _is_date_search = 
is_date_property(_path[-1]) + _is_date_search = osfmap.is_date_property(_path[-1]) _query = ( _build_date_valuesearch(valuesearch_params) if _is_date_search @@ -275,7 +283,7 @@ def should_skip(self) -> bool: # skip cards that belong to an obsolete suid with a later duplicate _suid.has_forecompat_replacement() # ...or that are without some value for name/title/label - or not any(self.rdfdoc.q(self.focus_iri, ts.NAMELIKE_PROPERTIES)) + or not any(self.rdfdoc.q(self.focus_iri, osfmap.NAMELIKE_PROPERTIES)) ) def build_docs(self) -> Iterator[tuple[str, dict]]: @@ -319,9 +327,9 @@ def _iri_value_subdoc(self, iri: str) -> dict: _shortwalk = self._fullwalk.shortwalk_from(iri) return { **self._paths_and_values(_shortwalk), - 'value_name': list(self._texts_at_properties(_shortwalk, ts.NAME_PROPERTIES)), - 'value_title': list(self._texts_at_properties(_shortwalk, ts.TITLE_PROPERTIES)), - 'value_label': list(self._texts_at_properties(_shortwalk, ts.LABEL_PROPERTIES)), + 'value_name': list(self._texts_at_properties(_shortwalk, osfmap.NAME_PROPERTIES)), + 'value_title': list(self._texts_at_properties(_shortwalk, osfmap.TITLE_PROPERTIES)), + 'value_label': list(self._texts_at_properties(_shortwalk, osfmap.LABEL_PROPERTIES)), 'at_card_propertypaths': [ ts.propertypath_as_keyword(_path) for _path in self._fullwalk.paths_by_iri[iri] @@ -408,42 +416,44 @@ def _valuesearch_iris_response( valuesearch_params: ValuesearchParams, es8_response: dict, cursor: OffsetCursor, - ) -> ValuesearchResponse: + ) -> ValuesearchHandle: _iri_aggs = es8_response['aggregations'].get('agg_valuesearch_iris') _buckets = _iri_aggs['buckets'] _bucket_count = len(_buckets) # WARNING: terribly hacky pagination (part two) - _page_end_index = cursor.start_offset + cursor.page_size + _page_end_index = cursor.start_offset + cursor.bounded_page_size _bucket_page = _buckets[cursor.start_offset:_page_end_index] # discard prior pages cursor.total_count = ( MANY_MORE if (_bucket_count > _page_end_index) # agg includes 
one more, if there else _bucket_count ) - return ValuesearchResponse( + return ValuesearchHandle( cursor=cursor, search_result_page=[ self._valuesearch_iri_result(_iri_bucket) for _iri_bucket in _bucket_page ], + search_params=valuesearch_params, ) def _valuesearch_dates_response( self, valuesearch_params: ValuesearchParams, es8_response: dict, - ) -> ValuesearchResponse: + ) -> ValuesearchHandle: _year_buckets = ( es8_response['aggregations'] ['agg_valuesearch_dates'] ['buckets'] ) - return ValuesearchResponse( + return ValuesearchHandle( cursor=PageCursor(len(_year_buckets)), search_result_page=[ self._valuesearch_date_result(_year_bucket) for _year_bucket in _year_buckets ], + search_params=valuesearch_params, ) def _valuesearch_iri_result(self, iri_bucket) -> ValuesearchResult: @@ -464,12 +474,12 @@ def _valuesearch_date_result(self, date_bucket) -> ValuesearchResult: match_count=date_bucket['doc_count'], ) - def _cardsearch_response( + def _cardsearch_handle( self, cardsearch_params: CardsearchParams, es8_response: dict, cursor: OffsetCursor, - ) -> CardsearchResponse: + ) -> CardsearchHandle: _es8_total = es8_response['hits']['total'] if _es8_total['relation'] != 'eq': cursor.total_count = MANY_MORE @@ -498,11 +508,11 @@ def _cardsearch_response( } for _bucket in es8_response['aggregations']['agg_related_propertypath_usage']['buckets']: _relatedproperty_by_pathkey[_bucket['key']].usage_count += _bucket['doc_count'] - return CardsearchResponse( + return CardsearchHandle( cursor=cursor, search_result_page=_results, related_propertypath_results=_relatedproperty_list, - cardsearch_params=cardsearch_params, + search_params=cardsearch_params, ) def _gather_textmatch_evidence(self, card_iri, es8_hit) -> Iterator[TextMatchEvidence]: @@ -676,7 +686,7 @@ def build(self): 'aggs': self._cardsearch_aggs(), 'sort': list(self._cardsearch_sorts()) or None, 'from_': self._cardsearch_start_offset(), - 'size': self.response_cursor.page_size, + 'size': 
self.response_cursor.bounded_page_size, } @functools.cached_property @@ -805,7 +815,7 @@ def _build_iri_valuesearch(params: ValuesearchParams, cursor: OffsetCursor) -> d 'terms': { 'field': 'iri_value.single_focus_iri', # WARNING: terribly hacky pagination (part one) - 'size': cursor.start_offset + cursor.page_size + 1, + 'size': cursor.start_offset + cursor.bounded_page_size + 1, }, 'aggs': { 'agg_type_iri': {'terms': { diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index 53de73720..81461a34e 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -1,8 +1,8 @@ from typing import Iterable, Iterator -import dataclasses from datetime import date, timedelta import math from urllib.parse import urlencode +from unittest import mock from primitive_metadata import primitive_rdf as rdf @@ -10,7 +10,7 @@ from share.search import messages from trove import models as trove_db from trove.trovesearch.search_params import CardsearchParams, ValuesearchParams -from trove.trovesearch.search_response import PropertypathUsage +from trove.trovesearch.search_handle import PropertypathUsage from trove.vocab.namespaces import RDFS, TROVE, RDF, DCTERMS, OWL, FOAF, DCAT from ._with_real_services import RealElasticTestCase @@ -122,18 +122,18 @@ def test_cardsearch_pagination(self): _result_iris: set[str] = set() _page_count = 0 while True: - _cardsearch_response = self.current_index.pls_handle_cardsearch( + _cardsearch_handle = self.current_index.pls_handle_cardsearch( CardsearchParams.from_querystring(_querystring), ) _page_iris = { self._indexcard_focus_by_uuid[_result.card_uuid] - for _result in _cardsearch_response.search_result_page + for _result in _cardsearch_handle.search_result_page } self.assertFalse(_result_iris.intersection(_page_iris)) self.assertLessEqual(len(_page_iris), _page_size) 
_result_iris.update(_page_iris) _page_count += 1 - _next_cursor = _cardsearch_response.cursor.next_cursor() + _next_cursor = _cardsearch_handle.cursor.next_cursor() if _next_cursor is None: break _querystring = urlencode({'page[cursor]': _next_cursor.as_queryparam_value()}) @@ -142,20 +142,21 @@ def test_cardsearch_pagination(self): def test_cardsearch_related_properties(self): self._fill_test_data_for_querying() - _cardsearch_params = dataclasses.replace( - CardsearchParams.from_querystring(''), - related_property_paths=( + with mock.patch( + 'trove.trovesearch.search_params.suggested_property_paths', + return_value=( (DCTERMS.creator,), (DCTERMS.references,), (BLARG.nada,), ), - ) - _cardsearch_response = self.current_index.pls_handle_cardsearch(_cardsearch_params) - self.assertEqual(_cardsearch_response.related_propertypath_results, [ - PropertypathUsage((DCTERMS.creator,), 3), - PropertypathUsage((DCTERMS.references,), 2), - PropertypathUsage((BLARG.nada,), 0), - ]) + ): + _cardsearch_params = CardsearchParams.from_querystring('') + _cardsearch_handle = self.current_index.pls_handle_cardsearch(_cardsearch_params) + self.assertEqual(_cardsearch_handle.related_propertypath_results, [ + PropertypathUsage((DCTERMS.creator,), 3), + PropertypathUsage((DCTERMS.references,), 2), + PropertypathUsage((BLARG.nada,), 0), + ]) def test_valuesearch(self): self._fill_test_data_for_querying() @@ -211,11 +212,11 @@ def _assert_cardsearch_iris(self, queryparams: dict, expected_focus_iris: Iterab _querystring = urlencode(queryparams) _cardsearch_params = CardsearchParams.from_querystring(_querystring) assert isinstance(_cardsearch_params, CardsearchParams) - _cardsearch_response = self.current_index.pls_handle_cardsearch(_cardsearch_params) + _cardsearch_handle = self.current_index.pls_handle_cardsearch(_cardsearch_params) # assumes all results fit on one page _actual_result_iris: set[str] | list[str] = [ self._indexcard_focus_by_uuid[_result.card_uuid] - for _result in 
_cardsearch_response.search_result_page + for _result in _cardsearch_handle.search_result_page ] # test sort order only when expected results are ordered if isinstance(expected_focus_iris, set): @@ -226,11 +227,11 @@ def _assert_valuesearch_values(self, queryparams, expected_values): _querystring = urlencode(queryparams) _valuesearch_params = ValuesearchParams.from_querystring(_querystring) assert isinstance(_valuesearch_params, ValuesearchParams) - _valuesearch_response = self.current_index.pls_handle_valuesearch(_valuesearch_params) + _valuesearch_handle = self.current_index.pls_handle_valuesearch(_valuesearch_params) # assumes all results fit on one page _actual_values = { _result.value_iri or _result.value_value - for _result in _valuesearch_response.search_result_page + for _result in _valuesearch_handle.search_result_page } self.assertEqual(expected_values, _actual_values, msg=f'?{_querystring}') diff --git a/tests/trove/_input_output_tests.py b/tests/trove/_input_output_tests.py new file mode 100644 index 000000000..db7e822ff --- /dev/null +++ b/tests/trove/_input_output_tests.py @@ -0,0 +1,69 @@ +import abc +import pprint +from unittest import TestCase +import typing + + +class BasicInputOutputTestCase(TestCase): + '''base for tests that have a simple/repetitive input/output pattern + ''' + maxDiff = None # usually want the full diff for these tests, tho can override if you prefer + + # expected on subclasses: + inputs: typing.ClassVar[ + dict[str, typing.Any] + ] + expected_outputs: typing.ClassVar[ + # keys should match `inputs` keys (enforce with types? 
maybe someday) + dict[str, typing.Any] + ] + + # required in subclasses + @abc.abstractmethod + def compute_output(self, given_input: typing.Any) -> typing.Any: + raise NotImplementedError + + # (optional override, for when equality isn't so easy) + def assert_outputs_equal(self, expected_output: typing.Any, actual_output: typing.Any) -> None: + self.assertEqual(expected_output, actual_output) + + # (optional override, for when logic is more complicated) + def run_input_output_test(self, given_input, expected_output): + _actual_output = self.compute_output(given_input) + self.assert_outputs_equal(expected_output, _actual_output) + + # (optional override, for when logic is more complicated) + def missing_case(self, name: str, given_input): + _cls = self.__class__ + _actual_output = self.compute_output(given_input) + raise NotImplementedError('\n'.join(( + 'missing test case!', + f'\tadd "{name}" to {_cls.__module__}.{_cls.__qualname__}.expected_outputs', + '\tactual output, fwiw:', + pprint.pformat(_actual_output), + ))) + + ### + # private details + + def __init_subclass__(cls, **kwargs): + super().__init_subclass__(**kwargs) + # HACK: assign `test_*` method only on concrete subclasses, + # so the test runner doesn't try instantiating a base class + if getattr(cls, 'inputs', None) and getattr(cls, 'expected_outputs', None): + cls.test_inputs_match_outputs = cls._test_inputs_match_outputs # type: ignore[attr-defined] + + # the only actual test method -- assigned to concrete subclasses in __init_subclass__ + def _test_inputs_match_outputs(self): + for _name, _input, _expected_output in self._iter_cases(): + with self.subTest(name=_name): + self.run_input_output_test(_input, _expected_output) + + def _iter_cases(self) -> typing.Iterator[tuple[str, typing.Any, typing.Any]]: + # yields (name, input, expected_output) tuples + for _name, _input in self.inputs.items(): + try: + _expected_output = self.expected_outputs[_name] + except KeyError: + self.missing_case(_name, 
_input) + yield (_name, _input, _expected_output) diff --git a/tests/trove/derive/_base.py b/tests/trove/derive/_base.py index b15a6d20b..bf07e659f 100644 --- a/tests/trove/derive/_base.py +++ b/tests/trove/derive/_base.py @@ -1,48 +1,35 @@ import datetime -from unittest import mock, TestCase -import typing +from unittest import mock from primitive_metadata import primitive_rdf as rdf +from trove.derive._base import IndexcardDeriver +from tests.trove._input_output_tests import BasicInputOutputTestCase from ._inputs import DERIVER_TEST_DOCS, DeriverTestDoc SHOULD_SKIP = object() # for deriver inputs that should be skipped -class BaseIndexcardDeriverTest(TestCase): - maxDiff = None +class BaseIndexcardDeriverTest(BasicInputOutputTestCase): + inputs = DERIVER_TEST_DOCS # (leave this one alone) - ####### - # implement these things: + # required on subclasses: `deriver_class` and `expected_outputs` + deriver_class: type[IndexcardDeriver] + # expected_outputs: dict[str, typing.Any] + # ^ (from BasicInputOutputTestCase) must have the same keys as + # `DERIVER_TEST_DOCS` and values that are either `SHOULD_SKIP` + # (when `deriver.should_skip()` should return true) or a value + # that can be compared against `deriver.derive_card_as_text()` - # a subclass of IndexcardDeriver - deriver_class: type + def compute_output(self, given_input): + return self._get_deriver(given_input).derive_card_as_text() - # dictionary with the same keys as `DERIVER_TEST_DOCS` and values that - # are either `SHOULD_SKIP` (above) or strings that will be passed as - # `expected_text` to `derived_texts_equal` - expected_outputs: dict - - # (optional override, for when equality isn't so easy) - def assert_derived_texts_equal(self, expected_text: str, actual_text: str) -> None: - self.assertEqual(expected_text, actual_text) - - ####### - # don't override anything else - - test_should_skip: typing.Callable[['BaseIndexcardDeriverTest'], None] - test_derive_card_as_text: 
typing.Callable[['BaseIndexcardDeriverTest'], None] - - def __init_subclass__(cls): - # add test methods on subclasses (but not the base class!) - cls.test_should_skip = _test_should_skip - cls.test_derive_card_as_text = _test_derive_card_as_text - - def setUp(self): - _patcher = mock.patch('share.util.IDObfuscator.encode', new=lambda x: x.id) - _patcher.start() - self.addCleanup(_patcher.stop) + def run_input_output_test(self, given_input, expected_output): + if expected_output is SHOULD_SKIP: + self.assertTrue(self._get_deriver(given_input).should_skip()) + else: + super().run_input_output_test(given_input, expected_output) def _get_deriver(self, input_doc: DeriverTestDoc): _mock_suid = mock.Mock() @@ -62,26 +49,3 @@ def _get_deriver(self, input_doc: DeriverTestDoc): _mock_indexcard_rdf.indexcard.id = '--indexcard-id--' _mock_indexcard_rdf.indexcard.source_record_suid = _mock_suid return self.deriver_class(_mock_indexcard_rdf) - - def _iter_test_cases(self): - for _input_key, _input_doc in DERIVER_TEST_DOCS.items(): - _expected_output = self.expected_outputs.get(_input_key) - if _expected_output is None: - raise NotImplementedError(f'{self.__class__.__qualname__}.expected_outputs["{_input_key}"]') - with self.subTest(input_key=_input_key): - yield (_input_key, self._get_deriver(_input_doc), _expected_output) - - -def _test_should_skip(self: BaseIndexcardDeriverTest) -> None: - for _input_key, _deriver, _expected_output in self._iter_test_cases(): - self.assertEqual( - bool(_expected_output is SHOULD_SKIP), - _deriver.should_skip(), - ) - - -def _test_derive_card_as_text(self: BaseIndexcardDeriverTest) -> None: - for _input_key, _deriver, _expected_output in self._iter_test_cases(): - if _expected_output is not SHOULD_SKIP: - _output = _deriver.derive_card_as_text() - self.assert_derived_texts_equal(_expected_output, _output) diff --git a/tests/trove/derive/test_osfmap_json.py b/tests/trove/derive/test_osfmap_json.py index 23061af5f..b408e07e3 100644 --- 
a/tests/trove/derive/test_osfmap_json.py +++ b/tests/trove/derive/test_osfmap_json.py @@ -7,7 +7,7 @@ class TestOsfmapJsonDeriver(BaseIndexcardDeriverTest): deriver_class = OsfmapJsonDeriver - def assert_derived_texts_equal(self, expected, actual): + def assert_outputs_equal(self, expected, actual): self.assertEqual(expected, json.loads(actual)) expected_outputs = { diff --git a/tests/trove/derive/test_sharev2_elastic.py b/tests/trove/derive/test_sharev2_elastic.py index 1c8aef708..dd0510d14 100644 --- a/tests/trove/derive/test_sharev2_elastic.py +++ b/tests/trove/derive/test_sharev2_elastic.py @@ -1,4 +1,5 @@ import json +from unittest import mock from trove.derive.sharev2_elastic import ShareV2ElasticDeriver @@ -8,7 +9,13 @@ class TestShareV2ElasticDeriver(BaseIndexcardDeriverTest): deriver_class = ShareV2ElasticDeriver - def assert_derived_texts_equal(self, expected, actual): + def setUp(self): + # un-obfuscated ids, please + _patcher = mock.patch('share.util.IDObfuscator.encode', new=lambda x: x.id) + _patcher.start() + self.addCleanup(_patcher.stop) + + def assert_outputs_equal(self, expected, actual): self.assertEqual(expected, json.loads(actual)) expected_outputs = { diff --git a/tests/trove/render/__init__.py b/tests/trove/render/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/trove/render/_base.py b/tests/trove/render/_base.py new file mode 100644 index 000000000..626de4b85 --- /dev/null +++ b/tests/trove/render/_base.py @@ -0,0 +1,82 @@ +import json + +from primitive_metadata import ( + gather, + primitive_rdf as rdf, +) + +from trove.trovesearch.trovesearch_gathering import trovesearch_by_indexstrategy +from trove.render._base import BaseRenderer +from trove.render._rendering import ProtoRendering +from trove.vocab.namespaces import RDF +from tests.trove._input_output_tests import BasicInputOutputTestCase +from ._inputs import UNRENDERED_RDF, UNRENDERED_SEARCH_RDF, RdfCase + + +class FakeGatherCache(gather.GatherCache): + 
def already_gathered(self, *args, **kwargs): + return True # prevent gathering + + +class FakeGathering(gather.Gathering): + def ask_exhaustively(self, *args, **kwargs): + # skip exhaustion for these tests (note: only works for non-streaming) + for _obj in self.ask(*args, **kwargs): + yield (_obj, self.cache.gathered) + + +def _make_fake_gathering(tripledict, renderer_type): + _organizer = trovesearch_by_indexstrategy + return FakeGathering( + norms=_organizer.norms, + organizer=_organizer, + gatherer_kwargs={ + 'deriver_iri': renderer_type.INDEXCARD_DERIVER_IRI, + }, + cache=FakeGatherCache(gathered=rdf.RdfGraph(tripledict)) + ) + + +class TroveRendererTests(BasicInputOutputTestCase): + inputs = UNRENDERED_RDF + + # required on subclasses: `renderer_class` and `expected_outputs` + renderer_class: type[BaseRenderer] + # expected_outputs: dict[str, typing.Any] (from BasicInputOutputTestCase) + + def compute_output(self, given_input: RdfCase): + _renderer = self.renderer_class( + response_focus=gather.Focus.new( + given_input.focus, + given_input.tripledict.get(given_input.focus, {}).get(RDF.type), + ), + response_gathering=_make_fake_gathering(given_input.tripledict, self.renderer_class), + ) + return _renderer.render_document() + + def assert_outputs_equal(self, expected_output, actual_output) -> None: + if expected_output is None: + print(repr(actual_output)) + raise NotImplementedError + self.assertEqual(expected_output.mediatype, actual_output.mediatype) + self.assertEqual( + self._get_rendered_output(expected_output), + self._get_rendered_output(actual_output), + ) + + def _get_rendered_output(self, rendering: ProtoRendering): + # for now, they always iter strings (update if/when bytes are in play) + return ''.join(rendering.iter_content()) # type: ignore[arg-type] + + +class TrovesearchRendererTests(TroveRendererTests): + inputs = UNRENDERED_SEARCH_RDF + + +class TroveJsonRendererTests(TroveRendererTests): + def _get_rendered_output(self, rendering: 
ProtoRendering): + return json.loads(super()._get_rendered_output(rendering)) + + +class TrovesearchJsonRendererTests(TroveJsonRendererTests, TrovesearchRendererTests): + pass diff --git a/tests/trove/render/_inputs.py b/tests/trove/render/_inputs.py new file mode 100644 index 000000000..0a97a22c6 --- /dev/null +++ b/tests/trove/render/_inputs.py @@ -0,0 +1,114 @@ +import dataclasses +import datetime +import json + +import primitive_metadata.primitive_rdf as rdf + +from trove.vocab.namespaces import ( + DCAT, + DCTERMS, + FOAF, + RDF, + TROVE, +) + +BLARG = rdf.IriNamespace('http://blarg.example/vocab/') + + +@dataclasses.dataclass +class RdfCase: + focus: str + tripledict: rdf.RdfTripleDictionary + + +UNRENDERED_RDF = { + 'simple_card': RdfCase(BLARG.aCard, { + BLARG.aCard: { + RDF.type: {TROVE.Indexcard, DCAT.CatalogRecord}, + FOAF.primaryTopic: {BLARG.anItem}, + TROVE.focusIdentifier: {rdf.literal(BLARG.anItem)}, + DCTERMS.issued: {rdf.literal(datetime.date(2024, 1, 1))}, + DCTERMS.modified: {rdf.literal(datetime.date(2024, 1, 1))}, + TROVE.resourceMetadata: {rdf.literal( + json.dumps({'@id': BLARG.anItem, 'title': 'an item, yes'}), + datatype_iris=RDF.JSON, + )}, + }, + }), + 'various_types': RdfCase(BLARG.aSubject, { + BLARG.aSubject: { + RDF.type: {BLARG.aType}, + BLARG.hasIri: {BLARG.anIri}, + BLARG.hasRdfStringLiteral: {rdf.literal('an rdf:string literal')}, + BLARG.hasRdfLangStringLiteral: {rdf.literal('a rdf:langString literal', language='en')}, + BLARG.hasIntegerLiteral: {rdf.literal(17)}, + BLARG.hasDateLiteral: {rdf.literal(datetime.date(2024, 1, 1))}, + BLARG.hasStrangeLiteral: {rdf.literal('a literal of strange datatype', datatype_iris=BLARG.aStrangeDatatype)}, + }, + }), +} + + +UNRENDERED_SEARCH_RDF = { + 'no_results': RdfCase(BLARG.aSearch, { + BLARG.aSearch: { + RDF.type: {TROVE.Cardsearch}, + TROVE.totalResultCount: {rdf.literal(0)}, + }, + }), + 'few_results': RdfCase(BLARG.aSearchFew, { + BLARG.aSearchFew: { + RDF.type: {TROVE.Cardsearch}, + 
TROVE.totalResultCount: {rdf.literal(3)}, + TROVE.searchResultPage: { + rdf.sequence(( + rdf.blanknode({ + RDF.type: {TROVE.SearchResult}, + TROVE.indexCard: {BLARG.aCard}, + }), + rdf.blanknode({ + RDF.type: {TROVE.SearchResult}, + TROVE.indexCard: {BLARG.aCardd}, + }), + rdf.blanknode({ + RDF.type: {TROVE.SearchResult}, + TROVE.indexCard: {BLARG.aCarddd}, + }), + )), + }, + }, + BLARG.aCard: { + RDF.type: {TROVE.Indexcard, DCAT.CatalogRecord}, + FOAF.primaryTopic: {BLARG.anItem}, + TROVE.focusIdentifier: {rdf.literal(BLARG.anItem)}, + DCTERMS.issued: {rdf.literal(datetime.date(2024, 1, 1))}, + DCTERMS.modified: {rdf.literal(datetime.date(2024, 1, 1))}, + TROVE.resourceMetadata: {rdf.literal( + json.dumps({'@id': BLARG.anItem, 'title': 'an item, yes'}), + datatype_iris=RDF.JSON, + )}, + }, + BLARG.aCardd: { + RDF.type: {TROVE.Indexcard, DCAT.CatalogRecord}, + FOAF.primaryTopic: {BLARG.anItemm}, + TROVE.focusIdentifier: {rdf.literal(BLARG.anItemm)}, + DCTERMS.issued: {rdf.literal(datetime.date(2024, 2, 2))}, + DCTERMS.modified: {rdf.literal(datetime.date(2024, 2, 2))}, + TROVE.resourceMetadata: {rdf.literal( + json.dumps({'@id': BLARG.anItemm, 'title': 'an itemm, yes'}), + datatype_iris=RDF.JSON, + )}, + }, + BLARG.aCarddd: { + RDF.type: {TROVE.Indexcard, DCAT.CatalogRecord}, + FOAF.primaryTopic: {BLARG.anItemmm}, + TROVE.focusIdentifier: {rdf.literal(BLARG.anItemmm)}, + DCTERMS.issued: {rdf.literal(datetime.date(2024, 3, 3))}, + DCTERMS.modified: {rdf.literal(datetime.date(2024, 3, 3))}, + TROVE.resourceMetadata: {rdf.literal( + json.dumps({'@id': BLARG.anItemmm, 'title': 'an itemmm, yes'}), + datatype_iris=RDF.JSON, + )}, + }, + }), +} diff --git a/tests/trove/render/test_jsonapi_renderer.py b/tests/trove/render/test_jsonapi_renderer.py new file mode 100644 index 000000000..414d02796 --- /dev/null +++ b/tests/trove/render/test_jsonapi_renderer.py @@ -0,0 +1,249 @@ +import json + +from trove.render.jsonapi import RdfJsonapiRenderer +from trove.render._rendering 
import SimpleRendering +from ._inputs import BLARG +from . import _base + + +def _jsonapi_item_sortkey(jsonapi_item: dict): + return (jsonapi_item.get('type'), jsonapi_item.get('id')) + + +class _BaseJsonapiRendererTest(_base.TroveJsonRendererTests): + renderer_class = RdfJsonapiRenderer + + def _get_rendered_output(self, rendering): + _json = super()._get_rendered_output(rendering) + _included = _json.get('included') + if _included: + # order of includes does not matter + _included.sort(key=_jsonapi_item_sortkey) + return _json + + +class TestJsonapiRenderer(_BaseJsonapiRendererTest): + expected_outputs = { + 'simple_card': SimpleRendering( + mediatype='application/vnd.api+json', + rendered_content=json.dumps({ + "data": { + "id": "68808d2c76cd5f7ff4e0f470592da8f02be1f615b05a143cc3821c5288e13f11", + "type": "index-card", + "attributes": { + "resourceIdentifier": [ + BLARG.anItem + ], + "resourceMetadata": { + "@id": BLARG.anItem, + "title": "an item, yes" + } + }, + "links": { + "self": BLARG.aCard + }, + "meta": { + "foaf:primaryTopic": [ + BLARG.anItem + ], + "dcterms:issued": [ + "2024-01-01" + ], + "dcterms:modified": [ + "2024-01-01" + ] + }, + } + }), + ), + 'various_types': SimpleRendering( + mediatype='application/vnd.api+json', + rendered_content=json.dumps({ + "data": { + "id": "11f60e4d2fceb50ca695c3c77dcd7983ff78116ff2e7a2f315800c8ca645f469", + "type": BLARG.aType, + "meta": { + BLARG.hasIri: [BLARG.anIri], + BLARG.hasRdfStringLiteral: ["an rdf:string literal"], + BLARG.hasRdfLangStringLiteral: ['a rdf:langString literal'], + BLARG.hasIntegerLiteral: [17], + BLARG.hasDateLiteral: ["2024-01-01"], + BLARG.hasStrangeLiteral: ['a literal of strange datatype'], + }, + "links": {"self": BLARG.aSubject}, + } + }), + ), + } + + +class TestJsonapiSearchRenderer(_BaseJsonapiRendererTest, _base.TrovesearchJsonRendererTests): + expected_outputs = { + 'no_results': SimpleRendering( + mediatype='application/vnd.api+json', + rendered_content=json.dumps({ + "data": { 
+ "id": "4b79207d8ecd4817c36b75b16cee6c4a1874774cfbcfbd0caede339148403325", + "type": "index-card-search", + "attributes": { + "totalResultCount": 0, + }, + "links": { + "self": BLARG.aSearch, + } + } + }), + ), + 'few_results': SimpleRendering( + mediatype='application/vnd.api+json', + rendered_content=json.dumps({ + "data": { + "id": "79183793c0eea20ca6338d71c936deee113b94641ee77346fb66f9c4bcebfe0a", + "type": "index-card-search", + "attributes": { + "totalResultCount": 3 + }, + "relationships": { + "searchResultPage": { + "data": [ + { + "id": "dc0604c7e9c07576b57646119784de65e7204fc7c860cc1b9be8ebec5f2b96ba", + "type": "search-result" + }, + { + "id": "367b30e8a0eece555ac15fda82bb28f535f1f8beb97397c01162d619cd7058bc", + "type": "search-result" + }, + { + "id": "26afa96fdbd189e4c4aeac921a42e9d3f09eb94b59ffd4b9ad300c524536cc97", + "type": "search-result" + } + ] + } + }, + "links": { + "self": BLARG.aSearchFew + } + }, + "included": [ + { + "id": "dc0604c7e9c07576b57646119784de65e7204fc7c860cc1b9be8ebec5f2b96ba", + "type": "search-result", + "relationships": { + "indexCard": { + "data": { + "id": "68808d2c76cd5f7ff4e0f470592da8f02be1f615b05a143cc3821c5288e13f11", + "type": "index-card" + } + } + } + }, + { + "id": "26afa96fdbd189e4c4aeac921a42e9d3f09eb94b59ffd4b9ad300c524536cc97", + "type": "search-result", + "relationships": { + "indexCard": { + "data": { + "id": "db657130943f3c9f4cc527b23a6a246b095f62673f2cc7fc906d5914678bd337", + "type": "index-card" + } + } + } + }, + { + "id": "367b30e8a0eece555ac15fda82bb28f535f1f8beb97397c01162d619cd7058bc", + "type": "search-result", + "relationships": { + "indexCard": { + "data": { + "id": "4e6134629cc3117a123cee8a8dc633a46401c9725f01d63f689d7b84f2422359", + "type": "index-card" + } + } + } + }, + { + "id": "68808d2c76cd5f7ff4e0f470592da8f02be1f615b05a143cc3821c5288e13f11", + "type": "index-card", + "meta": { + "foaf:primaryTopic": [ + BLARG.anItem + ], + "dcterms:issued": [ + "2024-01-01" + ], + "dcterms:modified": [ + 
"2024-01-01" + ] + }, + "attributes": { + "resourceIdentifier": [ + BLARG.anItem + ], + "resourceMetadata": { + "@id": BLARG.anItem, + "title": "an item, yes" + } + }, + "links": { + "self": BLARG.aCard + } + }, + { + "id": "db657130943f3c9f4cc527b23a6a246b095f62673f2cc7fc906d5914678bd337", + "type": "index-card", + "meta": { + "foaf:primaryTopic": [ + BLARG.anItemmm + ], + "dcterms:issued": [ + "2024-03-03" + ], + "dcterms:modified": [ + "2024-03-03" + ] + }, + "attributes": { + "resourceIdentifier": [ + BLARG.anItemmm + ], + "resourceMetadata": { + "@id": BLARG.anItemmm, + "title": "an itemmm, yes" + } + }, + "links": { + "self": BLARG.aCarddd + } + }, + { + "id": "4e6134629cc3117a123cee8a8dc633a46401c9725f01d63f689d7b84f2422359", + "type": "index-card", + "meta": { + "foaf:primaryTopic": [ + BLARG.anItemm + ], + "dcterms:issued": [ + "2024-02-02" + ], + "dcterms:modified": [ + "2024-02-02" + ] + }, + "attributes": { + "resourceIdentifier": [ + BLARG.anItemm + ], + "resourceMetadata": { + "@id": BLARG.anItemm, + "title": "an itemm, yes" + } + }, + "links": { + "self": BLARG.aCardd + } + } + ], + }), + ), + } diff --git a/tests/trove/render/test_jsonld_renderer.py b/tests/trove/render/test_jsonld_renderer.py new file mode 100644 index 000000000..8741d8aba --- /dev/null +++ b/tests/trove/render/test_jsonld_renderer.py @@ -0,0 +1,233 @@ +import json + +from trove.render.jsonld import RdfJsonldRenderer +from trove.render._rendering import SimpleRendering +from ._inputs import BLARG +from . 
import _base + + +class TestJsonldRenderer(_base.TroveJsonRendererTests): + renderer_class = RdfJsonldRenderer + + expected_outputs = { + 'simple_card': SimpleRendering( + mediatype='application/ld+json', + rendered_content=json.dumps({ + "@id": BLARG.aCard, + "dcterms:issued": [ + { + "@type": "xsd:date", + "@value": "2024-01-01" + } + ], + "dcterms:modified": [ + { + "@type": "xsd:date", + "@value": "2024-01-01" + } + ], + "foaf:primaryTopic": [ + BLARG.anItem + ], + "rdf:type": [ + "trove:Indexcard", + "dcat:CatalogRecord" + ], + "trove:focusIdentifier": [ + { + "@value": BLARG.anItem + } + ], + "trove:resourceMetadata": { + "@id": BLARG.anItem, + "title": "an item, yes" + } + }), + ), + 'various_types': SimpleRendering( + mediatype='application/ld+json', + rendered_content=json.dumps({ + "@id": BLARG.aSubject, + BLARG.hasDateLiteral: [ + { + "@type": "xsd:date", + "@value": "2024-01-01" + } + ], + BLARG.hasIntegerLiteral: [ + { + "@type": "xsd:integer", + "@value": "17" + } + ], + BLARG.hasIri: [ + BLARG.anIri + ], + BLARG.hasRdfLangStringLiteral: [ + { + "@language": "en", + "@value": "a rdf:langString literal" + } + ], + BLARG.hasRdfStringLiteral: [ + { + "@value": "an rdf:string literal" + } + ], + BLARG.hasStrangeLiteral: [ + { + "@type": BLARG.aStrangeDatatype, + "@value": "a literal of strange datatype" + } + ], + "rdf:type": [BLARG.aType], + }), + ), + } + + +class TestJsonldSearchRenderer(_base.TrovesearchJsonRendererTests): + renderer_class = RdfJsonldRenderer + + expected_outputs = { + 'no_results': SimpleRendering( + mediatype='application/ld+json', + rendered_content=json.dumps({ + "@id": BLARG.aSearch, + "rdf:type": [ + "trove:Cardsearch" + ], + "trove:totalResultCount": { + "@type": "xsd:integer", + "@value": "0" + } + }), + ), + 'few_results': SimpleRendering( + mediatype='application/ld+json', + rendered_content=json.dumps({ + "@id": BLARG.aSearchFew, + "rdf:type": [ + "trove:Cardsearch" + ], + "trove:totalResultCount": { + "@type": 
"xsd:integer", + "@value": "3" + }, + "trove:searchResultPage": [ + { + "@list": [ + { + "rdf:type": [ + "trove:SearchResult" + ], + "trove:indexCard": { + "@id": BLARG.aCard, + "dcterms:issued": [ + { + "@type": "xsd:date", + "@value": "2024-01-01" + } + ], + "dcterms:modified": [ + { + "@type": "xsd:date", + "@value": "2024-01-01" + } + ], + "foaf:primaryTopic": [ + BLARG.anItem + ], + "rdf:type": [ + "trove:Indexcard", + "dcat:CatalogRecord" + ], + "trove:focusIdentifier": [ + { + "@value": BLARG.anItem + } + ], + "trove:resourceMetadata": { + "@id": BLARG.anItem, + "title": "an item, yes" + } + } + }, + { + "rdf:type": [ + "trove:SearchResult" + ], + "trove:indexCard": { + "@id": BLARG.aCardd, + "dcterms:issued": [ + { + "@type": "xsd:date", + "@value": "2024-02-02" + } + ], + "dcterms:modified": [ + { + "@type": "xsd:date", + "@value": "2024-02-02" + } + ], + "foaf:primaryTopic": [ + BLARG.anItemm + ], + "rdf:type": [ + "trove:Indexcard", + "dcat:CatalogRecord" + ], + "trove:focusIdentifier": [ + { + "@value": BLARG.anItemm + } + ], + "trove:resourceMetadata": { + "@id": BLARG.anItemm, + "title": "an itemm, yes" + } + } + }, + { + "rdf:type": [ + "trove:SearchResult" + ], + "trove:indexCard": { + "@id": BLARG.aCarddd, + "dcterms:issued": [ + { + "@type": "xsd:date", + "@value": "2024-03-03" + } + ], + "dcterms:modified": [ + { + "@type": "xsd:date", + "@value": "2024-03-03" + } + ], + "foaf:primaryTopic": [ + BLARG.anItemmm + ], + "rdf:type": [ + "trove:Indexcard", + "dcat:CatalogRecord" + ], + "trove:focusIdentifier": [ + { + "@value": BLARG.anItemmm + } + ], + "trove:resourceMetadata": { + "@id": BLARG.anItemmm, + "title": "an itemmm, yes" + } + } + } + ] + } + ], + }), + ), + } diff --git a/tests/trove/render/test_simple_csv_renderer.py b/tests/trove/render/test_simple_csv_renderer.py new file mode 100644 index 000000000..00f3291c8 --- /dev/null +++ b/tests/trove/render/test_simple_csv_renderer.py @@ -0,0 +1,24 @@ +from trove.render.simple_csv import 
TrovesearchSimpleCsvRenderer +from trove.render._rendering import SimpleRendering +from . import _base + + +# note: trovesearch only -- this renderer doesn't do arbitrary rdf + +class TestSimpleCsvRenderer(_base.TrovesearchRendererTests): + renderer_class = TrovesearchSimpleCsvRenderer + expected_outputs = { + 'no_results': SimpleRendering( + mediatype='text/csv', + rendered_content='@id\r\n', + ), + 'few_results': SimpleRendering( + mediatype='text/csv', + rendered_content=''.join(( + '@id,title\r\n', + 'http://blarg.example/vocab/anItem,"an item, yes"\r\n', + 'http://blarg.example/vocab/anItemm,"an itemm, yes"\r\n', + 'http://blarg.example/vocab/anItemmm,"an itemmm, yes"\r\n', + )), + ), + } diff --git a/tests/trove/render/test_simple_json_renderer.py b/tests/trove/render/test_simple_json_renderer.py new file mode 100644 index 000000000..2d85d5b9e --- /dev/null +++ b/tests/trove/render/test_simple_json_renderer.py @@ -0,0 +1,61 @@ +import json + +from trove.render.simple_json import TrovesearchSimpleJsonRenderer +from trove.render._rendering import SimpleRendering +from . 
import _base + + +# note: trovesearch only -- this renderer doesn't do arbitrary rdf + +class TestSimpleJsonRenderer(_base.TrovesearchJsonRendererTests): + renderer_class = TrovesearchSimpleJsonRenderer + expected_outputs = { + 'no_results': SimpleRendering( + mediatype='application/json', + rendered_content=json.dumps({ + "data": [], + "links": {}, + "meta": { + "total": 0 + } + }), + ), + 'few_results': SimpleRendering( + mediatype='application/json', + rendered_content=json.dumps({ + "data": [ + { + "@id": "http://blarg.example/vocab/anItem", + "title": "an item, yes", + "foaf:primaryTopicOf": [ + { + "@id": "http://blarg.example/vocab/aCard" + } + ] + }, + { + "@id": "http://blarg.example/vocab/anItemm", + "title": "an itemm, yes", + "foaf:primaryTopicOf": [ + { + "@id": "http://blarg.example/vocab/aCardd" + } + ] + }, + { + "@id": "http://blarg.example/vocab/anItemmm", + "title": "an itemmm, yes", + "foaf:primaryTopicOf": [ + { + "@id": "http://blarg.example/vocab/aCarddd" + } + ] + } + ], + "links": {}, + "meta": { + "total": 3 + } + }), + ), + } diff --git a/tests/trove/render/test_simple_tsv_renderer.py b/tests/trove/render/test_simple_tsv_renderer.py new file mode 100644 index 000000000..7ee25e15d --- /dev/null +++ b/tests/trove/render/test_simple_tsv_renderer.py @@ -0,0 +1,24 @@ +from trove.render.simple_tsv import TrovesearchSimpleTsvRenderer +from trove.render._rendering import SimpleRendering +from . 
import _base + + +# note: trovesearch only -- this renderer doesn't do arbitrary rdf + +class TestSimpleTsvRenderer(_base.TrovesearchRendererTests): + renderer_class = TrovesearchSimpleTsvRenderer + expected_outputs = { + 'no_results': SimpleRendering( + mediatype='text/tab-separated-values', + rendered_content='@id\r\n', + ), + 'few_results': SimpleRendering( + mediatype='text/tab-separated-values', + rendered_content=''.join(( + '@id\ttitle\r\n', + 'http://blarg.example/vocab/anItem\tan item, yes\r\n', + 'http://blarg.example/vocab/anItemm\tan itemm, yes\r\n', + 'http://blarg.example/vocab/anItemmm\tan itemmm, yes\r\n', + )), + ), + } diff --git a/tests/trove/render/test_turtle_renderer.py b/tests/trove/render/test_turtle_renderer.py new file mode 100644 index 000000000..c4e44f31e --- /dev/null +++ b/tests/trove/render/test_turtle_renderer.py @@ -0,0 +1,116 @@ +from primitive_metadata import primitive_rdf as rdf + +from trove.render.turtle import RdfTurtleRenderer +from trove.render._rendering import SimpleRendering +from . import _base + + +class _BaseTurtleRendererTest(_base.TroveRendererTests): + renderer_class = RdfTurtleRenderer + + def _get_rendered_output(self, rendering): + return rdf.tripledict_from_turtle(super()._get_rendered_output(rendering)) + + +class TestTurtleRenderer(_BaseTurtleRendererTest): + expected_outputs = { + 'simple_card': SimpleRendering( + mediatype='text/turtle', + rendered_content=''' +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix trove: . +@prefix rdf: . +@prefix xsd: . + + a dcat:CatalogRecord, trove:Indexcard ; + dcterms:issued "2024-01-01"^^xsd:date ; + dcterms:modified "2024-01-01"^^xsd:date ; + foaf:primaryTopic ; + trove:focusIdentifier "http://blarg.example/vocab/anItem"^^rdf:string ; + trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItem\\", \\"title\\": \\"an item, yes\\"}"^^rdf:JSON . 
+''', + ), + 'various_types': SimpleRendering( + mediatype='text/turtle', + rendered_content=''' +@prefix blarg: . +@prefix rdf: . +@prefix xsd: . + +blarg:aSubject a blarg:aType ; + blarg:hasDateLiteral "2024-01-01"^^xsd:date ; + blarg:hasIntegerLiteral 17 ; + blarg:hasIri blarg:anIri ; + blarg:hasRdfLangStringLiteral "a rdf:langString literal"@en ; + blarg:hasRdfStringLiteral "an rdf:string literal"^^rdf:string ; + blarg:hasStrangeLiteral "a literal of strange datatype"^^blarg:aStrangeDatatype . +''', + ), + } + + +class TestTurtleTrovesearchRenderer(_BaseTurtleRendererTest, _base.TrovesearchRendererTests): + expected_outputs = { + 'no_results': SimpleRendering( + mediatype='text/turtle', + rendered_content=''' +@prefix trove: . +@prefix xsd: . + + a trove:Cardsearch ; + trove:totalResultCount 0 . +''', + ), + 'few_results': SimpleRendering( + mediatype='text/turtle', + rendered_content=''' +@prefix dcat: . +@prefix dcterms: . +@prefix foaf: . +@prefix trove: . +@prefix rdf: . +@prefix xsd: . + + a trove:Cardsearch ; + trove:searchResultPage [ + a rdf:Seq ; + rdf:_1 [ + a trove:SearchResult ; + trove:indexCard + ] ; + rdf:_2 [ + a trove:SearchResult ; + trove:indexCard + ] ; + rdf:_3 [ + a trove:SearchResult ; + trove:indexCard + ] + ] ; + trove:totalResultCount 3 . + + a dcat:CatalogRecord, trove:Indexcard ; + dcterms:issued "2024-01-01"^^xsd:date ; + dcterms:modified "2024-01-01"^^xsd:date ; + foaf:primaryTopic ; + trove:focusIdentifier "http://blarg.example/vocab/anItem"^^rdf:string ; + trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItem\\", \\"title\\": \\"an item, yes\\"}"^^rdf:JSON . + + a dcat:CatalogRecord, trove:Indexcard ; + dcterms:issued "2024-02-02"^^xsd:date ; + dcterms:modified "2024-02-02"^^xsd:date ; + foaf:primaryTopic ; + trove:focusIdentifier "http://blarg.example/vocab/anItemm"^^rdf:string ; + trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItemm\\", \\"title\\": \\"an itemm, yes\\"}"^^rdf:JSON . 
+ + a dcat:CatalogRecord, trove:Indexcard ; + dcterms:issued "2024-03-03"^^xsd:date ; + dcterms:modified "2024-03-03"^^xsd:date ; + foaf:primaryTopic ; + trove:focusIdentifier "http://blarg.example/vocab/anItemmm"^^rdf:string ; + trove:resourceMetadata "{\\"@id\\": \\"http://blarg.example/vocab/anItemmm\\", \\"title\\": \\"an itemmm, yes\\"}"^^rdf:JSON . +''', + ), + } diff --git a/trove/exceptions.py b/trove/exceptions.py index 7935c0511..4a9ddd286 100644 --- a/trove/exceptions.py +++ b/trove/exceptions.py @@ -93,6 +93,10 @@ class CannotRenderMediatype(ResponseRenderingError): http_status = http.HTTPStatus.NOT_ACCEPTABLE +class CannotRenderStreamTwice(ResponseRenderingError): + pass + + ### # primitive rdf diff --git a/trove/extract/legacy_sharev2.py b/trove/extract/legacy_sharev2.py index 6add0221a..896c62251 100644 --- a/trove/extract/legacy_sharev2.py +++ b/trove/extract/legacy_sharev2.py @@ -54,7 +54,10 @@ def extract_rdf_from_sharev2graph(self, sharev2graph): primitive_rdf.literal('sharev2-normd'), ), norms=OSFMAP_NORMS, - gatherer_kwargnames={'mnode', 'source_config'}, + gatherer_params={ + 'mnode': SHAREv2.MutableNode, + 'source_config': SHAREv2.SourceConfig, + }, ) diff --git a/trove/render/__init__.py b/trove/render/__init__.py index 637d948b1..351ac791f 100644 --- a/trove/render/__init__.py +++ b/trove/render/__init__.py @@ -1,51 +1,49 @@ from django import http from trove import exceptions as trove_exceptions -from trove.vocab.trove import TROVE_API_THESAURUS -from trove.vocab.namespaces import NAMESPACES_SHORTHAND from ._base import BaseRenderer from .jsonapi import RdfJsonapiRenderer from .html_browse import RdfHtmlBrowseRenderer from .turtle import RdfTurtleRenderer from .jsonld import RdfJsonldRenderer +from .simple_csv import TrovesearchSimpleCsvRenderer from .simple_json import TrovesearchSimpleJsonRenderer +from .simple_tsv import TrovesearchSimpleTsvRenderer -__all__ = ('get_renderer',) +__all__ = ('get_renderer_type',) RENDERERS: 
tuple[type[BaseRenderer], ...] = ( RdfHtmlBrowseRenderer, RdfJsonapiRenderer, RdfTurtleRenderer, RdfJsonldRenderer, + TrovesearchSimpleCsvRenderer, TrovesearchSimpleJsonRenderer, + TrovesearchSimpleTsvRenderer, ) RENDERER_BY_MEDIATYPE = { - _renderer_cls.MEDIATYPE: _renderer_cls - for _renderer_cls in RENDERERS + _renderer_type.MEDIATYPE: _renderer_type + for _renderer_type in RENDERERS } -DEFAULT_RENDERER = RdfJsonapiRenderer # the most stable one +DEFAULT_RENDERER_TYPE = RdfJsonapiRenderer # the most stable one -def get_renderer(request: http.HttpRequest): +def get_renderer_type(request: http.HttpRequest) -> type[BaseRenderer]: # TODO: recognize .extension? - _chosen_renderer_cls = None + _chosen_renderer_type = None _requested_mediatype = request.GET.get('acceptMediatype') if _requested_mediatype: try: - _chosen_renderer_cls = RENDERER_BY_MEDIATYPE[_requested_mediatype] + _chosen_renderer_type = RENDERER_BY_MEDIATYPE[_requested_mediatype] except KeyError: raise trove_exceptions.CannotRenderMediatype(_requested_mediatype) else: - for _mediatype, _renderer_cls in RENDERER_BY_MEDIATYPE.items(): + for _mediatype, _renderer_type in RENDERER_BY_MEDIATYPE.items(): if request.accepts(_mediatype): - _chosen_renderer_cls = _renderer_cls + _chosen_renderer_type = _renderer_type break - if _chosen_renderer_cls is None: - _chosen_renderer_cls = DEFAULT_RENDERER - return _chosen_renderer_cls( - iri_shorthand=NAMESPACES_SHORTHAND, - thesaurus=TROVE_API_THESAURUS, - request=request, - ) + if _chosen_renderer_type is None: + _chosen_renderer_type = DEFAULT_RENDERER_TYPE + return _chosen_renderer_type diff --git a/trove/render/_base.py b/trove/render/_base.py index 2110c511b..996ff6744 100644 --- a/trove/render/_base.py +++ b/trove/render/_base.py @@ -1,63 +1,78 @@ import abc +import dataclasses +import functools import json -from typing import Optional, ClassVar +from typing import ClassVar -from django import http -from primitive_metadata import primitive_rdf as rdf +from 
primitive_metadata import ( + gather, + primitive_rdf as rdf, +) from trove import exceptions as trove_exceptions from trove.vocab import mediatypes +from trove.vocab.namespaces import NAMESPACES_SHORTHAND +from trove.vocab.trove import TROVE_API_THESAURUS +from ._rendering import ProtoRendering, SimpleRendering +@dataclasses.dataclass class BaseRenderer(abc.ABC): + """for creating a serialized rendering of an api response modeled as rdf""" + # required in subclasses MEDIATYPE: ClassVar[str] - # should be set when render_error_document is overridden: - ERROR_MEDIATYPE: ClassVar[str] = mediatypes.JSONAPI # should be set when the renderer expects a specific derived metadata format INDEXCARD_DERIVER_IRI: ClassVar[str | None] = None + # when True, the renderer renders only what's already been gathered + # (set False if the renderer knows what to request) + PASSIVE_RENDER: ClassVar[bool] = True - def __init__( - self, *, - iri_shorthand: rdf.IriShorthand, - thesaurus: rdf.RdfTripleDictionary, - request: Optional[http.HttpRequest] = None, - ): - self.iri_shorthand = iri_shorthand - self.thesaurus = rdf.RdfGraph(thesaurus) - self.request = request - - def render_response( - self, - response_data: rdf.RdfTripleDictionary, - response_focus_iri: str, - **response_kwargs, - ): - return http.HttpResponse( - content=self.render_document(rdf.RdfGraph(response_data), response_focus_iri), - content_type=self.MEDIATYPE, - **response_kwargs, - ) + # instance fields + response_focus: gather.Focus + response_gathering: gather.Gathering + iri_shorthand: rdf.IriShorthand = NAMESPACES_SHORTHAND + thesaurus_tripledict: rdf.RdfTripleDictionary = dataclasses.field(default_factory=lambda: TROVE_API_THESAURUS) - def render_error_response(self, error: trove_exceptions.TroveError): - return http.HttpResponse( - content=self.render_error_document(error), - content_type=self.ERROR_MEDIATYPE, - status=error.http_status, - ) + @functools.cached_property + def thesaurus(self): + return 
rdf.RdfGraph(self.thesaurus_tripledict) - @abc.abstractmethod - def render_document(self, data: rdf.RdfGraph, focus_iri: str) -> str: + @functools.cached_property + def response_data(self): + return rdf.RdfGraph(self.response_tripledict) + + @functools.cached_property + def response_tripledict(self) -> rdf.RdfTripleDictionary: + # TODO: self.response_gathering.ask_all_about or a default ask... + return self.response_gathering.leaf_a_record() + + def simple_render_document(self) -> str: raise NotImplementedError - def render_error_document(self, error: trove_exceptions.TroveError) -> str: + def render_document(self) -> ProtoRendering: + try: + _content = self.simple_render_document() + except NotImplementedError: + raise NotImplementedError(f'class "{type(self)}" must implement either `render_document` or `simple_render_document`') + else: + return SimpleRendering( # type: ignore[return-value] # until ProtoRendering(typing.Protocol) with py3.12 + mediatype=self.MEDIATYPE, + rendered_content=_content, + ) + + @classmethod + def render_error_document(cls, error: trove_exceptions.TroveError) -> ProtoRendering: # may override, but default to jsonapi - return json.dumps( - {'errors': [{ # https://jsonapi.org/format/#error-objects - 'status': error.http_status, - 'code': error.error_location, - 'title': error.__class__.__name__, - 'detail': str(error), - }]}, - indent=2, + return SimpleRendering( # type: ignore[return-value] # until ProtoRendering(typing.Protocol) with py3.12 + mediatype=mediatypes.JSONAPI, + rendered_content=json.dumps( + {'errors': [{ # https://jsonapi.org/format/#error-objects + 'status': error.http_status, + 'code': error.error_location, + 'title': error.__class__.__name__, + 'detail': str(error), + }]}, + indent=2, + ), ) diff --git a/trove/render/_rendering.py b/trove/render/_rendering.py new file mode 100644 index 000000000..52e5f9e2c --- /dev/null +++ b/trove/render/_rendering.py @@ -0,0 +1,47 @@ +import abc +import dataclasses +from typing import 
Iterator + +from trove import exceptions as trove_exceptions + + +class ProtoRendering(abc.ABC): + '''base class for all renderings + + (TODO: typing.Protocol (when py3.12+)) + ''' + + @property + @abc.abstractmethod + def mediatype(self) -> str: + '''`mediatype`: required readable attribute + ''' + raise NotImplementedError + + @abc.abstractmethod + def iter_content(self) -> Iterator[str | bytes | memoryview]: + '''`iter_content`: (only) required method + ''' + yield from () + + +@dataclasses.dataclass +class SimpleRendering: # implements ProtoRendering + mediatype: str + rendered_content: str = '' + + def iter_content(self): + yield self.rendered_content + + +@dataclasses.dataclass +class StreamableRendering: # implements ProtoRendering + mediatype: str + content_stream: Iterator[str | bytes | memoryview] + _started_already: bool = False + + def iter_content(self): + if self._started_already: + raise trove_exceptions.CannotRenderStreamTwice + self._started_already = True + yield from self.content_stream diff --git a/trove/render/_simple_trovesearch.py b/trove/render/_simple_trovesearch.py new file mode 100644 index 000000000..f49cbfe50 --- /dev/null +++ b/trove/render/_simple_trovesearch.py @@ -0,0 +1,109 @@ +import json +from typing import Iterator, Any + +from primitive_metadata import primitive_rdf as rdf + +from trove import exceptions as trove_exceptions +from trove.vocab.jsonapi import JSONAPI_LINK_OBJECT +from trove.vocab.namespaces import TROVE, RDF +from ._base import BaseRenderer +from ._rendering import ProtoRendering, SimpleRendering + + +class SimpleTrovesearchRenderer(BaseRenderer): + '''for "simple" search api responses (including only result metadata) + + (very entangled with trove/trovesearch/trovesearch_gathering.py) + ''' + PASSIVE_RENDER = False # knows the properties it cares about + _page_links: set + __already_iterated_cards = False + + def simple_unicard_rendering(self, card_iri: str, osfmap_json: dict) -> str: + raise NotImplementedError 
+ + def simple_multicard_rendering(self, cards: Iterator[tuple[str, dict]]) -> str: + raise NotImplementedError + + def unicard_rendering(self, card_iri: str, osfmap_json: dict) -> ProtoRendering: + return SimpleRendering( # type: ignore[return-value] + mediatype=self.MEDIATYPE, + rendered_content=self.simple_unicard_rendering(card_iri, osfmap_json), + ) + + def multicard_rendering(self, card_pages: Iterator[dict[str, dict]]) -> ProtoRendering: + _cards = ( + (_card_iri, _card_contents) + for _page in card_pages + for _card_iri, _card_contents in _page.items() + ) + return SimpleRendering( # type: ignore[return-value] + mediatype=self.MEDIATYPE, + rendered_content=self.simple_multicard_rendering(_cards), + ) + + def render_document(self) -> ProtoRendering: + _focustypes = set(self.response_gathering.ask(RDF.type, focus=self.response_focus)) + if (TROVE.Cardsearch in _focustypes) or (TROVE.Valuesearch in _focustypes): + return self.multicard_rendering(self._iter_card_pages()) + if TROVE.Indexcard in _focustypes: + return self.unicard_rendering( + self.response_focus.iri, + self._get_card_content(self.response_focus.iri), + ) + raise trove_exceptions.UnsupportedRdfType(_focustypes) + + def _iter_card_pages(self) -> Iterator[dict[str, Any]]: + assert not self.__already_iterated_cards + self.__already_iterated_cards = True + self._page_links = set() + for _page, _page_graph in self.response_gathering.ask_exhaustively( + TROVE.searchResultPage, focus=self.response_focus + ): + if (RDF.type, JSONAPI_LINK_OBJECT) in _page: + self._page_links.add(_page) + elif rdf.is_container(_page): + _cardpage = [] + for _search_result in rdf.container_objects(_page): + try: + _card = next( + _obj + for _pred, _obj in _search_result + if _pred == TROVE.indexCard + ) + except StopIteration: + pass # skip malformed + else: + _cardpage.append(_card) + yield { + self._get_card_iri(_card): self._get_card_content(_card, _page_graph) + for _card in _cardpage + } + + def _get_card_iri(self, 
card: str | rdf.RdfBlanknode) -> str: + return card if isinstance(card, str) else '' + + def _get_card_content( + self, + card: str | rdf.RdfBlanknode, + graph: rdf.RdfGraph | None = None, + ) -> dict: + if isinstance(card, str): + _card_content = ( + next(self.response_gathering.ask(TROVE.resourceMetadata, focus=card)) + if graph is None + else next(graph.q(card, TROVE.resourceMetadata)) + ) + elif isinstance(card, frozenset): + _card_content = next( + _obj + for _pred, _obj in card + if _pred == TROVE.resourceMetadata + ) + else: + raise trove_exceptions.ExpectedIriOrBlanknode(card) + if isinstance(_card_content, rdf.QuotedGraph): + return _card_content + if isinstance(_card_content, rdf.Literal) and (RDF.JSON in _card_content.datatype_iris): + return json.loads(_card_content.unicode_value) + raise ValueError(card) diff --git a/trove/render/html_browse.py b/trove/render/html_browse.py index 4e6df7640..c7dceaf0e 100644 --- a/trove/render/html_browse.py +++ b/trove/render/html_browse.py @@ -1,4 +1,5 @@ import contextlib +import dataclasses import datetime import markdown2 import random @@ -11,6 +12,7 @@ ) from django.contrib.staticfiles.storage import staticfiles_storage +from django.http import QueryDict from django.urls import reverse from primitive_metadata import primitive_rdf @@ -22,42 +24,58 @@ from ._base import BaseRenderer STABLE_MEDIATYPES = (mediatypes.JSONAPI,) -UNSTABLE_MEDIATYPES = (mediatypes.TURTLE, mediatypes.JSONLD, mediatypes.JSON,) +UNSTABLE_MEDIATYPES = ( + mediatypes.TURTLE, + mediatypes.JSONLD, + # TODO: below are only for search/index-card views + mediatypes.JSON, + mediatypes.TSV, + mediatypes.CSV, +) +@dataclasses.dataclass class RdfHtmlBrowseRenderer(BaseRenderer): MEDIATYPE = 'text/html; charset=utf-8' - def __init__(self, **kwargs): - super().__init__(**kwargs) - self.rendered_element = None - self.__current_element = None - self.__visiting_iris = None - self.__heading_depth = None - - def render_document(self, data: 
primitive_rdf.RdfGraph, focus_iri: str) -> str: - self.data = data - with self.__rendering(): - with self.__nest('head'): - self.__leaf('link', attrs={ - 'rel': 'stylesheet', - 'href': staticfiles_storage.url('css/browse.css'), - }) - _body_attrs = { - 'class': 'BrowseWrapper', - 'style': f'--random-turn: {random.random()}turn;', - } - with self.__nest('body', attrs=_body_attrs): - self.__render_subj(focus_iri), - self.__render_mediatype_links() - # TODO:
with unvisited triples in self.data (unreachable from focus_iri) + def simple_render_document(self) -> str: + _html_builder = _HtmlBuilder(self.response_tripledict, self.response_focus.single_iri(), self.iri_shorthand) + _html_str = etree_tostring(_html_builder.html_element, encoding='unicode', method='html') return ''.join(( '', # TODO: can etree put the doctype in? - etree_tostring(self.rendered_element, encoding='unicode', method='html'), + _html_str, )) - ### - # private rdf-rendering helpers + +@dataclasses.dataclass +class _HtmlBuilder: + all_data: primitive_rdf.RdfTripleDictionary + focus_iri: str + iri_shorthand: primitive_rdf.IriShorthand + html_element: Element = dataclasses.field(init=False) + __current_data: primitive_rdf.RdfTripleDictionary = dataclasses.field(init=False) + __current_element: Element = dataclasses.field(init=False) + __visiting_iris: set[str] = dataclasses.field(init=False) + __heading_depth: int = 0 + + def __post_init__(self): + # TODO: lang (according to request -- also translate) + self.html_element = self.__current_element = Element('html') + self.__current_data = self.all_data + self.__visiting_iris = set() + with self.__nest('head'): + self.__leaf('link', attrs={ + 'rel': 'stylesheet', + 'href': staticfiles_storage.url('css/browse.css'), + }) + _body_attrs = { + 'class': 'BrowseWrapper', + 'style': f'--random-turn: {random.random()}turn;', + } + with self.__nest('body', attrs=_body_attrs): + self.__render_subj(self.focus_iri), + self.__render_mediatype_links() + # TODO:
with unvisited triples in self.data (unreachable from focus_iri) def __render_mediatype_links(self): with self.__nest('nav', attrs={'class': 'VisibleNest Browse__card'}): @@ -68,14 +86,15 @@ def __render_mediatype_links(self): self.__mediatype_link(_mediatype) def __mediatype_link(self, mediatype: str): - _qparams = self.request.GET.copy() + (_scheme, _netloc, _path, _query, _fragment) = urlsplit(self.focus_iri) + _qparams = QueryDict(_query, mutable=True) _qparams['acceptMediatype'] = mediatype _href = urlunsplit(( - self.request.scheme, - self.request.get_host(), - self.request.path, + _scheme, + _netloc, + _path, _qparams.urlencode(), - '', + _fragment, )) self.__leaf('a', text=mediatype, attrs={'href': _href}) if mediatype in UNSTABLE_MEDIATYPES: @@ -88,7 +107,7 @@ def __mediatype_link(self, mediatype: str): _link.tail = ')' def __render_subj(self, subj_iri: str, start_collapsed=False): - _twopledict = self.data.tripledict.get(subj_iri, {}) + _twopledict = self.__current_data.get(subj_iri, {}) with self.__visiting(subj_iri): with self.__h_tag() as _h_tag: with self.__nest( @@ -124,7 +143,7 @@ def __twoples(self, twopledict: primitive_rdf.RdfTwopleDictionary): def __obj(self, obj: primitive_rdf.RdfObject): if isinstance(obj, str): # iri # TODO: detect whether indexcard? 
- if obj in self.data.tripledict: + if obj in self.__current_data: if obj in self.__visiting_iris: self.__leaf_link(obj) # TODO: consider else: @@ -177,18 +196,6 @@ def __quoted_graph(self, quoted_graph: primitive_rdf.QuotedGraph): ### # private html-building helpers - @contextlib.contextmanager - def __rendering(self): - # TODO: lang (according to request -- also translate) - self.__current_element = Element('html') - self.__visiting_iris = set() - try: - yield - self.rendered_element = self.__current_element - finally: - self.__current_element = None - self.__visiting_iris = None - @contextlib.contextmanager def __visiting(self, iri: str): assert iri not in self.__visiting_iris @@ -212,14 +219,14 @@ def __h_tag(self): @contextlib.contextmanager def __quoted_data(self, quoted_data: dict): - _outer_data = self.data + _outer_data = self.__current_data _outer_visiting_iris = self.__visiting_iris - self.data = primitive_rdf.RdfGraph(quoted_data) + self.__current_data = quoted_data self.__visiting_iris = set() try: yield finally: - self.data = _outer_data + self.__current_data = _outer_data self.__visiting_iris = _outer_visiting_iris @contextlib.contextmanager @@ -246,18 +253,13 @@ def __leaf(self, tag_name, *, text=None, attrs=None): def __nest_link(self, iri: str, *, attrs=None): return self.__nest('a', attrs={ **(attrs or {}), - 'href': self.__href_for_iri(iri), + 'href': trove_browse_link(iri), }) def __leaf_link(self, iri: str, *, attrs=None): with self.__nest_link(iri, attrs=attrs) as _link: _link.text = self.iri_shorthand.compact_iri(iri) - def __href_for_iri(self, iri: str): - if self.request and (self.request.get_host() == urlsplit(iri).netloc): - return iri - return trove_browse_link(iri) - def __label_for_iri(self, iri: str): # TODO: get actual label in requested language _shorthand = self.iri_shorthand.compact_iri(iri) diff --git a/trove/render/jsonapi.py b/trove/render/jsonapi.py index 3f8e3f40a..e233fe1a4 100644 --- a/trove/render/jsonapi.py +++ 
b/trove/render/jsonapi.py @@ -1,8 +1,9 @@ import contextlib +import dataclasses import datetime import hashlib import json -from typing import Iterable, Union +from typing import Iterable, Union, Any from primitive_metadata import primitive_rdf @@ -19,9 +20,9 @@ OWL, RDF, TROVE, + XSD, ) from trove.vocab.trove import ( - TROVE_API_THESAURUS, trove_indexcard_namespace, ) from ._base import BaseRenderer @@ -32,6 +33,7 @@ _IriOrBlanknode = Union[str, frozenset] +@dataclasses.dataclass class RdfJsonapiRenderer(BaseRenderer): '''render rdf data into jsonapi resources, guided by a given rdf vocabulary @@ -53,19 +55,13 @@ class RdfJsonapiRenderer(BaseRenderer): MEDIATYPE = mediatypes.JSONAPI INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] + _identifier_object_cache: dict = dataclasses.field(default_factory=dict) + _id_namespace_set: Iterable[primitive_rdf.IriNamespace] = (trove_indexcard_namespace(),) __to_include: set[primitive_rdf.RdfObject] | None = None - def __init__(self, **kwargs): - super().__init__(**kwargs) - self._vocab = primitive_rdf.RdfGraph(TROVE_API_THESAURUS) - self._identifier_object_cache = {} - # TODO: move "id namespace" to vocab (property on each type) - self._id_namespace_set = [trove_indexcard_namespace()] - - def render_document(self, data: primitive_rdf.RdfGraph, focus_iri: str) -> str: - self._data = data + def simple_render_document(self) -> str: return json.dumps( - self.render_dict(focus_iri), + self.render_dict(self.response_focus.single_iri()), indent=2, # TODO: pretty-print query param? 
) @@ -95,7 +91,7 @@ def render_dict(self, primary_iris: Union[str, Iterable[str]]) -> dict: def render_resource_object(self, iri_or_blanknode: _IriOrBlanknode) -> dict: _resource_object = {**self.render_identifier_object(iri_or_blanknode)} _twopledict = ( - (self._data.tripledict.get(iri_or_blanknode) or {}) + (self.response_data.tripledict.get(iri_or_blanknode) or {}) if isinstance(iri_or_blanknode, str) else primitive_rdf.twopledict_from_twopleset(iri_or_blanknode) ) @@ -111,7 +107,7 @@ def render_identifier_object(self, iri_or_blanknode: _IriOrBlanknode): return self._identifier_object_cache[iri_or_blanknode] except KeyError: if isinstance(iri_or_blanknode, str): - _type_iris = list(self._data.q(iri_or_blanknode, RDF.type)) + _type_iris = list(self.response_data.q(iri_or_blanknode, RDF.type)) _id_obj = { 'id': self._resource_id_for_iri(iri_or_blanknode), 'type': self._single_typename(_type_iris), @@ -145,7 +141,7 @@ def _single_typename(self, type_iris: list[str]): def _membername_for_iri(self, iri: str): try: - _membername = next(self._vocab.q(iri, JSONAPI_MEMBERNAME)) + _membername = next(self.thesaurus.q(iri, JSONAPI_MEMBERNAME)) except StopIteration: pass else: @@ -154,10 +150,21 @@ def _membername_for_iri(self, iri: str): raise trove_exceptions.ExpectedLiteralObject((iri, JSONAPI_MEMBERNAME, _membername)) return self.iri_shorthand.compact_iri(iri) - def _resource_id_for_blanknode(self, blanknode: frozenset): - # content-addressed blanknode id (maybe-TODO: care about hash stability, - # tho don't need it with cached render_identifier_object implementation) - return hashlib.sha256(str(blanknode).encode()).hexdigest() + def _resource_id_for_blanknode(self, blanknode: frozenset, /): + # content-addressed blanknode id + _serializable_twoples = [] + for _pred, _obj in blanknode: + _serializable_obj: Any + if isinstance(_obj, primitive_rdf.Literal): + _serializable_obj = [_obj.unicode_value, *sorted(_obj.datatype_iris)] + elif isinstance(_obj, (str, int, float)): 
+ _serializable_obj = _obj + elif isinstance(_obj, frozenset): + _serializable_obj = self._resource_id_for_blanknode(_obj) + else: + raise ValueError(_obj) + _serializable_twoples.append((_pred, _serializable_obj)) + return hashlib.sha256(json.dumps(sorted(_serializable_twoples)).encode()).hexdigest() def _resource_id_for_iri(self, iri: str): for _iri_namespace in self._id_namespace_set: @@ -167,8 +174,8 @@ def _resource_id_for_iri(self, iri: str): return hashlib.sha256(iri.encode()).hexdigest() def _render_field(self, predicate_iri, object_set, *, into: dict): - _is_relationship = (predicate_iri, RDF.type, JSONAPI_RELATIONSHIP) in self._vocab - _is_attribute = (predicate_iri, RDF.type, JSONAPI_ATTRIBUTE) in self._vocab + _is_relationship = (predicate_iri, RDF.type, JSONAPI_RELATIONSHIP) in self.thesaurus + _is_attribute = (predicate_iri, RDF.type, JSONAPI_ATTRIBUTE) in self.thesaurus _field_key = self._membername_for_iri(predicate_iri) _doc_key = 'meta' # unless configured for jsonapi, default to unstructured 'meta' if ':' not in _field_key: @@ -184,7 +191,7 @@ def _render_field(self, predicate_iri, object_set, *, into: dict): into.setdefault(_doc_key, {})[_field_key] = _fieldvalue def _one_or_many(self, predicate_iri: str, datalist: list): - _only_one = (predicate_iri, RDF.type, OWL.FunctionalProperty) in self._vocab + _only_one = (predicate_iri, RDF.type, OWL.FunctionalProperty) in self.thesaurus if _only_one: if len(datalist) > 1: raise trove_exceptions.OwlObjection(f'multiple objects for to-one relation <{predicate_iri}>: {datalist}') @@ -280,6 +287,8 @@ def _render_attribute_datum(self, rdfobject: primitive_rdf.RdfObject) -> dict | if isinstance(rdfobject, primitive_rdf.Literal): if RDF.JSON in rdfobject.datatype_iris: return json.loads(rdfobject.unicode_value) + if XSD.integer in rdfobject.datatype_iris: + return int(rdfobject.unicode_value) return rdfobject.unicode_value # TODO: decide how to represent language elif isinstance(rdfobject, str): try: # maybe 
it's a jsonapi resource diff --git a/trove/render/jsonld.py b/trove/render/jsonld.py index 6f07a4073..9ac61554b 100644 --- a/trove/render/jsonld.py +++ b/trove/render/jsonld.py @@ -22,9 +22,9 @@ class RdfJsonldRenderer(BaseRenderer): __visiting_iris: set | None = None - def render_document(self, data: rdf.RdfGraph, focus_iri: str) -> str: + def simple_render_document(self) -> str: return json.dumps( - self.render_jsonld(data, focus_iri), + self.render_jsonld(self.response_data, self.response_focus.single_iri()), indent=2, sort_keys=True, ) @@ -147,13 +147,9 @@ def _list_or_single_value(self, predicate_iri: str, objectlist: list): return None else: return _only_obj - if predicate_iri in _PREDICATES_OF_FLEXIBLE_CARDINALITY: - return ( - objectlist - if len(objectlist) != 1 - else objectlist[0] - ) - return objectlist + if predicate_iri in _PREDICATES_OF_FLEXIBLE_CARDINALITY and len(objectlist) == 1: + return objectlist[0] + return sorted(objectlist, key=_naive_sort_key) @contextlib.contextmanager def __visiting(self, iri: str): @@ -165,3 +161,8 @@ def __visiting(self, iri: str): def __already_visiting(self, iri: str) -> bool: return bool(self.__visiting_iris and (iri in self.__visiting_iris)) + + +def _naive_sort_key(jsonable_obj): + _json = json.dumps(jsonable_obj) + return (len(_json), _json) diff --git a/trove/render/simple_csv.py b/trove/render/simple_csv.py new file mode 100644 index 000000000..b94949c02 --- /dev/null +++ b/trove/render/simple_csv.py @@ -0,0 +1,160 @@ +from __future__ import annotations +import csv +import functools +import dataclasses +import typing + +from trove.vocab import mediatypes +from trove.vocab import osfmap +from trove.vocab.namespaces import TROVE +from ._simple_trovesearch import SimpleTrovesearchRenderer +from ._rendering import StreamableRendering + + +Jsonpath = typing.Iterable[str] # path of json keys + +_MULTIVALUE_DELIMITER = ' ; ' # possible improvement: smarter in-value delimiting? 
+_VALUE_KEY_PREFERENCE = ('@value', '@id', 'name', 'prefLabel', 'label') + + +class TrovesearchSimpleCsvRenderer(SimpleTrovesearchRenderer): + MEDIATYPE = mediatypes.CSV + INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] + CSV_DIALECT = csv.excel + + def unicard_rendering(self, card_iri: str, osfmap_json: dict): + self.multicard_rendering(card_pages=[{card_iri: osfmap_json}]) + + def multicard_rendering(self, card_pages: typing.Iterable[dict[str, dict]]): + _doc = TabularDoc(card_pages) + return StreamableRendering( + mediatype=self.MEDIATYPE, + content_stream=csv_stream(self.CSV_DIALECT, _doc.header(), _doc.rows()), + ) + + +def csv_stream(csv_dialect, header: list, rows: typing.Iterator[list]) -> typing.Iterator[str]: + _writer = csv.writer(_Echo(), dialect=csv_dialect) + yield _writer.writerow(header) + for _row in rows: + yield _writer.writerow(_row) + + +@dataclasses.dataclass +class TabularDoc: + card_pages: typing.Iterator[dict[str, dict]] + _started: bool = False + + @functools.cached_property + def field_paths(self) -> tuple[Jsonpath, ...]: + # TODO: use jsonapi's "sparse fieldsets" to allow selecting + # https://jsonapi.org/format/#fetching-sparse-fieldsets + return tuple(( + ('@id',), + *self._nonempty_field_paths() + )) + + @functools.cached_property + def first_page(self) -> dict[str, dict]: + return next(self.card_pages, {}) + + def _iter_card_pages(self): + assert not self._started + self._started = True + if self.first_page: + yield self.first_page + yield from self.card_pages + + def header(self) -> list[str]: + return ['.'.join(_path) for _path in self.field_paths] + + def rows(self) -> typing.Iterator[list[str]]: + for _page in self._iter_card_pages(): + for _card_iri, _osfmap_json in _page.items(): + yield self._row_values(_osfmap_json) + + def _nonempty_field_paths(self) -> typing.Iterator[Jsonpath]: + for _path in osfmap.DEFAULT_TABULAR_SEARCH_COLUMN_PATHS: + _jsonpath = _osfmap_jsonpath(_path) + _path_is_present = any( + _has_value(_card, 
_jsonpath) + for _card in self.first_page.values() + ) + if _path_is_present: + yield _jsonpath + + def _row_values(self, osfmap_json: dict) -> list[str]: + return [ + self._row_field_value(osfmap_json, _field_path) + for _field_path in self.field_paths + ] + + def _row_field_value(self, osfmap_json: dict, field_path: Jsonpath) -> str: + _rendered_values = [ + _render_tabularly(_obj) + for _obj in _iter_values(osfmap_json, field_path) + ] + if len(_rendered_values) == 1: + return _rendered_values[0] # preserve type for single numbers + # for multiple values, can only be a string + return _MULTIVALUE_DELIMITER.join(map(str, _rendered_values)) + + +def _osfmap_jsonpath(iri_path: typing.Iterable[str]) -> Jsonpath: + _shorthand = osfmap.osfmap_shorthand() + return tuple( + _shorthand.compact_iri(_pathstep) + for _pathstep in iri_path + ) + + +def _has_value(osfmap_json: dict, path: Jsonpath) -> bool: + try: + next(_iter_values(osfmap_json, path)) + except StopIteration: + return False + else: + return True + + +def _iter_values(osfmap_json: dict, path: Jsonpath) -> typing.Iterator: + assert path + (_step, *_rest) = path + _val = osfmap_json.get(_step) + if _rest: + if isinstance(_val, dict): + yield from _iter_values(_val, _rest) + elif isinstance(_val, list): + for _val_obj in _val: + yield from _iter_values(_val_obj, _rest) + else: + if isinstance(_val, list): + yield from _val + elif _val is not None: + yield _val + + +def _render_tabularly(json_val): + if isinstance(json_val, (str, int, float)): + return json_val + if isinstance(json_val, dict): + for _key in _VALUE_KEY_PREFERENCE: + _val = json_val.get(_key) + if isinstance(_val, list): + return ( + _render_tabularly(_val[0]) + if _val + else None + ) + if _val is not None: + return _val + return None + + +class _Echo: + '''a write-only file-like object, to convince `csv.csvwriter.writerow` to return strings + + from https://docs.djangoproject.com/en/5.1/howto/outputting-csv/#streaming-large-csv-files + ''' + def 
write(self, line: str): + return line diff --git a/trove/render/simple_json.py b/trove/render/simple_json.py index 68f16362c..60271a701 100644 --- a/trove/render/simple_json.py +++ b/trove/render/simple_json.py @@ -2,87 +2,49 @@ from primitive_metadata import primitive_rdf as rdf -from trove import exceptions as trove_exceptions from trove.vocab.jsonapi import ( JSONAPI_LINK_OBJECT, JSONAPI_MEMBERNAME, ) from trove.vocab import mediatypes from trove.vocab.namespaces import TROVE, RDF -from ._base import BaseRenderer +from ._simple_trovesearch import SimpleTrovesearchRenderer -class TrovesearchSimpleJsonRenderer(BaseRenderer): +class TrovesearchSimpleJsonRenderer(SimpleTrovesearchRenderer): '''for "simple json" search api -- very entangled with trove/trovesearch/trovesearch_gathering.py ''' MEDIATYPE = mediatypes.JSON INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] - def render_document(self, data: rdf.RdfGraph, focus_iri: str) -> str: - _focustypes = set(data.q(focus_iri, RDF.type)) - if TROVE.Cardsearch in _focustypes: - _jsonable = self._render_cardsearch(data, focus_iri) - elif TROVE.Valuesearch in _focustypes: - _jsonable = self._render_valuesearch(data, focus_iri) - elif TROVE.Indexcard in _focustypes: - _jsonable = self._render_card(data, focus_iri) - else: - raise trove_exceptions.UnsupportedRdfType(_focustypes) + def simple_unicard_rendering(self, card_iri, osfmap_json): return json.dumps({ - 'data': _jsonable, - 'links': self._render_links(data, focus_iri), - 'meta': self._render_meta(data, focus_iri), + 'data': self._render_card_content(card_iri, osfmap_json), + 'links': self._render_links(), + 'meta': self._render_meta(), }, indent=2) - def _render_cardsearch(self, graph: rdf.RdfGraph, cardsearch_iri: str): - return self._render_searchresultpage(graph, cardsearch_iri) - - def _render_valuesearch(self, graph: rdf.RdfGraph, valuesearch_iri: str): - return self._render_searchresultpage(graph, valuesearch_iri) - - def _render_searchresultpage(self, 
graph: rdf.RdfGraph, focus_iri: str): - # just each card's contents - _results_sequence = next( - _page - for _page in graph.q(focus_iri, TROVE.searchResultPage) - if rdf.is_container(_page) # filter out page links - ) - return [ - self._render_result(graph, _search_result_blanknode) - for _search_result_blanknode in rdf.sequence_objects_in_order(_results_sequence) - ] - - def _render_result(self, graph: rdf.RdfGraph, search_result_blanknode: rdf.RdfBlanknode): - _card = next( - _obj - for _pred, _obj in search_result_blanknode - if _pred == TROVE.indexCard - ) - return self._render_card(graph, _card) + def simple_multicard_rendering(self, cards): + return json.dumps({ + 'data': [ + self._render_card_content(_card_iri, _osfmap_json) + for _card_iri, _osfmap_json in cards + ], + 'links': self._render_links(), + 'meta': self._render_meta(), + }, indent=2) - def _render_card(self, graph: rdf.RdfGraph, card: rdf.RdfObject): - # just the card contents - if isinstance(card, str): - _card_contents = next(graph.q(card, TROVE.resourceMetadata)) - elif isinstance(card, frozenset): - _card_contents = next( - _obj - for _pred, _obj in card - if _pred == TROVE.resourceMetadata - ) - else: - raise trove_exceptions.ExpectedIriOrBlanknode(card) - assert isinstance(_card_contents, rdf.Literal) - assert RDF.JSON in _card_contents.datatype_iris - _json_contents = json.loads(_card_contents.unicode_value) - if isinstance(card, str): - self._add_twople(_json_contents, 'foaf:primaryTopicOf', card) - return _json_contents + def _render_card_content(self, card_iri: str, osfmap_json: dict): + self._add_twople(osfmap_json, 'foaf:primaryTopicOf', card_iri) + return osfmap_json - def _render_meta(self, graph: rdf.RdfGraph, focus_iri: str): + def _render_meta(self): _meta: dict[str, int | str] = {} try: - _total = next(graph.q(focus_iri, TROVE.totalResultCount)) + _total = next(self.response_gathering.ask( + TROVE.totalResultCount, + focus=self.response_focus, + )) if isinstance(_total, int): 
_meta['total'] = _total elif isinstance(_total, rdf.Literal): @@ -93,9 +55,9 @@ def _render_meta(self, graph: rdf.RdfGraph, focus_iri: str): pass return _meta - def _render_links(self, graph: rdf.RdfGraph, focus_iri: str): + def _render_links(self): _links = {} - for _pagelink in graph.q(focus_iri, TROVE.searchResultPage): + for _pagelink in self._page_links: _twopledict = rdf.twopledict_from_twopleset(_pagelink) if JSONAPI_LINK_OBJECT in _twopledict.get(RDF.type, ()): (_membername,) = _twopledict[JSONAPI_MEMBERNAME] diff --git a/trove/render/simple_tsv.py b/trove/render/simple_tsv.py new file mode 100644 index 000000000..60eb4023b --- /dev/null +++ b/trove/render/simple_tsv.py @@ -0,0 +1,10 @@ +import csv + +from trove.vocab import mediatypes + +from .simple_csv import TrovesearchSimpleCsvRenderer + + +class TrovesearchSimpleTsvRenderer(TrovesearchSimpleCsvRenderer): + MEDIATYPE = mediatypes.TSV + CSV_DIALECT: type[csv.Dialect] = csv.excel_tab diff --git a/trove/render/turtle.py b/trove/render/turtle.py index c035e773a..fb2d6e352 100644 --- a/trove/render/turtle.py +++ b/trove/render/turtle.py @@ -6,7 +6,11 @@ class RdfTurtleRenderer(BaseRenderer): MEDIATYPE = 'text/turtle' + # include indexcard metadata as JSON literals (because QuotedGraph is non-standard) INDEXCARD_DERIVER_IRI = TROVE['derive/osfmap_json'] - def render_document(self, rdf_graph: rdf.RdfGraph, focus_iri: str): - return rdf.turtle_from_tripledict(rdf_graph.tripledict, focus=focus_iri) + def simple_render_document(self) -> str: + return rdf.turtle_from_tripledict( + self.response_data.tripledict, + focus=self.response_focus.single_iri(), + ) diff --git a/trove/trovesearch/page_cursor.py b/trove/trovesearch/page_cursor.py index 0428b78d5..33aa7f8f6 100644 --- a/trove/trovesearch/page_cursor.py +++ b/trove/trovesearch/page_cursor.py @@ -3,6 +3,7 @@ import dataclasses import enum import json +import math import typing from trove.exceptions import InvalidPageCursorValue @@ -11,14 +12,18 @@ __all__ = 
('PageCursor', 'OffsetCursor', 'ReproduciblyRandomSampleCursor') -MANY_MORE = -1 +MANY_MORE = math.inf MAX_OFFSET = 9997 +DEFAULT_PAGE_SIZE = 13 +MAX_PAGE_SIZE = 101 +UNBOUNDED_PAGE_SIZE = math.inf # json-serialized as "Infinity" + @dataclasses.dataclass class PageCursor: - page_size: int - total_count: int = MANY_MORE + page_size: int | float = DEFAULT_PAGE_SIZE + total_count: int | float = MANY_MORE @classmethod def from_queryparam_value(cls, cursor_value: str) -> typing.Self: @@ -36,6 +41,14 @@ def from_cursor(cls, other_cursor: PageCursor) -> typing.Self: return dataclasses.replace(other_cursor) # simple copy return cls(*dataclasses.astuple(other_cursor)) + @property + def bounded_page_size(self) -> int: + return ( + MAX_PAGE_SIZE + if self.page_size > MAX_PAGE_SIZE + else int(self.page_size) + ) + def as_queryparam_value(self) -> str: _cls_key = _PageCursorTypes(type(self)).name _as_json = json.dumps([_cls_key, *dataclasses.astuple(self)]) @@ -65,29 +78,31 @@ def first_cursor(self) -> typing.Self | None: @dataclasses.dataclass class OffsetCursor(PageCursor): - # page_size: int (from PageCursor) - # total_count: int (from PageCursor) + # page_size: int | float (from PageCursor) + # total_count: int | float (from PageCursor) start_offset: int = 0 def is_valid(self) -> bool: + _end_offset = ( + self.total_count + if self.bounded_page_size == self.page_size + else min(self.total_count, self.page_size) + ) return ( super().is_valid() and 0 <= self.start_offset <= MAX_OFFSET - and ( - self.total_count == MANY_MORE - or self.start_offset < self.total_count - ) + and self.start_offset < _end_offset ) def is_first_page(self) -> bool: return self.start_offset == 0 def next_cursor(self): - _next = dataclasses.replace(self, start_offset=(self.start_offset + self.page_size)) + _next = dataclasses.replace(self, start_offset=int(self.start_offset + self.bounded_page_size)) return (_next if _next.is_valid() else None) def prev_cursor(self): - _prev = dataclasses.replace(self, 
start_offset=(self.start_offset - self.page_size)) + _prev = dataclasses.replace(self, start_offset=int(self.start_offset - self.bounded_page_size)) return (_prev if _prev.is_valid() else None) def first_cursor(self): @@ -117,8 +132,46 @@ def prev_cursor(self): ) +@dataclasses.dataclass +class SearchAfterCursor(PageCursor): + # page_size: int (from PageCursor) + # total_count: int (from PageCursor) + search_after: list | None = None + next_search_after: list | None = None + prev_search_after: list | None = None + + def is_first_page(self) -> bool: + return self.search_after is None + + def next_cursor(self): + _next = dataclasses.replace( + self, + search_after=self.next_search_after, + next_search_after=None, + ) + return (_next if _next.is_valid() else None) + + def prev_cursor(self): + _prev = dataclasses.replace( + self, + search_after=self.prev_search_after, + next_search_after=self.search_after, + ) + return (_prev if _prev.is_valid() else None) + + def first_cursor(self): + _first = dataclasses.replace( + self, + search_after=None, + next_search_after=None, + prev_search_after=None, + ) + return (_first if _first.is_valid() else None) + + class _PageCursorTypes(enum.Enum): '''registry of cursor types into which cursor values can be deserialized''' PC = PageCursor OC = OffsetCursor RRSC = ReproduciblyRandomSampleCursor + SAC = SearchAfterCursor diff --git a/trove/trovesearch/search_response.py b/trove/trovesearch/search_handle.py similarity index 51% rename from trove/trovesearch/search_response.py rename to trove/trovesearch/search_handle.py index 19bbdfe6c..45a3449be 100644 --- a/trove/trovesearch/search_response.py +++ b/trove/trovesearch/search_handle.py @@ -1,5 +1,6 @@ +from __future__ import annotations import dataclasses -from typing import Literal, Iterable, Union, Optional +import typing from primitive_metadata import primitive_rdf @@ -7,24 +8,80 @@ PageCursor, ReproduciblyRandomSampleCursor, ) -from trove.trovesearch.search_params import 
CardsearchParams +from trove.trovesearch.search_params import BaseTroveParams from trove.vocab.namespaces import TROVE from trove.vocab.trove import trove_indexcard_namespace -BoundedCount = Union[ - int, # exact count, if less than ten thousands - Literal[TROVE['ten-thousands-and-more']], -] +@dataclasses.dataclass +class BasicSearchHandle: + cursor: PageCursor + search_params: BaseTroveParams + handler: typing.Callable[[BaseTroveParams], typing.Self] | None = None + + @property + def total_result_count(self) -> primitive_rdf.Literal: + return ( + TROVE['ten-thousands-and-more'] + if self.cursor.has_many_more() + else self.cursor.total_count + ) + + def get_next_streaming_handle(self) -> typing.Self | None: + raise NotImplementedError -# TODO: add `metadata={OWL.sameAs: ...}` to each field; use dataclass-to-rdf instead of gatherers +@dataclasses.dataclass +class CardsearchHandle(BasicSearchHandle): + search_result_page: typing.Iterable[CardsearchResult] = () + related_propertypath_results: list[PropertypathUsage] = dataclasses.field(default_factory=list) + + def __post_init__(self): + _cursor = self.cursor + _page = self.search_result_page + if ( # TODO: move this logic into the... cursor? 
+ isinstance(_cursor, ReproduciblyRandomSampleCursor) + and _cursor.is_first_page() + and _page is not None + ): + if _cursor.first_page_ids: + # revisiting first page; reproduce original random order + _ordering_by_id = { + _id: _i + for (_i, _id) in enumerate(_cursor.first_page_ids) + } + self.search_result_page = sorted( + _page, + key=lambda _r: _ordering_by_id[_r.card_id], + ) + elif not _cursor.has_many_more(): + # visiting first page for the first time + _cursor.first_page_ids = [_result.card_id for _result in _page] + return _page + + def get_next_streaming_handle(self) -> typing.Self | None: + _next_cursor = self.cursor.next_cursor() + if (_next_cursor is not None) and (self.handler is not None): + _next_params = dataclasses.replace( + self.search_params, + page_cursor=_next_cursor, + include=frozenset([(TROVE.searchResultPage,)]), + ) + if self.handler is not None: + return self.handler(_next_params) + return None + + +@dataclasses.dataclass +class ValuesearchHandle(BasicSearchHandle): + search_result_page: typing.Iterable[ValuesearchResult] = () + @dataclasses.dataclass class TextMatchEvidence: property_path: tuple[str, ...] 
matching_highlight: primitive_rdf.Literal - card_iri: Optional[str] # may be left implicit + card_iri: typing.Optional[str] # may be left implicit @dataclasses.dataclass @@ -56,61 +113,23 @@ class PropertypathUsage: class ValuesearchResult: value_iri: str | None value_value: str | None = None - value_type: Iterable[str] = () - name_text: Iterable[str] = () - title_text: Iterable[str] = () - label_text: Iterable[str] = () + value_type: typing.Iterable[str] = () + name_text: typing.Iterable[str] = () + title_text: typing.Iterable[str] = () + label_text: typing.Iterable[str] = () match_count: int = 0 total_count: int = 0 def __post_init__(self): - assert self.value_iri or self.value_value, ( + assert (self.value_iri is not None) or (self.value_value is not None), ( f'either value_iri or value_value required (on {self})' ) ### -# paged responses +# types -@dataclasses.dataclass -class PagedResponse: - cursor: PageCursor - - @property - def total_result_count(self) -> BoundedCount: - return ( - TROVE['ten-thousands-and-more'] - if (self.cursor is None) or self.cursor.has_many_more() - else self.cursor.total_count - ) - - -@dataclasses.dataclass -class CardsearchResponse(PagedResponse): - search_result_page: list[CardsearchResult] - related_propertypath_results: list['PropertypathUsage'] - cardsearch_params: CardsearchParams - - def __post_init__(self): - _cursor = self.cursor - if ( - isinstance(_cursor, ReproduciblyRandomSampleCursor) - and _cursor.is_first_page() - ): - if _cursor.first_page_ids: - # revisiting first page; reproduce original random order - _ordering_by_id = { - _id: _i - for (_i, _id) in enumerate(_cursor.first_page_ids) - } - self.search_result_page.sort(key=lambda _r: _ordering_by_id[_r.card_id]) - elif not _cursor.has_many_more(): - _cursor.first_page_ids = [_result.card_id for _result in self.search_result_page] - - -@dataclasses.dataclass -class ValuesearchResponse(PagedResponse): - search_result_page: Iterable[ValuesearchResult] 
+TrovesearchHandler = typing.Callable[[BaseTroveParams], BasicSearchHandle] ### diff --git a/trove/trovesearch/search_params.py b/trove/trovesearch/search_params.py index c4ecabbd1..027ed5756 100644 --- a/trove/trovesearch/search_params.py +++ b/trove/trovesearch/search_params.py @@ -2,6 +2,7 @@ import collections import dataclasses import enum +import functools import itertools import logging import typing @@ -11,7 +12,10 @@ from primitive_metadata import primitive_rdf from trove import exceptions as trove_exceptions -from trove.trovesearch.page_cursor import PageCursor +from trove.trovesearch.page_cursor import ( + DEFAULT_PAGE_SIZE, + PageCursor, +) from trove.util.queryparams import ( QueryparamDict, QueryparamName, @@ -26,7 +30,7 @@ OSFMAP_THESAURUS, ) from trove.vocab.trove import trove_shorthand -from trove.vocab.namespaces import RDF, TROVE, OWL, NAMESPACES_SHORTHAND +from trove.vocab.namespaces import RDF, TROVE, OWL, NAMESPACES_SHORTHAND, FOAF, DCTERMS logger = logging.getLogger(__name__) @@ -47,10 +51,6 @@ # optional prefix for "sort" values DESCENDING_SORT_PREFIX = '-' -# for "page[size]" values -DEFAULT_PAGE_SIZE = 13 -MAX_PAGE_SIZE = 101 - # between each step in a property path "foo.bar.baz" PROPERTYPATH_DELIMITER = '.' 
# default `include` propertypaths per focustype, used when the request does
# not give an explicit `include` queryparam
DEFAULT_INCLUDES_BY_TYPE = {
    TROVE.Cardsearch: {
        (TROVE.searchResultPage,),
        (TROVE.relatedPropertyList,),
    },
    TROVE.Valuesearch: {
        (TROVE.searchResultPage,),
    },
    TROVE.SearchResult: {
        (TROVE.indexCard,),
    },
}

# propertypaths always present per focustype, regardless of `include`/`fields`
DEFAULT_FIELDS_BY_TYPE = {
    TROVE.Indexcard: {
        (TROVE.resourceMetadata,),
        (TROVE.focusIdentifier,),
        (DCTERMS.issued,),
        (DCTERMS.modified,),
        # fixed: was `(FOAF.primaryTopic)` -- a bare iri (the parens are a no-op
        # without the comma), not a one-step propertypath tuple like its siblings
        (FOAF.primaryTopic,),
    },
    TROVE.Cardsearch: {
        (TROVE.totalResultCount,),
        (TROVE.cardSearchText,),
        (TROVE.cardSearchFilter,),
    },
    TROVE.Valuesearch: {
        (TROVE.propertyPath,),
        (TROVE.valueSearchText,),
        (TROVE.valueSearchFilter,),
        (TROVE.cardSearchText,),
        (TROVE.cardSearchFilter,),
    },
}
def _get_page_cursor(queryparams: QueryparamDict) -> PageCursor:
    '''build a PageCursor from `page[cursor]` or `page[size]` queryparams

    `page[cursor]` (an opaque value from a prior response) takes precedence;
    otherwise an explicit `page[size]` gives a cursor with that page size
    (no maximum -- absurd page sizes are deliberately allowed); with neither
    param, a default-size cursor is returned.

    raises trove_exceptions.InvalidQueryParamValue on non-integer `page[size]`
    '''
    _cursor_value = _get_single_value(queryparams, QueryparamName('page', ('cursor',)))
    if _cursor_value:
        return PageCursor.from_queryparam_value(_cursor_value)
    _size_value = _get_single_value(queryparams, QueryparamName('page', ('size',)))
    if _size_value is None:
        return PageCursor()
    try:
        _size = int(_size_value)
    except ValueError as _error:
        # chain the cause so the original parse failure stays visible in tracebacks
        raise trove_exceptions.InvalidQueryParamValue('page[size]') from _error
    return PageCursor(page_size=_size)
class _TypedFocus(gather.Focus):
    '''a gather.Focus that stamps its declared rdf type iri(s) on every instance'''
    TYPE_IRI: ClassVar[str]  # (expected on subclasses)
    ADDITIONAL_TYPE_IRIS: ClassVar[tuple[str, ...]] = ()  # (optional on subclasses)

    @classmethod
    def new(cls, *, type_iris=(), **kwargs):
        return super().new(
            # add type_iri to new Focus instance
            type_iris={
                cls.TYPE_IRI,
                *getattr(cls, 'ADDITIONAL_TYPE_IRIS', ()),
                *type_iris
            },
            **kwargs,
        )


@dataclasses.dataclass(frozen=True)
class CardsearchFocus(_TypedFocus):
    '''focus for a cardsearch request: carries the parsed params and the search handle'''
    TYPE_IRI = TROVE.Cardsearch

    # additional dataclass fields (compare=False: excluded from equality/hash)
    search_params: CardsearchParams = dataclasses.field(compare=False)
    search_handle: CardsearchHandle = dataclasses.field(compare=False)


@dataclasses.dataclass(frozen=True)
class ValuesearchFocus(_TypedFocus):
    '''focus for a valuesearch request: carries the parsed params and the search handle'''
    TYPE_IRI = TROVE.Valuesearch

    # additional dataclass fields (compare=False: excluded from equality/hash)
    search_params: ValuesearchParams = dataclasses.field(compare=False)
    search_handle: ValuesearchHandle = dataclasses.field(compare=False)


class IndexcardFocus(_TypedFocus):
    '''focus for a single indexcard (also typed dcat:CatalogRecord)'''
    TYPE_IRI = TROVE.Indexcard
    ADDITIONAL_TYPE_IRIS = (DCAT.CatalogRecord,)
@trovesearch_by_indexstrategy.gatherer(
    TROVE.searchResultPage,
    focustype_iris={TROVE.Cardsearch},
    cache_bound=1,  # only the first page gets cached
)
def gather_cardsearch_page(focus: CardsearchFocus, *, deriver_iri, **kwargs):
    '''yield each page of card results (and each card's description triples)

    starts from the handle on the focus and follows streaming handles until
    exhausted -- for a non-streaming response this loop runs exactly once
    '''
    # each searchResultPage a sequence of search results
    _current_handle: CardsearchHandle | None = focus.search_handle
    while _current_handle is not None:
        _result_page = []
        # batch-load all card descriptions for this page in one query
        _card_descriptions_by_iri = _load_card_descriptions(_current_handle.search_result_page, deriver_iri)
        for _result in _current_handle.search_result_page or ():
            # text-match evidence as nested blanknodes on the search result
            _text_evidence_twoples = (
                (TROVE.matchEvidence, frozenset((
                    (RDF.type, TROVE.TextMatchEvidence),
                    (TROVE.matchingHighlight, _evidence.matching_highlight),
                    (TROVE.evidenceCardIdentifier, literal(_evidence.card_iri)),
                    *_single_propertypath_twoples(_evidence.property_path),
                )))
                for _evidence in _result.text_match_evidence
            )
            _result_page.append(frozenset((
                (RDF.type, TROVE.SearchResult),
                (TROVE.indexCard, _result.card_iri),
                *_text_evidence_twoples,
            )))
            try:
                _card_description = _card_descriptions_by_iri[_result.card_iri]
            except KeyError:
                # card missing from the database -- the bare result still renders
                pass
            else:
                yield from rdf.iter_tripleset(_card_description.tripledict)
        yield (TROVE.searchResultPage, sequence(_result_page))
        # follow to the next page when streaming; None ends the loop
        _current_handle = _current_handle.get_next_streaming_handle()
@trovesearch_by_indexstrategy.gatherer(
    TROVE.totalResultCount,
    focustype_iris={TROVE.Valuesearch},
)
def gather_valuesearch_count(focus, **kwargs):
    '''yield the total result count from the valuesearch handle on the focus'''
    yield (TROVE.totalResultCount, focus.search_handle.total_result_count)
def _load_card_descriptions(search_result_page, deriver_iri) -> dict[str, rdf.RdfGraph]:
    '''batch-load rdf descriptions for the indexcards in a page of search results

    returns a mapping from indexcard iri to its description graph -- cards
    absent from the database are simply absent from the mapping
    '''
    _card_iris = {_result.card_iri for _result in search_result_page}
    return (
        _load_card_descriptions_nonderived(_card_iris)
        if deriver_iri is None
        else _load_card_descriptions_derived(_card_iris, deriver_iri)
    )


def _card_uuids(card_iris) -> set[str]:
    # extract each indexcard uuid from its iri (logic shared by both loaders below)
    _card_namespace = trove_indexcard_namespace()
    return {
        iri_minus_namespace(_card_iri, namespace=_card_namespace)
        for _card_iri in card_iris
    }


def _load_card_descriptions_nonderived(card_iris) -> dict[str, rdf.RdfGraph]:
    '''describe each card from its latest raw rdf (as a quoted graph), keyed by card iri'''
    _indexcard_rdf_qs = (
        trove_db.LatestIndexcardRdf.objects
        .filter(indexcard__uuid__in=_card_uuids(card_iris))
        .select_related('indexcard')
        .prefetch_related('indexcard__focus_identifier_set')
    )
    _by_card_iri = {}
    for _indexcard_rdf in _indexcard_rdf_qs:
        _indexcard_iri = _indexcard_rdf.indexcard.get_iri()
        _quoted_graph = _indexcard_rdf.as_quoted_graph()
        _quoted_graph.add(
            (_quoted_graph.focus_iri, FOAF.primaryTopicOf, _indexcard_iri),
        )
        # NOTE(review): _quoted_graph (with the primaryTopicOf triple added above)
        # is not passed to _describe_indexcard_nonderived, which calls
        # as_quoted_graph() again itself -- unless as_quoted_graph() returns a
        # shared instance, the added triple is discarded; confirm intent
        _by_card_iri[_indexcard_iri] = _describe_indexcard_nonderived(
            _indexcard_iri, _indexcard_rdf
        )
    return _by_card_iri


def _load_card_descriptions_derived(card_iris, deriver_iri: str) -> dict[str, rdf.RdfGraph]:
    '''describe each card from pre-derived metadata (for the given deriver), keyed by card iri'''
    _derived_indexcard_qs = (
        trove_db.DerivedIndexcard.objects
        .filter(
            upriver_indexcard__uuid__in=_card_uuids(card_iris),
            deriver_identifier__in=(
                trove_db.ResourceIdentifier.objects
                .queryset_for_iri(deriver_iri)
            ),
        )
        .select_related('upriver_indexcard')
        .prefetch_related('upriver_indexcard__focus_identifier_set')
    )
    _by_card_iri = {}
    for _derived in _derived_indexcard_qs:
        _indexcard_iri = _derived.upriver_indexcard.get_iri()
        _by_card_iri[_indexcard_iri] = _describe_indexcard_derived(_indexcard_iri, _derived)
    return _by_card_iri
def _describe_indexcard_derived(
    indexcard_iri: str,
    derived_indexcard: trove_db.DerivedIndexcard,
) -> rdf.RdfGraph:
    '''build the rdf description of one indexcard from its pre-derived metadata'''
    _card_description = rdf.RdfGraph({
        indexcard_iri: {
            RDF.type: {TROVE.Indexcard, DCAT.CatalogRecord},
            TROVE.resourceMetadata: {derived_indexcard.as_rdf_literal()},
            DCTERMS.issued: {derived_indexcard.upriver_indexcard.created.date()},
            DCTERMS.modified: {derived_indexcard.modified.date()},
        },
    })
    # each focus identifier appears both as a linked iri and as a literal
    for _identifier in derived_indexcard.upriver_indexcard.focus_identifier_set.all():
        _iri = _identifier.as_iri()
        _card_description.add((indexcard_iri, FOAF.primaryTopic, _iri))
        _card_description.add((indexcard_iri, TROVE.focusIdentifier, literal(_iri)))
    return _card_description
def _osfmap_twople_json(twopledict):
    # a twopledict rendered as an osfmap jsonld literal
    return rdf.literal_json(
        _RdfOsfmapJsonldRenderer().twopledict_as_jsonld(twopledict)
    )


def _osfmap_path(property_path):
    # a propertypath as a json list of osfmap-shorthand terms
    return rdf.literal_json([
        osfmap_shorthand().compact_iri(_iri)
        for _iri in property_path
    ])
def make_http_response(
    *,
    content_rendering: ProtoRendering,
    http_headers: typing.Iterable[tuple[str, str]] = (),
    http_request: djhttp.HttpRequest | None = None,
) -> djhttp.HttpResponse:
    '''build a django http response for the given rendering

    streams the body when given a StreamableRendering; applies any given
    http_headers; honors the `withFileName` queryparam by setting a
    Content-Disposition attachment header (overriding one from http_headers)
    '''
    _response_type = (
        djhttp.StreamingHttpResponse
        if isinstance(content_rendering, StreamableRendering)
        else djhttp.HttpResponse
    )
    _response = _response_type(
        content_rendering.iter_content(),
        content_type=content_rendering.mediatype,
    )
    # fixed: http_headers was accepted but silently ignored (browse.py's
    # `Content-Disposition: inline` header never reached the response)
    for _header_name, _header_value in http_headers:
        _response.headers[_header_name] = _header_value
    if http_request is not None:
        _requested_filename = http_request.GET.get('withFileName')
        if _requested_filename is not None:
            _file_name = _get_file_name(_requested_filename, content_rendering.mediatype)
            _response.headers['Content-Disposition'] = _disposition(_file_name)
    return _response


def make_http_error_response(
    *,
    error: TroveError,
    renderer_type: type[BaseRenderer],
    http_headers: typing.Iterable[tuple[str, str]] = ()
) -> djhttp.HttpResponse:
    '''build a django http response for a trove error, rendered by renderer_type'''
    _content_rendering = renderer_type.render_error_document(error)
    _response = djhttp.HttpResponse(
        _content_rendering.iter_content(),
        status=error.http_status,
        content_type=_content_rendering.mediatype,
    )
    # fixed: http_headers was accepted but silently ignored
    for _header_name, _header_value in http_headers:
        _response.headers[_header_name] = _header_value
    return _response


def _sanitize_file_name(requested_name: str) -> str:
    # replace filesystem/header-unsafe characters and prefix with today's date
    _underscored = re.sub(r'["\'/:\\;\s]', '_', requested_name)
    _datestamp = datetime.date.today().isoformat()
    return f'{_datestamp}_{_underscored}' if _underscored else _datestamp


def _get_file_name(requested_name: str, mediatype: str) -> str:
    # sanitized file name with an extension appropriate to the mediatype
    _file_name = _sanitize_file_name(requested_name)
    _dot_extension = mediatypes.dot_extension(mediatype)
    if _file_name.endswith(_dot_extension):
        return _file_name
    return f'{_file_name}{_dot_extension}'


def _disposition(filename: str) -> bytes:
    # both plain `filename` (latin-1 fallback) and rfc 5987 `filename*` (utf-8)
    return b'; '.join((
        b'attachment',
        b'filename=' + filename.encode('latin-1', errors='replace'),
        b"filename*=utf-8''" + filename.encode(),
    ))
class IndexcardView(View):
    '''serve one indexcard, rendered per the requested mediatype'''

    def get(self, request, indexcard_uuid):
        # resolve the renderer first, so a bad acceptMediatype can be reported
        # without touching anything else (also fixes: the old except branch
        # referenced `_indexcard_iri`, which was unbound when
        # get_renderer_type itself raised)
        try:
            _renderer_type = get_renderer_type(request)
        except trove_exceptions.CannotRenderMediatype as _error:
            # fixed: was `renderer=DEFAULT_RENDERER_TYPE(_indexcard_iri)` --
            # make_http_error_response expects a `renderer_type` keyword with a
            # renderer *class*, not a constructed instance
            return make_http_error_response(
                error=_error,
                renderer_type=DEFAULT_RENDERER_TYPE,
            )
        try:
            _search_gathering = trovesearch_by_indexstrategy.new_gathering({
                # TODO (gather): allow omitting kwargs that go unused
                'search_params': None,
                'specific_index': None,
                'deriver_iri': _renderer_type.INDEXCARD_DERIVER_IRI,
            })
            _indexcard_iri = trove_indexcard_iri(indexcard_uuid)
            _search_gathering.ask(
                {},  # TODO: build from `include`/`fields`
                focus=gather.Focus.new(_indexcard_iri, TROVE.Indexcard),
            )
            _renderer = _renderer_type(_indexcard_iri, _search_gathering.leaf_a_record())
            return make_http_response(
                content_rendering=_renderer.render_document(),
                http_request=request,
            )
        except trove_exceptions.TroveError as _error:
            # fixed: was `renderer=_renderer_type(_indexcard_iri)` (wrong
            # keyword; instance where a class is expected)
            return make_http_error_response(
                error=_error,
                renderer_type=_renderer_type,
            )
class _BaseTrovesearchView(View, abc.ABC):
    '''shared logic for trovesearch views: parse params, search, gather, render'''
    # expected on inheritors
    focus_type: type[gather.Focus]
    params_dataclass: type[CardsearchParams]

    def get(self, request):
        # resolve the renderer first, so errors can be rendered at all
        try:
            _renderer_type = get_renderer_type(request)
        except trove_exceptions.CannotRenderMediatype as _error:
            return make_http_error_response(
                error=_error,
                renderer_type=DEFAULT_RENDERER_TYPE,
            )
        try:
            _url = request.build_absolute_uri()
            _search_gathering = self._start_gathering(renderer_type=_renderer_type)
            _search_params = self._parse_search_params(request)
            _specific_index = index_strategy.get_index_for_trovesearch(_search_params)
            _focus = self.focus_type.new(
                iris=_url,
                search_params=_search_params,
                search_handle=self.get_search_handle(_specific_index, _search_params),
            )
            if _renderer_type.PASSIVE_RENDER:
                # fill the gathering's cache with requested info
                _search_gathering.ask(_search_params.include, focus=_focus)
            # take gathered data into a response
            _renderer = _renderer_type(_focus, _search_gathering)
            return make_http_response(
                content_rendering=_renderer.render_document(),
                http_request=request,
            )
        except trove_exceptions.TroveError as _error:
            return make_http_error_response(
                error=_error,
                renderer_type=_renderer_type,
            )

    def _parse_search_params(self, request: http.HttpRequest) -> CardsearchParams:
        # parse the view's params dataclass from the raw querystring
        return self.params_dataclass.from_querystring(
            request.META['QUERY_STRING'],
        )

    def _start_gathering(self, renderer_type) -> gather.Gathering:
        # TODO: 404 for unknown strategy
        return trovesearch_by_indexstrategy.new_gathering({
            'deriver_iri': renderer_type.INDEXCARD_DERIVER_IRI,
        })

    def get_search_handle(self, specific_index, search_params) -> BasicSearchHandle:
        # run the (wrapped) handler for this view against the chosen index
        return self._get_wrapped_handler(specific_index)(search_params)
specific_index: index_strategy.IndexStrategy.SpecificIndex, + ) -> _TrovesearchHandler: + raise NotImplementedError + + def _get_wrapped_handler(self, specific_index): + _raw_handler = self.get_search_handler(specific_index) + + def _wrapped_handler(search_params): + _handle = _raw_handler(search_params) + _handle.handler = _wrapped_handler + return _handle + return _wrapped_handler + + +class CardsearchView(_BaseTrovesearchView): + focus_type = CardsearchFocus + params_dataclass = CardsearchParams + + def get_search_handler(self, specific_index): + return specific_index.pls_handle_cardsearch + + +class ValuesearchView(_BaseTrovesearchView): + focus_type = ValuesearchFocus + params_dataclass = ValuesearchParams + + def get_search_handler(self, specific_index): + return specific_index.pls_handle_valuesearch diff --git a/trove/views/vocab.py b/trove/views/vocab.py index a5e01071f..dcab1c373 100644 --- a/trove/views/vocab.py +++ b/trove/views/vocab.py @@ -1,9 +1,17 @@ from django import http from django.views import View -from trove.render import get_renderer +from trove import exceptions as trove_exceptions +from trove.render import ( + DEFAULT_RENDERER_TYPE, + get_renderer_type, +) from trove.vocab.namespaces import TROVE from trove.vocab.trove import TROVE_API_THESAURUS +from ._responder import ( + make_http_error_response, + make_http_response, +) class TroveVocabView(View): @@ -13,4 +21,20 @@ def get(self, request, vocab_term): _data = {_iri: TROVE_API_THESAURUS[_iri]} except KeyError: raise http.Http404 - return get_renderer(request).render_response(_data, _iri) + try: + _renderer_type = get_renderer_type(request) + _renderer = _renderer_type(_iri, _data) + return make_http_response( + content_rendering=_renderer.render_document(), + http_request=request, + ) + except trove_exceptions.CannotRenderMediatype as _error: + return make_http_error_response( + error=_error, + renderer=DEFAULT_RENDERER_TYPE(_iri), + ) + except trove_exceptions.TroveError as _error: + 
TSV = 'text/tab-separated-values'
CSV = 'text/csv'


# known mediatype -> file extension (with leading dot), used when building a
# download filename for `Content-Disposition: attachment` responses
_file_extensions = {
    JSON: '.json',
    JSONAPI: '.json',
    JSONLD: '.json',
    TURTLE: '.turtle',
    HTML: '.html',
    TSV: '.tsv',
    CSV: '.csv',
}


def dot_extension(mediatype: str) -> str:
    """Return the file extension (including the leading dot) for a known mediatype.

    :param mediatype: a mediatype string, e.g. 'text/csv'
    :raises ValueError: if the mediatype is not in `_file_extensions`
    """
    try:
        return _file_extensions[mediatype]
    except KeyError:
        # `from None` suppresses the implicit KeyError chaining, since the
        # internal lookup failure adds only noise to the ValueError's traceback
        raise ValueError(f'unrecognized mediatype: {mediatype}') from None
if not type_iris or not type_iris.issubset(OSFMAP_NORMS.focustype_iris): diff --git a/trove/vocab/trove.py b/trove/vocab/trove.py index 0e8bd9da2..c8c2f377f 100644 --- a/trove/vocab/trove.py +++ b/trove/vocab/trove.py @@ -142,6 +142,7 @@ def trove_browse_link(iri: str): TROVE.iriPath: {literal('/trove/index-card-search')}, TROVE.hasParameter: { TROVE.acceptMediatype, + TROVE.withFileName, TROVE.cardSearchText, TROVE.cardSearchFilter, TROVE.pageSize, @@ -228,6 +229,7 @@ def trove_browse_link(iri: str): TROVE.iriPath: {literal('/trove/index-value-search')}, TROVE.hasParameter: { TROVE.acceptMediatype, + TROVE.withFileName, TROVE.valueSearchPropertyPath, TROVE.cardSearchText, TROVE.cardSearchFilter, @@ -324,6 +326,7 @@ def trove_browse_link(iri: str): TROVE.iriPath: {literal('/trove/index-card/{indexCardId}')}, TROVE.hasParameter: { TROVE.acceptMediatype, + TROVE.withFileName, TROVE.indexCardId, }, TROVE.usesConcept: {TROVE.Indexcard}, @@ -507,6 +510,19 @@ def trove_browse_link(iri: str): * `application/ld+json`: rdf as [json-ld](https://www.w3.org/TR/json-ld11/) `acceptMediatype` will override the `Accept` header, if present. 
+''', language='en')}, + }, + TROVE.withFileName: { + RDF.type: {RDF.Property, TROVE.QueryParameter}, + JSONAPI_MEMBERNAME: {literal('withFileName', language='en')}, + RDFS.label: {literal('withFileName', language='en')}, + RDFS.comment: {literal('request the response be treated as a file download (with the given file name)', language='en')}, + TROVE.jsonSchema: {literal_json({'type': 'string'})}, + DCTERMS.description: {_literal_markdown('''**withFileName** is +a query parameter that indicates the response should be downloaded by a browser + +the response will have the http header `Content-Disposition: attachment` +with a filename based on the query param value, current date, and response content mediatype ''', language='en')}, }, TROVE.cardSearchText: { @@ -756,16 +772,16 @@ def trove_browse_link(iri: str): RDF.type: {RDF.Property, OWL.FunctionalProperty, JSONAPI_ATTRIBUTE}, JSONAPI_MEMBERNAME: {literal('suggestedFilterOperator', language='en')}, }, + TROVE.evidenceCardIdentifier: { + RDF.type: {RDF.Property, OWL.FunctionalProperty, JSONAPI_ATTRIBUTE}, + JSONAPI_MEMBERNAME: {literal('evidenceCardIdentifier', language='en')}, + }, # relationships: TROVE.searchResultPage: { RDF.type: {RDF.Property, JSONAPI_RELATIONSHIP}, JSONAPI_MEMBERNAME: {literal('searchResultPage', language='en')}, }, - TROVE.evidenceCardIdentifier: { - RDF.type: {RDF.Property, OWL.FunctionalProperty, JSONAPI_RELATIONSHIP}, - JSONAPI_MEMBERNAME: {literal('evidenceCardIdentifier', language='en')}, - }, TROVE.relatedPropertyList: { RDF.type: {RDF.Property, JSONAPI_RELATIONSHIP}, JSONAPI_MEMBERNAME: {literal('relatedProperties', language='en')}, From 47cea84c1d434ce77be556de24b8960728dc8725 Mon Sep 17 00:00:00 2001 From: Matt Frazier Date: Fri, 27 Dec 2024 11:45:19 -0500 Subject: [PATCH 2/2] Prepare release 24.7.0 --- CHANGELOG.md | 5 +++++ share/version.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f5fbc436f..9cfe20f77 100644 --- 
a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Change Log + +# [24.7.0] - 2024-12-27 +- allow rendering search responses as downloadable CSVs/TSVs +- add, reshape renderer output types + # [24.6.2] - 2024-12-05 - more stable indexer daemon - `trovesearch_denorm` indexing tweaks: diff --git a/share/version.py b/share/version.py index 12dec9a54..b7d78b402 100644 --- a/share/version.py +++ b/share/version.py @@ -1 +1 @@ -__version__ = '24.6.2' +__version__ = '24.7.0'