From 2f2d5231bd8a37d864652c813740a7702393c65c Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 12 Nov 2024 12:51:20 -0500 Subject: [PATCH 1/5] fix: related property error --- .../index_strategy/trovesearch_denorm.py | 7 +++---- .../_common_trovesearch_tests.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/share/search/index_strategy/trovesearch_denorm.py b/share/search/index_strategy/trovesearch_denorm.py index 35320b5c1..0e3453d11 100644 --- a/share/search/index_strategy/trovesearch_denorm.py +++ b/share/search/index_strategy/trovesearch_denorm.py @@ -487,13 +487,12 @@ def _cardsearch_response( PropertypathUsage(property_path=_path, usage_count=0) for _path in cardsearch_params.related_property_paths ) - _relatedproperty_by_path = { - _result.property_path: _result + _relatedproperty_by_pathkey = { + ts.propertypath_as_keyword(_result.property_path): _result for _result in _relatedproperty_list } for _bucket in es8_response['aggregations']['agg_related_propertypath_usage']['buckets']: - _path = tuple(json.loads(_bucket['key'])) - _relatedproperty_by_path[_path].usage_count += _bucket['doc_count'] + _relatedproperty_by_pathkey[_bucket['key']].usage_count += _bucket['doc_count'] return CardsearchResponse( cursor=cursor, search_result_page=_results, diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index f3eff4813..e6edab0ef 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -1,4 +1,5 @@ from typing import Iterable, Iterator +import dataclasses from datetime import date, timedelta import itertools import math @@ -10,6 +11,7 @@ from share.search import messages from trove import models as trove_db from trove.trovesearch.search_params import CardsearchParams, ValuesearchParams +from trove.trovesearch.search_response import PropertypathUsage from trove.vocab.namespaces import RDFS, TROVE, RDF, DCTERMS, OWL, FOAF, DCAT from ._with_real_services import RealElasticTestCase @@ -112,6 +114,23 @@ def test_cardsearch_pagination(self): self.assertEqual(_page_count, math.ceil(_total_count / _page_size)) self.assertEqual(_result_iris, _expected_iris) + def test_cardsearch_related_properties(self): + self._fill_test_data_for_querying() + _cardsearch_params = dataclasses.replace( + CardsearchParams.from_querystring(''), + related_property_paths=( + (DCTERMS.creator,), + (DCTERMS.references,), + (BLARG.nada,), + ), + ) + _cardsearch_response = self.current_index.pls_handle_cardsearch(_cardsearch_params) + self.assertEqual(_cardsearch_response.related_propertypath_results, [ + PropertypathUsage((DCTERMS.creator,), 3), + PropertypathUsage((DCTERMS.references,), 2), + PropertypathUsage((BLARG.nada,), 0), + ]) + def test_valuesearch(self): self._fill_test_data_for_querying() _valuesearch_cases = itertools.chain( From fd6466dbd28c637b1c0e092304cce21b390dd84a Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 12 Nov 2024 12:50:09 -0500 Subject: [PATCH 2/5] add Elastic8IndexStrategy.after_chunk --- share/search/index_strategy/elastic8.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/share/search/index_strategy/elastic8.py b/share/search/index_strategy/elastic8.py index 7e772e41f..052eadfdb 100644 --- a/share/search/index_strategy/elastic8.py +++ b/share/search/index_strategy/elastic8.py @@ -69,7 +69,14 @@ def before_chunk( messages_chunk: messages.MessagesChunk, indexnames: typing.Iterable[str], ) -> None: - pass # implement when needed + ... # implement when needed + + def after_chunk( + self, + messages_chunk: messages.MessagesChunk, + indexnames: typing.Iterable[str], + ) -> None: + ... # implement when needed ### # helper methods for subclasses to use (or override) @@ -154,6 +161,7 @@ def pls_handle_messages_chunk(self, messages_chunk): status_code=_status, error_text=str(_response_body), ) + self.after_chunk(messages_chunk, _indexnames) # yield successes after the whole chunk completes # (since one message may involve several actions) for _messageid in _action_tracker.all_done_messages(): From 9c88139f0db28876fe450db17dd08603fdc29a26 Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Tue, 12 Nov 2024 12:51:01 -0500 Subject: [PATCH 3/5] index chunk_timestamp, use for more precise deletion --- .../index_strategy/trovesearch_denorm.py | 17 +- share/search/messages.py | 6 + .../_common_trovesearch_tests.py | 233 ++++++++++++++---- trove/models/indexcard.py | 9 +- 4 files changed, 200 insertions(+), 65 deletions(-) diff --git a/share/search/index_strategy/trovesearch_denorm.py b/share/search/index_strategy/trovesearch_denorm.py index 0e3453d11..4b98e8ee7 100644 --- a/share/search/index_strategy/trovesearch_denorm.py +++ b/share/search/index_strategy/trovesearch_denorm.py @@ -55,7 +55,7 @@ class TrovesearchDenormIndexStrategy(Elastic8IndexStrategy): CURRENT_STRATEGY_CHECKSUM = ChecksumIri( checksumalgorithm_name='sha-256', salt='TrovesearchDenormIndexStrategy', - hexdigest='fa8fe6459f658877f84620412dcab5e2e70d0c949d8977354c586dca99ff2f28', + hexdigest='e538bbc5966a6a289da9e5ba51ecde5ff29528bf07e940716ef8a888d6601916', ) # abstract method from IndexStrategy @@ -83,6 +83,7 @@ def index_mappings(self): 'properties': { 'card': {'properties': self._card_mappings()}, 'iri_value': {'properties': self._iri_value_mappings()}, + 'chunk_timestamp': {'type': 'unsigned_long'}, }, } @@ -149,24 +150,24 @@ def _paths_and_values_mappings(self): } # override method from Elastic8IndexStrategy - def before_chunk(self, messages_chunk: messages.MessagesChunk, indexnames: Iterable[str]): - # delete all per-value docs (to account for missing values) + def after_chunk(self, messages_chunk: messages.MessagesChunk, indexnames: Iterable[str]): + # refresh to avoid delete-by-query conflicts + self.es8_client.indices.refresh(index=','.join(indexnames)) + # delete any docs that belong to cards in this chunk but weren't touched by indexing self.es8_client.delete_by_query( index=list(indexnames), query={'bool': {'must': [ {'terms': {'card.card_pk': messages_chunk.target_ids_chunk}}, - {'exists': {'field': 'iri_value.value_iri'}}, + {'range': {'chunk_timestamp': {'lt': messages_chunk.timestamp}}}, ]}}, ) - # (possible optimization: instead, hold onto doc_ids and (in `after_chunk`?) - # delete_by_query excluding those) # abstract method from Elastic8IndexStrategy def build_elastic_actions(self, messages_chunk: messages.MessagesChunk): _indexcard_rdf_qs = ts.latest_rdf_for_indexcard_pks(messages_chunk.target_ids_chunk) _remaining_indexcard_pks = set(messages_chunk.target_ids_chunk) for _indexcard_rdf in _indexcard_rdf_qs: - _docbuilder = self._SourcedocBuilder(_indexcard_rdf) + _docbuilder = self._SourcedocBuilder(_indexcard_rdf, messages_chunk.timestamp) if not _docbuilder.should_skip(): # if skipped, will be deleted _indexcard_pk = _indexcard_rdf.indexcard_id for _doc_id, _doc in _docbuilder.build_docs(): @@ -254,6 +255,7 @@ class _SourcedocBuilder: '''build elasticsearch sourcedocs for an rdf document ''' indexcard_rdf: trove_db.IndexcardRdf + chunk_timestamp: int indexcard: trove_db.Indexcard = dataclasses.field(init=False) focus_iri: str = dataclasses.field(init=False) rdfdoc: rdf.RdfTripleDictionary = dataclasses.field(init=False) @@ -279,6 +281,7 @@ def build_docs(self) -> Iterator[tuple[str, dict]]: yield self._doc_id(_iri), { 'card': self._card_subdoc, 'iri_value': self._iri_value_subdoc(_iri), + 'chunk_timestamp': self.chunk_timestamp, } def _doc_id(self, value_iri=None) -> str: diff --git a/share/search/messages.py b/share/search/messages.py index a3930b42c..5ba2e466a 100644 --- a/share/search/messages.py +++ b/share/search/messages.py @@ -1,7 +1,9 @@ import abc import dataclasses import enum +import functools import logging +import time import typing from share.search import exceptions @@ -83,6 +85,10 @@ def as_tuples(self): target_id=target_id, ) + @functools.cached_property # cached so it's constant (and unique-enough) for an instance + def timestamp(self) -> int: + return time.time_ns() + @classmethod def stream_chunks( cls, diff --git a/tests/share/search/index_strategy/_common_trovesearch_tests.py b/tests/share/search/index_strategy/_common_trovesearch_tests.py index e6edab0ef..db265322b 100644 --- a/tests/share/search/index_strategy/_common_trovesearch_tests.py +++ b/tests/share/search/index_strategy/_common_trovesearch_tests.py @@ -60,19 +60,54 @@ def test_cardsearch(self): self.cardsearch_cases(), self.cardsearch_integer_cases(), ) - for _queryparams, _expected_result_iris in _cardsearch_cases: - _cardsearch_params = CardsearchParams.from_querystring(urlencode(_queryparams)) - assert isinstance(_cardsearch_params, CardsearchParams) - _cardsearch_response = self.current_index.pls_handle_cardsearch(_cardsearch_params) - # assumes all results fit on one page - _actual_result_iris: set[str] | list[str] = [ - self._indexcard_focus_by_uuid[_result.card_uuid] - for _result in _cardsearch_response.search_result_page - ] - # test sort order only when expected results are ordered - if isinstance(_expected_result_iris, set): - _actual_result_iris = set(_actual_result_iris) - self.assertEqual(_expected_result_iris, _actual_result_iris, msg=f'?{_queryparams}') + for _queryparams, _expected_focus_iris in _cardsearch_cases: + self._assert_cardsearch_iris(_queryparams, _expected_focus_iris) + + def test_cardsearch_after_deletion(self): + _cards = self._fill_test_data_for_querying() + _deleted_focus_iris = {BLARG.b} + self._delete_indexcards([_cards[_focus_iri] for _focus_iri in _deleted_focus_iris]) + _cardsearch_cases = itertools.chain( + self.cardsearch_cases(), + self.cardsearch_integer_cases(), + ) + for _queryparams, _expected_focus_iris in _cardsearch_cases: + if isinstance(_expected_focus_iris, set): + _expected_focus_iris -= _deleted_focus_iris + else: + _expected_focus_iris = [ + _iri + for _iri in _expected_focus_iris + if _iri not in _deleted_focus_iris + ] + self._assert_cardsearch_iris(_queryparams, _expected_focus_iris) + + def test_cardsearch_after_updates(self): + _cards = self._fill_test_data_for_querying() + self._update_indexcard_content(_cards[BLARG.c], BLARG.c, { + BLARG.c: { + RDF.type: {BLARG.Thing}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_c}, # subj_bc removed; subj_c added + DCTERMS.title: {rdf.literal('cccc')}, + }, + }) + self._index_indexcards([_cards[BLARG.c]]) + _cases = [ + ( + {'cardSearchFilter[subject]': BLARG.subj_bc}, + {BLARG.b}, + ), + ( + {'cardSearchFilter[subject]': BLARG.subj_ac}, + {BLARG.c, BLARG.a}, + ), + ( + {'cardSearchFilter[subject]': BLARG.subj_c}, + {BLARG.c}, + ), + ] + for _queryparams, _expected_focus_iris in _cases: + self._assert_cardsearch_iris(_queryparams, _expected_focus_iris) def test_cardsearch_pagination(self): _cards: list[trove_db.Indexcard] = [] @@ -81,10 +116,10 @@ def test_cardsearch_pagination(self): _total_count = 55 _start_date = date(1999, 12, 31) for _i in range(_total_count): - _card_iri = BLARG[f'i{_i}'] - _expected_iris.add(_card_iri) - _cards.append(self._create_indexcard(_card_iri, { - _card_iri: { + _focus_iri = BLARG[f'i{_i}'] + _expected_iris.add(_focus_iri) + _cards.append(self._create_indexcard(_focus_iri, { + _focus_iri: { RDF.type: {BLARG.Thing}, DCTERMS.title: {rdf.literal(f'card #{_i}')}, DCTERMS.created: {rdf.literal(_start_date + timedelta(weeks=_i, days=_i))}, @@ -133,20 +168,80 @@ def test_cardsearch_related_properties(self): def test_valuesearch(self): self._fill_test_data_for_querying() - _valuesearch_cases = itertools.chain( - self.valuesearch_simple_cases(), - self.valuesearch_complex_cases(), - ) - for _queryparams, _expected_values in _valuesearch_cases: - _valuesearch_params = ValuesearchParams.from_querystring(urlencode(_queryparams)) - assert isinstance(_valuesearch_params, ValuesearchParams) - _valuesearch_response = self.current_index.pls_handle_valuesearch(_valuesearch_params) - # assumes all results fit on one page - _actual_values = { - _result.value_iri or _result.value_value - for _result in _valuesearch_response.search_result_page - } - self.assertEqual(_expected_values, _actual_values) + for _queryparams, _expected_values in self.valuesearch_cases(): + self._assert_valuesearch_values(_queryparams, _expected_values) + + def test_valuesearch_after_deletion(self): + _cards = self._fill_test_data_for_querying() + _deleted_focus_iris = {BLARG.b} + self._delete_indexcards([_cards[_focus_iri] for _focus_iri in _deleted_focus_iris]) + _cases = [ + ( + {'valueSearchPropertyPath': 'subject'}, + {BLARG.subj_a, BLARG.subj_ac, BLARG.subj_bc, BLARG.subj_c}, # BLARG.subj_b no longer present + ), ( + {'valueSearchPropertyPath': 'dateCreated'}, + {'1999', '2024'}, # 2012 no longer present + ), ( + {'valueSearchPropertyPath': 'subject', 'cardSearchText': 'bbbb'}, + set(), # none + ) + ] + for _queryparams, _expected_values in _cases: + self._assert_valuesearch_values(_queryparams, _expected_values) + + def test_valuesearch_after_updates(self): + _cards = self._fill_test_data_for_querying() + self._update_indexcard_content(_cards[BLARG.c], BLARG.c, { + BLARG.c: { + RDF.type: {BLARG.Thing}, + DCTERMS.creator: {BLARG.someone_new}, # someone_else removed; someone_new added + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_c, BLARG.subj_new}, # subj_bc removed; subj_new added + DCTERMS.title: {rdf.literal('cccc')}, + }, + }) + self._index_indexcards([_cards[BLARG.c]]) + _cases = [ + ( + {'valueSearchPropertyPath': 'subject'}, + {BLARG.subj_a, BLARG.subj_ac, BLARG.subj_b, BLARG.subj_bc, BLARG.subj_c, BLARG.subj_new}, # subj_c new + ), ( + {'valueSearchPropertyPath': 'subject', 'cardSearchFilter[creator]': BLARG.someone_new}, + {BLARG.subj_ac, BLARG.subj_c, BLARG.subj_new}, + ), ( + {'valueSearchPropertyPath': 'subject', 'cardSearchFilter[creator]': BLARG.someone_else}, + set(), # none + ) + ] + for _queryparams, _expected_values in _cases: + self._assert_valuesearch_values(_queryparams, _expected_values) + + def _assert_cardsearch_iris(self, queryparams: dict, expected_focus_iris: Iterable[str]): + _querystring = urlencode(queryparams) + _cardsearch_params = CardsearchParams.from_querystring(_querystring) + assert isinstance(_cardsearch_params, CardsearchParams) + _cardsearch_response = self.current_index.pls_handle_cardsearch(_cardsearch_params) + # assumes all results fit on one page + _actual_result_iris: set[str] | list[str] = [ + self._indexcard_focus_by_uuid[_result.card_uuid] + for _result in _cardsearch_response.search_result_page + ] + # test sort order only when expected results are ordered + if isinstance(expected_focus_iris, set): + _actual_result_iris = set(_actual_result_iris) + self.assertEqual(expected_focus_iris, _actual_result_iris, msg=f'?{_querystring}') + + def _assert_valuesearch_values(self, queryparams, expected_values): + _querystring = urlencode(queryparams) + _valuesearch_params = ValuesearchParams.from_querystring(_querystring) + assert isinstance(_valuesearch_params, ValuesearchParams) + _valuesearch_response = self.current_index.pls_handle_valuesearch(_valuesearch_params) + # assumes all results fit on one page + _actual_values = { + _result.value_iri or _result.value_value + for _result in _valuesearch_response.search_result_page + } + self.assertEqual(expected_values, _actual_values, msg=f'?{_querystring}') def _fill_test_data_for_querying(self): _card_a = self._create_indexcard(BLARG.a, { @@ -171,7 +266,7 @@ def _fill_test_data_for_querying(self): }, BLARG.c: { RDF.type: {BLARG.Thing}, - DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_bc}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_bc, BLARG.subj_c}, DCTERMS.title: {rdf.literal('cccc')}, }, }) @@ -191,7 +286,7 @@ def _fill_test_data_for_querying(self): }, BLARG.c: { RDF.type: {BLARG.Thing}, - DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_bc}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_bc, BLARG.subj_c}, DCTERMS.title: {rdf.literal('cccc')}, }, }) @@ -201,7 +296,7 @@ def _fill_test_data_for_querying(self): DCTERMS.created: {rdf.literal(date(2024, 12, 31))}, DCTERMS.creator: {BLARG.someone_else}, DCTERMS.title: {rdf.literal('cccc')}, - DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_bc}, + DCTERMS.subject: {BLARG.subj_ac, BLARG.subj_bc, BLARG.subj_c}, DCTERMS.description: {rdf.literal('The danger is unleashed only if you substantially disturb this place physically. This place is best shunned and left uninhabited.', language='en')}, }, BLARG.someone_else: { @@ -232,7 +327,13 @@ def _fill_test_data_for_querying(self): }, }, }) - self._index_indexcards([_card_a, _card_b, _card_c]) + _cards = { + BLARG.a: _card_a, + BLARG.b: _card_b, + BLARG.c: _card_c, + } + self._index_indexcards(_cards.values()) + return _cards def cardsearch_cases(self) -> Iterator[tuple[dict[str, str], set[str] | list[str]]]: # using data from _fill_test_data_for_querying @@ -421,7 +522,7 @@ def cardsearch_integer_cases(self) -> Iterator[tuple[dict[str, str], set[str] | [BLARG.c, BLARG.a, BLARG.b], # ordered list ) - def valuesearch_simple_cases(self) -> Iterator[tuple[dict[str, str], set[str]]]: + def valuesearch_cases(self) -> Iterator[tuple[dict[str, str], set[str]]]: yield ( {'valueSearchPropertyPath': 'references'}, {BLARG.b, BLARG.c}, @@ -430,9 +531,22 @@ def valuesearch_simple_cases(self) -> Iterator[tuple[dict[str, str], set[str]]]: {'valueSearchPropertyPath': 'dateCreated'}, {'1999', '2012', '2024'}, ) - # TODO: more - - def valuesearch_complex_cases(self) -> Iterator[tuple[dict[str, str], set[str]]]: + yield ( + {'valueSearchPropertyPath': 'subject'}, + {BLARG.subj_a, BLARG.subj_ac, BLARG.subj_b, BLARG.subj_bc, BLARG.subj_c}, + ) + yield ( + {'valueSearchPropertyPath': 'subject', 'cardSearchFilter[creator]': BLARG.someone}, + {BLARG.subj_ac, BLARG.subj_bc, BLARG.subj_a, BLARG.subj_b}, + ) + yield ( + {'valueSearchPropertyPath': 'subject', 'cardSearchFilter[creator]': BLARG.someone_else}, + {BLARG.subj_ac, BLARG.subj_bc, BLARG.subj_c}, + ) + yield ( + {'valueSearchPropertyPath': 'subject', 'cardSearchText': 'aaaa'}, + {BLARG.subj_ac, BLARG.subj_a}, + ) yield ( { 'valueSearchPropertyPath': 'references', @@ -460,29 +574,40 @@ def _index_indexcards(self, indexcards: Iterable[trove_db.Indexcard]): )) self.current_index.pls_refresh() + def _delete_indexcards(self, indexcards: Iterable[trove_db.Indexcard]): + for _indexcard in indexcards: + _indexcard.pls_delete(notify_indexes=False) # notify by hand to know when done + self._index_indexcards(indexcards) + def _create_indexcard(self, focus_iri: str, rdf_tripledict: rdf.RdfTripleDictionary) -> trove_db.Indexcard: _suid = factories.SourceUniqueIdentifierFactory() - _raw = factories.RawDatumFactory( - suid=_suid, - ) - _indexcard = trove_db.Indexcard.objects.create( - source_record_suid=_suid, - ) + _indexcard = trove_db.Indexcard.objects.create(source_record_suid=_suid) + self._update_indexcard_content(_indexcard, focus_iri, rdf_tripledict) # an osfmap_json card is required for indexing, but not used in these tests - trove_db.DerivedIndexcard.objects.create( + trove_db.DerivedIndexcard.objects.get_or_create( upriver_indexcard=_indexcard, deriver_identifier=trove_db.ResourceIdentifier.objects.get_or_create_for_iri(TROVE['derive/osfmap_json']), ) - trove_db.LatestIndexcardRdf.objects.create( - from_raw_datum=_raw, - indexcard=_indexcard, - focus_iri=focus_iri, - rdf_as_turtle=rdf.turtle_from_tripledict(rdf_tripledict), - turtle_checksum_iri='foo', # not enforced - ) - self._indexcard_focus_by_uuid[str(_indexcard.uuid)] = focus_iri return _indexcard + def _update_indexcard_content( + self, + indexcard: trove_db.Indexcard, + focus_iri: str, + rdf_tripledict: rdf.RdfTripleDictionary, + ) -> None: + _raw = factories.RawDatumFactory(suid=indexcard.source_record_suid) + trove_db.LatestIndexcardRdf.objects.update_or_create( + indexcard=indexcard, + defaults={ + 'from_raw_datum': _raw, + 'focus_iri': focus_iri, + 'rdf_as_turtle': rdf.turtle_from_tripledict(rdf_tripledict), + 'turtle_checksum_iri': 'foo', # not enforced + }, + ) + self._indexcard_focus_by_uuid[str(indexcard.uuid)] = focus_iri + def _create_supplement( self, indexcard: trove_db.Indexcard, diff --git a/trove/models/indexcard.py b/trove/models/indexcard.py index 21005d1e9..0616a2f44 100644 --- a/trove/models/indexcard.py +++ b/trove/models/indexcard.py @@ -205,7 +205,7 @@ def supplementary_rdf_set(self): def get_iri(self): return trove_indexcard_iri(self.uuid) - def pls_delete(self): + def pls_delete(self, *, notify_indexes=True): # do not actually delete Indexcard, just mark deleted: if self.deleted is None: self.deleted = timezone.now() @@ -220,9 +220,10 @@ def pls_delete(self): .filter(upriver_indexcard=self) .delete() ) - # TODO: rearrange to avoid local import - from share.search.index_messenger import IndexMessenger - IndexMessenger().notify_indexcard_update([self]) + if notify_indexes: + # TODO: rearrange to avoid local import + from share.search.index_messenger import IndexMessenger + IndexMessenger().notify_indexcard_update([self]) def __repr__(self): return f'<{self.__class__.__qualname__}({self.uuid}, {self.source_record_suid})' From b7a5ae923dcdd0bf77829d7db5e0b828fcc8305e Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 13 Nov 2024 14:04:44 -0500 Subject: [PATCH 4/5] ensure index strategy currency --- share/search/index_strategy/_base.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/share/search/index_strategy/_base.py b/share/search/index_strategy/_base.py index 1f21aefd5..92ce83c8f 100644 --- a/share/search/index_strategy/_base.py +++ b/share/search/index_strategy/_base.py @@ -1,4 +1,5 @@ import abc +import functools import logging import typing @@ -63,8 +64,9 @@ def indexname_prefix(self): def indexname_wildcard(self): return f'{self.indexname_prefix}*' - @property + @functools.cached_property def current_indexname(self): + self.assert_strategy_is_current() return ''.join(( self.indexname_prefix, self.CURRENT_STRATEGY_CHECKSUM.hexdigest, From f5b21690c9391a19c7a4aaf6dbb0f29d805c067a Mon Sep 17 00:00:00 2001 From: abram axel booth Date: Wed, 13 Nov 2024 14:05:09 -0500 Subject: [PATCH 5/5] incidental: remove dead code --- share/tasks/__init__.py | 8 -------- trove/trovesearch/search_params.py | 4 +--- 2 files changed, 1 insertion(+), 11 deletions(-) diff --git a/share/tasks/__init__.py b/share/tasks/__init__.py index 4c4baecbe..fa93ccf76 100644 --- a/share/tasks/__init__.py +++ b/share/tasks/__init__.py @@ -79,14 +79,6 @@ def schedule_index_backfill(self, index_backfill_pk): .exclude(source_config__source__is_deleted=True) .values_list('id', flat=True) ) - elif _messagetype == MessageType.BACKFILL_IDENTIFIER: - _targetid_queryset = ( - trove_db.ResourceIdentifier.objects - .exclude(suid_set__source_config__disabled=True) - .exclude(suid_set__source_config__source__is_deleted=True) - .values_list('id', flat=True) - .distinct() - ) else: raise ValueError(f'unknown backfill messagetype {_messagetype}') _chunk_size = settings.ELASTICSEARCH['CHUNK_SIZE'] diff --git a/trove/trovesearch/search_params.py b/trove/trovesearch/search_params.py index 67469e80f..c4ecabbd1 100644 --- a/trove/trovesearch/search_params.py +++ b/trove/trovesearch/search_params.py @@ -468,10 +468,9 @@ class CardsearchParams(BaseTroveParams): cardsearch_textsegment_set: frozenset[Textsegment] cardsearch_filter_set: frozenset[SearchFilter] index_strategy_name: str | None - sort_list: tuple[SortParam] + sort_list: tuple[SortParam, ...] page_cursor: PageCursor related_property_paths: tuple[Propertypath, ...] - unnamed_iri_values: frozenset[str] @classmethod def parse_queryparams(cls, queryparams: QueryparamDict) -> dict: @@ -485,7 +484,6 @@ def parse_queryparams(cls, queryparams: QueryparamDict) -> dict: 'page_cursor': _get_page_cursor(queryparams), 'include': None, # TODO 'related_property_paths': _get_related_property_paths(_filter_set), - 'unnamed_iri_values': frozenset(), # TODO: frozenset(_get_unnamed_iri_values(_filter_set)), } def to_querydict(self) -> QueryDict: