From 80d8d8421ba0559c439526e3cc3be279cb5d32b0 Mon Sep 17 00:00:00 2001 From: Alexei Peters Date: Tue, 21 Jan 2025 19:35:49 -0800 Subject: [PATCH] initial work on updated index to support related resource search and paging, re #8 --- afrc/src/afrc/Search/SearchPage.vue | 58 +++- .../Search/components/SearchResultItem.vue | 8 +- afrc/views/search_api.py | 262 +++++++++++------- 3 files changed, 222 insertions(+), 106 deletions(-) diff --git a/afrc/src/afrc/Search/SearchPage.vue b/afrc/src/afrc/Search/SearchPage.vue index cb74c6f..e3a94be 100644 --- a/afrc/src/afrc/Search/SearchPage.vue +++ b/afrc/src/afrc/Search/SearchPage.vue @@ -6,6 +6,8 @@ import { useGettext } from "vue3-gettext"; import Toast from "primevue/toast"; import { useToast } from "primevue/usetoast"; import Button from "primevue/button"; +import Paginator from "primevue/paginator"; + import { DEFAULT_ERROR_TOAST_LIFE, ERROR } from "@/afrc/Search/constants.ts"; import arches from "arches"; @@ -27,6 +29,7 @@ const overlays: Ref = ref([]); const sources: Ref = ref([]); const resultsSelected: Ref = ref([]); const dataLoaded = ref(false); +const newQuery = ref(false); const toast = useToast(); const { $gettext } = useGettext(); @@ -38,6 +41,7 @@ watch(queryString, () => { function updateFilter(componentName: string, value: object) { console.log(value); + newQuery.value = true; // Test for an empty object function isEmpty(value: unknown) { if (value === null || value === undefined) { @@ -79,24 +83,31 @@ function getQueryObject(uri: string | null): GenericObject { return obj; } -const doQuery = function () { +async function doQuery() { const queryObj = JSON.parse(queryString.value ?? "{}"); Object.keys(queryObj).forEach((key) => { queryObj[key] = JSON.stringify(queryObj[key]); }); + if (newQuery.value) { + const componentName = "paging-filter"; + delete queryObj[componentName]; + newQuery.value = false; + } + const qs = new URLSearchParams(queryObj); - fetch(arches.urls["api-search"] + "?" + qs.toString()) + return fetch(arches.urls["api-search"] + "?" + qs.toString()) .then((response) => response.json()) .then((data) => { + console.log(data.total_results); console.log(data); searchResults.value = data.results.hits.hits; resultsCount.value = data.total_results; resultsSelected.value = []; }); -}; +} async function fetchSystemMapData() { try { @@ -133,6 +144,20 @@ async function fetchSystemMapData() { } } +async function onPageChange(event: { + first: number; + rows: number; + page: number; + pageCount: number; +}) { + console.log("onPageChange"); + console.log(queryString.value); + const componentName = "paging-filter"; + query[componentName] = event.page + 1; + queryString.value = JSON.stringify(query); + console.log(queryString.value); +} + onMounted(async () => { doQuery(); await fetchSystemMapData(); @@ -174,11 +199,17 @@ onMounted(async () => {
{{ resultsCount }} Results
+ +
@@ -230,6 +261,7 @@ onMounted(async () => { :root { font-size: 16px; } + .afrc-container { font-family: Arial, sans-serif; background-color: #f8f8f8; @@ -238,50 +270,61 @@ onMounted(async () => { flex-direction: column; flex-grow: 1; } + main { display: flex; flex-direction: row; - height: 100vh; + flex-grow: 1; } + header { font-size: 2rem; display: flex; border-bottom: 1px #ccc solid; padding: 5px; } + .view-buttons { display: flex; gap: 5px; margin-left: 20px; } + section.afrc-search-results-panel { display: flex; flex-direction: column; flex-grow: 1; margin: 15px; overflow-y: auto; + height: calc(100vh - 150px); } + .search-result-list { display: flex; flex-direction: column; + flex-grow: 1; gap: 20px; } + .result-count { font-size: 1.6rem; margin: 0px; margin-bottom: 15px; } + aside { max-width: 25%; border-left: 1px #ccc solid; padding: 15px; } + .facets { padding: 16px; display: flex; flex-wrap: wrap; gap: 10px; } + .facet-item { font-size: 1rem; padding: 16px; @@ -291,6 +334,7 @@ aside { max-width: 15rem; min-height: 15rem; } + .facet-item.selected { background-color: #f0f8ff; border-color: #007bff; diff --git a/afrc/src/afrc/Search/components/SearchResultItem.vue b/afrc/src/afrc/Search/components/SearchResultItem.vue index fba9f7c..d530869 100644 --- a/afrc/src/afrc/Search/components/SearchResultItem.vue +++ b/afrc/src/afrc/Search/components/SearchResultItem.vue @@ -40,14 +40,14 @@ function clearResult() {
- {{ props.searchResult._source.displayname }} + {{ props.searchResult?._source.displayname }}
- {{ searchResult._source.displaydescription }} + {{ searchResult?._source.displaydescription }}
diff --git a/afrc/views/search_api.py b/afrc/views/search_api.py index 9d7a51e..78b8646 100644 --- a/afrc/views/search_api.py +++ b/afrc/views/search_api.py @@ -24,15 +24,18 @@ from django.utils.translation import get_language, gettext as _ from django.db.models import Q -from arches.app.models.models import ResourceXResource +from arches.app.models.models import ResourceXResource, ResourceInstance from arches.app.models.system_settings import settings from arches.app.search.components.base import SearchFilterFactory from arches.app.search.components.search_results import get_localized_descriptor -from arches.app.search.elasticsearch_dsl_builder import Query, Ids +from arches.app.search.elasticsearch_dsl_builder import Query, Ids, Bool, Match, Nested from arches.app.search.mappings import RESOURCES_INDEX from arches.app.search.search_engine_factory import SearchEngineFactory from arches.app.utils.response import JSONResponse, JSONErrorResponse +from arches.app.search.es_mapping_modifier import EsMappingModifier + + logger = logging.getLogger(__name__) @@ -76,7 +79,6 @@ def search_results(request, returnDsl=False): class SearchAPI(View): def get(self, request): - base_resource_type_filter = [ { "graphid": settings.COLLECTIONS_GRAPHID, @@ -84,7 +86,11 @@ def get(self, request): } ] - current_page = request.GET.get("paging-filter", 1) + # import ipdb + + # ipdb.sset_trace() + + current_page = int(request.GET.get("paging-filter", 1)) page_size = int(settings.SEARCH_ITEMS_PER_PAGE) print(page_size) @@ -93,9 +99,10 @@ def get(self, request): request.GET = request_copy direct_results = search_results(request) print(current_page * page_size) + # print(direct_results) print(direct_results["total_results"]) - if direct_results["total_results"] >= current_page * page_size: + if int(direct_results["total_results"]) >= current_page * page_size: print("we have direct hits on collections") return JSONResponse(content=search_results(request)) else: @@ -111,7 +118,11 @@ def get(self, request): request_copy["resource-type-filter"] = json.dumps(base_resource_type_filter) request_copy["paging-filter"] = 1 request.GET = request_copy + backfill_results = search_results(request) + # import ipdb + + # ipdb.sset_trace() # first page of hits of potentially related resources resourceinstanceids = [ @@ -123,7 +134,7 @@ def get(self, request): search_relationships_via_ORM( resourceinstanceids, target_graphid=settings.COLLECTIONS_GRAPHID, - depth=3, + depth=2, ) ) @@ -198,98 +209,157 @@ def get_related_resourceinstanceids(resourceinstanceids, depth=1): return get_related_resourceinstanceids(resourceinstanceids, depth=depth) -def search_relationships(resourceinstanceids=None, target_graphid=None): - with connection.cursor() as cursor: - sql = """ - WITH RECURSIVE resource_traversal_from(resourcexid, resourceid, graphid, depth) AS ( - -- Anchor member: start with the given list of starting resource IDs - SELECT - resource_x_resource.resourcexid, resourceinstanceidto AS resourceid, resourceinstanceto_graphid AS graphid, 0 AS depth - FROM - resource_x_resource - WHERE - resourceinstanceidfrom = ANY(%s::uuid[]) - - UNION ALL - - -- Recursive member: traverse the table bidirectionally - SELECT - resource_x_resource.resourcexid, resource_x_resource.resourceinstanceidto AS resourceid, resourceinstanceto_graphid AS graphid, rt.depth + 1 - FROM - resource_x_resource - INNER JOIN - resource_traversal_from rt - ON - resource_x_resource.resourceinstanceidfrom = rt.resourceid - WHERE - rt.graphid != %s::uuid - - ) CYCLE resourcexid SET is_cycle USING path - - SELECT DISTINCT resourceid - FROM resource_traversal_from - WHERE graphid = %s::uuid - AND DEPTH < 3 - - UNION ( - WITH RECURSIVE resource_traversal_to(resourcexid, resourceid, graphid, depth) AS ( - -- Anchor member: start with the given list of starting resource IDs - SELECT - resource_x_resource.resourcexid, resourceinstanceidfrom AS resourceid, resourceinstancefrom_graphid AS graphid, 0 AS depth - FROM - resource_x_resource - WHERE - resourceinstanceidto = ANY(%s::uuid[]) - - UNION ALL - - SELECT - resource_x_resource.resourcexid, resource_x_resource.resourceinstanceidfrom AS resourceid, resourceinstancefrom_graphid AS graphid, rt.depth + 1 - FROM - resource_x_resource - INNER JOIN - resource_traversal_to rt - ON - resource_x_resource.resourceinstanceidto = rt.resourceid - WHERE - rt.graphid != %s::uuid - - ) CYCLE resourcexid SET is_cycle USING path - - SELECT DISTINCT resourceid - FROM resource_traversal_to - WHERE graphid = %s::uuid - AND DEPTH < 3 +def search_direct_relationships_via_ORM( + resourceinstanceids=None, + target_graphid=None, + depth=1, +): + hits = set() + + # This is a placeholder for the ORM version of the search_relationships function + # This function should return a list of resourceinstanceids of reference collections + # that are related to the given list of resourceinstanceids + def get_related_resourceinstanceids(resourceinstanceids, depth=1): + depth -= 1 + to_crawl = set() + + # This is a placeholder for the ORM version of the get_related_resourceinstanceids function + # This function should return a list of resourceinstanceids of resources that are related to + # the given list of resourceinstanceids + instances_query = Q(resourceinstanceidfrom__in=resourceinstanceids) | Q( + resourceinstanceidto__in=resourceinstanceids + ) + + for res in ResourceXResource.objects.filter(instances_query).values_list( + "resourceinstanceidfrom", + "resourceinstancefrom_graphid", + "resourceinstanceidto", + "resourceinstanceto_graphid", + ): + if res[0] not in resourceinstanceids and res[0] not in hits: + hits.add(res[0]) + + if res[3] not in resourceinstanceids and res[3] not in hits: + hits.add(res[3]) + # if str(res[1]) != target_graphid: + # else: + # hits.add(res[0]) + + # if str(res[3]) != target_graphid: + # to_crawl.add(res[2]) + # else: + # hits.add(res[2]) + + if depth > 0: + get_related_resourceinstanceids(list(hits), depth=depth) + + return hits + + return get_related_resourceinstanceids(resourceinstanceids, depth=depth) + + +class RREsMappingModifier(EsMappingModifier): + + counter = 1 + + def __init__(self): + pass + + @staticmethod + def get_data_from_function(resourceinstanceids): + with connection.cursor() as cursor: + cursor.execute( + "SELECT * FROM __arches_get_values_for_resourceinstances(%s)", + [resourceinstanceids], ) - """ - print( - sql - % ( - resourceinstanceids, - target_graphid, - target_graphid, - resourceinstanceids, - target_graphid, - target_graphid, + rows = cursor.fetchall() + return rows + + @staticmethod + def add_search_terms(resourceinstance, document, terms): + if str(resourceinstance.graph_id) != settings.COLLECTIONS_GRAPHID: + return + + if RREsMappingModifier.get_mapping_property() not in document: + document[RREsMappingModifier.get_mapping_property()] = [] + + related_resource_ids = list( + search_direct_relationships_via_ORM( + resourceinstanceids=[resourceinstance.resourceinstanceid], + target_graphid=settings.COLLECTIONS_GRAPHID, + depth=2, ) ) - cursor.execute( - sql, - [ - resourceinstanceids, - target_graphid, - target_graphid, - resourceinstanceids, - target_graphid, - target_graphid, - ], + # print(related_resource_ids) + + # Example usage + for item in RREsMappingModifier.get_data_from_function(related_resource_ids): + # print(item) + document[RREsMappingModifier.get_mapping_property()].append(item[0]) + + @staticmethod + def create_nested_custom_filter(term, original_element): + if "nested" not in original_element: + return original_element + document_key = RREsMappingModifier.get_mapping_property() + custom_filter = Bool() + # custom_filter.should( + # Match( + # field="%s.custom_value" % document_key, + # query=term["value"], + # type="phrase_prefix", + # ) + # ) + custom_filter.should( + Match( + field=document_key, + query=term["value"], + type="phrase_prefix", + ) ) - hits = [] - # hits = [str(row[0]) for row in cursor.fetchall()] - for row in cursor.fetchall(): - hits.append(str(row[0])) - print(len(hits)) - return hits - + nested_custom_filter = Nested(path=document_key, query=custom_filter) + new_must_element = Bool() + new_must_element.should(original_element) + new_must_element.should(nested_custom_filter) + new_must_element.dsl["bool"]["minimum_should_match"] = 1 + return new_must_element + + @staticmethod + def add_search_filter(search_query, term): + document_key = RREsMappingModifier.get_mapping_property() + # original_must_filter = search_query.dsl["bool"]["must"] + search_query.dsl["bool"]["must"] = [] + search_query.must( + Match( + field=document_key, + query=term["value"], + type="phrase_prefix", + ) + ) + # for must_element in original_must_filter: + # search_query.must( + # RREsMappingModifier.create_nested_custom_filter(term, must_element) + # ) + + # original_must_filter = search_query.dsl["bool"]["must_not"] + # search_query.dsl["bool"]["must_not"] = [] + # for must_element in original_must_filter: + # search_query.must_not( + # RREsMappingModifier.create_nested_custom_filter(term, must_element) + # ) + + @staticmethod + def get_mapping_definition(): + """ + Defines the ES structure of the custom search document section. Called when the initial ES resources index is created. -# {"query": {"ids": {"values": ["fba9bdb3-29a6-3cc2-bd7e-2d3fa7a08c78"]}}, "start": 0, "limit": 0} + :return: dict of the custom document section + :rtype dict + """ + return { + "type": "text", + "fields": { + "raw": {"type": "keyword", "ignore_above": 256}, + "folded": {"type": "text", "analyzer": "folding"}, + }, + }