From d7854d27241daeeb7e7d5895e151f443eeab9b01 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Mon, 7 Oct 2024 16:23:57 +0200 Subject: [PATCH] Support auto-complete suggestions created from multiple fields Like in lobid-gnd: configure fields to use in the `format` param (e.g. `json:title,contribution`), or use `format=json:suggest` for default fields. See https://jira.hbz-nrw.de/browse/RPB-141 --- .../controllers/resources/Application.java | 151 +++++++++++++----- web/app/views/api.scala.html | 9 +- web/test/tests/SuggestionsTest.java | 126 +++++++++++++++ 3 files changed, 241 insertions(+), 45 deletions(-) create mode 100644 web/test/tests/SuggestionsTest.java diff --git a/web/app/controllers/resources/Application.java b/web/app/controllers/resources/Application.java index e11446e33..0faa4705c 100644 --- a/web/app/controllers/resources/Application.java +++ b/web/app/controllers/resources/Application.java @@ -14,6 +14,7 @@ import java.util.Arrays; import java.util.Collections; import java.util.Comparator; +import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; @@ -24,6 +25,8 @@ import java.util.concurrent.Callable; import java.util.function.Function; import java.util.function.Predicate; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.StreamSupport; @@ -48,6 +51,7 @@ import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.node.ObjectNode; import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Lists; import com.typesafe.config.Config; import com.typesafe.config.ConfigFactory; @@ -103,7 +107,8 @@ public class Application extends Controller { public final static Config CONFIG = ConfigFactory.parseFile(RESOURCES_CONF).resolve(); public final static String MARC_XML_API = CONFIG.getString("mrcx.api"); - + /** Value delimiter for multiple values in suggest responses. */ + public static final String VALUE_DELIMITER = "; "; static Form queryForm = Form.form(String.class); @@ -293,8 +298,8 @@ private static Promise createResult(final String q, queryDetails)).as("application/rss+xml"); default: return responseFormat.startsWith("json:") - ? withCallback( - toSuggestions(json, responseFormat.split(":")[1])) + ? withCallback(Json.parse( + toSuggestions(json, responseFormat.split(":")[1]))) : responseFor(withQueryMetadata(json, index), Accept.Format.JSON_LD.queryParamString); } @@ -366,49 +371,113 @@ private static Status withCallback(final JsonNode json) { final String[] callback = request() == null || request().queryString() == null ? null : request().queryString().get("callback"); - return callback != null ? ok(String.format("/**/%s(%s)", callback[0], json)) - .as("application/javascript; charset=utf-8") : ok(json); - } - - private static JsonNode toSuggestions(JsonNode json, String field) { - Stream documents = StreamSupport - .stream(Spliterators.spliteratorUnknownSize(json.elements(), 0), false); - Stream suggestions = documents.flatMap((JsonNode document) -> { - Stream nodes = fieldValues(field, document); - return nodes.map((JsonNode node) -> { - boolean isTextual = node.isTextual(); - Optional label = isTextual ? Optional.ofNullable(node) - : findValueOptional(node, "label"); - Optional id = isTextual ? getOptional(document, "id") - : findValueOptional(node, "id"); - Optional type = isTextual ? getOptional(document, "type") - : findValueOptional(node, "type"); - JsonNode types = type.orElseGet(() -> Json.toJson(new String[] { "" })); - String typeText = types.elements().next().textValue(); - return Json.toJson(ImmutableMap.of(// - "label", label.orElseGet(() -> Json.toJson("")), // - "id", id.orElseGet(() -> label.orElseGet(() -> Json.toJson(""))), // - "category", - typeText.equals("BibliographicResource") - ? Lobid.typeLabel(Json.fromJson(types, List.class)) - : typeText)); - }); + return callback != null + ? ok(String.format("/**/%s(%s)", callback[0], json)) + .as("application/javascript; charset=utf-8") + : ok(Json.prettyPrint(json)).as("application/json; charset=utf-8"); + } + + static String toSuggestions(JsonNode json, String labelFields) { + Stream defaultFields = + Stream.of("title", "contribution", "medium", "startDate-endDate"); + String fields = labelFields.equals("suggest") + ? defaultFields.collect(Collectors.joining(",")) : labelFields; + Stream documents = Lists.newArrayList(json.elements()).stream(); + Stream suggestions = documents.map((JsonNode document) -> { + Optional id = getOptional(document, "id"); + Optional type = getOptional(document, "type"); + Stream labels = Arrays.asList(fields.split(",")).stream() + .map(String::trim).map(field -> fieldValues(field, document) + .map(Json::toJson).map((JsonNode node) -> // + (node.isTextual() ? Optional.ofNullable(node) + : Optional.ofNullable(node.findValue("label"))) + .orElseGet(() -> Json.toJson("")).asText()) + .collect(Collectors.joining("; "))); + List categories = + Lists.newArrayList(type.orElseGet(() -> Json.toJson("[]")).elements()) + .stream().map(JsonNode::asText) + .filter(t -> !t.equals("BibliographicResource")) + .collect(Collectors.toList()); + return Json.toJson(toSuggestionsMap(id, labels, categories)); }); - return Json.toJson(suggestions.collect(Collectors.toList())); + return Json.toJson(suggestions.distinct().collect(Collectors.toList())) + .toString(); + } + + @SuppressWarnings("serial") + private static Map toSuggestionsMap(Optional id, + Stream labels, List categories) { + return new HashMap() { + { + put("label", labels.filter(t -> !t.trim().isEmpty()) + .collect(Collectors.joining(" | "))); + put("id", id.orElseGet(() -> Json.toJson(""))); + put("category", + categories.stream().sorted().collect(Collectors.joining(" | "))); + } + }; } - private static Stream fieldValues(String field, JsonNode document) { - return document.findValues(field).stream().flatMap((node) -> { - return node.isArray() - ? StreamSupport.stream( - Spliterators.spliteratorUnknownSize(node.elements(), 0), false) - : Arrays.asList(node).stream(); - }); + private static Stream fieldValues(String f, JsonNode document) { + String field = f; + // standard case: `field` is a plain field name, use that: + List result = flatStrings(document.findValues(field)); + if (result.isEmpty()) { + // `label_fieldName` template, e.g. `since_startDate` + if (field.contains("_")) { + Matcher matcher = Pattern.compile("([^_]+)_([A-Za-z]+)").matcher(field); + while (matcher.find()) { + String label = matcher.group(1); + String fieldName = matcher.group(2); + List findValues = document.findValues(fieldName); + if (!findValues.isEmpty()) { + String values = flatStrings(findValues).stream() + .collect(Collectors.joining(VALUE_DELIMITER)); + field = field.replace(matcher.group(), label + " " + values); + } else { + field = field.replace(matcher.group(), ""); + } + } + result = + field.trim().isEmpty() ? Arrays.asList() : Arrays.asList(field); + } + // date ranges, e.g. `startDate-endDate` + else if (field.contains("-")) { + String[] fields = field.split("-"); + String v1 = year(document.findValue(fields[0])); + String v2 = year(document.findValue(fields[1])); + result = v1.isEmpty() && v2.isEmpty() ? Lists.newArrayList() + : Arrays.asList(String.format("%s–%s", v1, v2)); + } + } + return result.stream(); + } + + private static List flatStrings(List values) { + return values.stream().flatMap(node -> toArray(node)) + .map(node -> toString(node)).collect(Collectors.toList()); + } + + private static Stream toArray(JsonNode node) { + return node.isArray() ? Lists.newArrayList(node.elements()).stream() + : Arrays.asList(node).stream(); + } + + private static String toString(JsonNode node) { + return year((node.isTextual() ? Optional.ofNullable(node) + : Optional.ofNullable(node.findValue("label"))) + .orElseGet(() -> Json.toJson("")).asText()); + } + + private static String year(JsonNode node) { + if (node == null || !node.isTextual()) { + return ""; + } + return year(node.asText()); } - private static Optional findValueOptional(JsonNode json, - String field) { - return Optional.ofNullable(json.findValue(field)); + private static String year(String text) { + return text.matches("\\d{4}-\\d{2}-\\d{2}") ? text.split("-")[0] : text; } private static Optional getOptional(JsonNode json, String field) { diff --git a/web/app/views/api.scala.html b/web/app/views/api.scala.html index ca5291725..69f9f01b0 100644 --- a/web/app/views/api.scala.html +++ b/web/app/views/api.scala.html @@ -91,10 +91,11 @@

Bulk-Downloads Siehe auch diesen Abschnitt zu Bulk-Downloads in unserem Blog.

Autovervollständigung

-

Die API unterstützt ein spezielles Antwortformat mit Vorschlägen zur Vervollständigung aus einem angegebenen Feld:

- @desc("Titel vorschlagen: \"format=json:title\"", resources.routes.Application.query("title:Werth", format="json:title")) - @desc("Mitwirkende vorschlagen: \"format=json:agent\"", resources.routes.Application.query("contribution.agent.label:Hein", format="json:agent")) - @desc("Schlagwort vorschlagen: \"format=json:subject\"", resources.routes.Application.query("subject.componentList.label:Pferd", format="json:subject")) +

Die API unterstützt ein spezielles Antwortformat mit Vorschlägen zur Vervollständigung.

+ @desc("Standardformat für Vorschläge verwenden: \"format=json:suggest\"", resources.routes.Application.query("Twain", format="json:suggest")) + @desc("Bestimmtes Feld für Vorschläge verwenden: \"format=json:title\"", resources.routes.Application.query("Twain", format="json:title")) + @desc("Vorschläge aus mehreren Feldern zusammenbauen: \"format=json:title,contribution\"", resources.routes.Application.query("Twain", format="json:title,contribution")) + @desc("Feld-Templates zur Anpassung und Gruppierung: \"format=json:title,Erschienen_startDate bei_publishedBy\"", resources.routes.Application.query("Twain", format="json:title,Erschienen_startDate bei_publishedBy"))

Damit kann z.B. eine Autovervollständigung umgesetzt werden, bei der zur Suche an Stelle des gewählten Labels die entsprechende ID verwendet werden kann:

diff --git a/web/test/tests/SuggestionsTest.java b/web/test/tests/SuggestionsTest.java new file mode 100644 index 000000000..598d9cc23 --- /dev/null +++ b/web/test/tests/SuggestionsTest.java @@ -0,0 +1,126 @@ +package tests; + +import static org.hamcrest.CoreMatchers.allOf; +import static org.hamcrest.CoreMatchers.containsString; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.core.IsEqual.equalTo; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static play.test.Helpers.GET; +import static play.test.Helpers.contentAsString; +import static play.test.Helpers.fakeApplication; +import static play.test.Helpers.fakeRequest; +import static play.test.Helpers.route; +import static play.test.Helpers.running; + +import org.junit.Test; + +import play.Application; +import play.libs.Json; +import play.mvc.Result; + +/** + * Test suggestion responses (see {@link controllers.resources.Application}) + */ +@SuppressWarnings("javadoc") +public class SuggestionsTest extends LocalIndexSetup { + + @Test + public void suggestionsWithoutCallback() { + Application application = fakeApplication(); + running(application, () -> { + Result result = route(application, fakeRequest(GET, + "/resources/search?q=*&filter=type:Book&format=json:title,contribution")); + assertNotNull("We have a result", result); + assertThat(result.contentType(), equalTo("application/json")); + String content = contentAsString(result); + assertNotNull("We can parse the result as JSON", Json.parse(content)); + assertThat(content, + allOf(// + containsString("label"), // + containsString("id"), // + containsString("category"))); + assertTrue("We used both given fields for any of the labels", + Json.parse(content).findValues("label").stream() + .anyMatch(label -> label.asText().contains(" | "))); + }); + + } + + @Test + public void suggestionsWithCallback() { + Application application = fakeApplication(); + running(application, () -> { + Result result = route(application, fakeRequest(GET, + "/resources/search?q=*&filter=type:Book&format=json:title&callback=test")); + assertNotNull("We have a result", result); + assertThat(result.contentType(), equalTo("application/javascript")); + assertThat(contentAsString(result), + allOf(containsString("test("), // callback + containsString("label"), containsString("id"), + containsString("category"))); + }); + } + + @Test + public void suggestionsCorsHeader() { + Application application = fakeApplication(); + running(application, () -> { + Result result = route(application, + fakeRequest(GET, "/resources/search?q=*&format=json:title")); + assertNotNull("We have a result", result); + assertThat(result.header("Access-Control-Allow-Origin"), equalTo("*")); + }); + + } + + @Test + public void suggestionsTemplate() { + Application application = fakeApplication(); + running(application, () -> { + String format = "json:title,ab_startDate+als_edition"; + Result result = route(application, fakeRequest(GET, + "/resources/search?q=*&filter=type:Book&format=" + format)); + assertNotNull("We have a result", result); + assertThat(result.contentType(), equalTo("application/json")); + String content = contentAsString(result); + assertNotNull("We can parse the result as JSON", Json.parse(content)); + assertTrue( + "We replaced the field names in the template with their values", + Json.parse(content).findValues("label").stream() + .anyMatch(label -> label.asText().contains("als "))); + }); + } + + @Test + public void suggestionsTemplateMultiValues() { + Application application = fakeApplication(); + running(application, () -> { + String format = "json:title,contribution,about_subject"; + Result result = route(application, + fakeRequest(GET, + "/resources/search?q=Volksschulwesens&filter=type:Book&format=" + + format)); + assertNotNull("We have a result", result); + assertThat(result.contentType(), equalTo("application/json")); + String content = contentAsString(result); + assertNotNull("We can parse the result as JSON", Json.parse(content)); + assertThat("Multi-values use consistent delimiter", content, allOf( + containsString("Handwörterbuch des Volksschulwesens"), + containsString("about Erziehung, Bildung, Unterricht; Volksschule"))); + }); + } + + @Test + public void suggestionsArePrettyPrinted() { + Application application = fakeApplication(); + running(application, () -> { + Result result = route(application, + fakeRequest(GET, "/resources/search?q=*&format=json:suggest")); + assertNotNull(result); + assertThat(result.contentType(), equalTo("application/json")); + assertThat(contentAsString(result), containsString("}, {\n")); + }); + } + +}