From d7854d27241daeeb7e7d5895e151f443eeab9b01 Mon Sep 17 00:00:00 2001
From: Fabian Steeg
Date: Mon, 7 Oct 2024 16:23:57 +0200
Subject: [PATCH] Support auto-complete suggestions created from multiple
fields
Like in lobid-gnd: configure fields to use in the `format` param
(e.g. `json:title,contribution`), or use `format=json:suggest`
for default fields.
See https://jira.hbz-nrw.de/browse/RPB-141
---
.../controllers/resources/Application.java | 151 +++++++++++++-----
web/app/views/api.scala.html | 9 +-
web/test/tests/SuggestionsTest.java | 126 +++++++++++++++
3 files changed, 241 insertions(+), 45 deletions(-)
create mode 100644 web/test/tests/SuggestionsTest.java
diff --git a/web/app/controllers/resources/Application.java b/web/app/controllers/resources/Application.java
index e11446e33..0faa4705c 100644
--- a/web/app/controllers/resources/Application.java
+++ b/web/app/controllers/resources/Application.java
@@ -14,6 +14,7 @@
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
+import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
@@ -24,6 +25,8 @@
import java.util.concurrent.Callable;
import java.util.function.Function;
import java.util.function.Predicate;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
@@ -48,6 +51,7 @@
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.node.ObjectNode;
import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Lists;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
@@ -103,7 +107,8 @@ public class Application extends Controller {
public final static Config CONFIG =
ConfigFactory.parseFile(RESOURCES_CONF).resolve();
public final static String MARC_XML_API = CONFIG.getString("mrcx.api");
-
+ /** Value delimiter for multiple values in suggest responses. */
+ public static final String VALUE_DELIMITER = "; ";
static Form queryForm = Form.form(String.class);
@@ -293,8 +298,8 @@ private static Promise createResult(final String q,
queryDetails)).as("application/rss+xml");
default:
return responseFormat.startsWith("json:")
- ? withCallback(
- toSuggestions(json, responseFormat.split(":")[1]))
+ ? withCallback(Json.parse(
+ toSuggestions(json, responseFormat.split(":")[1])))
: responseFor(withQueryMetadata(json, index),
Accept.Format.JSON_LD.queryParamString);
}
@@ -366,49 +371,113 @@ private static Status withCallback(final JsonNode json) {
final String[] callback =
request() == null || request().queryString() == null ? null
: request().queryString().get("callback");
- return callback != null ? ok(String.format("/**/%s(%s)", callback[0], json))
- .as("application/javascript; charset=utf-8") : ok(json);
- }
-
- private static JsonNode toSuggestions(JsonNode json, String field) {
- Stream documents = StreamSupport
- .stream(Spliterators.spliteratorUnknownSize(json.elements(), 0), false);
- Stream suggestions = documents.flatMap((JsonNode document) -> {
- Stream nodes = fieldValues(field, document);
- return nodes.map((JsonNode node) -> {
- boolean isTextual = node.isTextual();
- Optional label = isTextual ? Optional.ofNullable(node)
- : findValueOptional(node, "label");
- Optional id = isTextual ? getOptional(document, "id")
- : findValueOptional(node, "id");
- Optional type = isTextual ? getOptional(document, "type")
- : findValueOptional(node, "type");
- JsonNode types = type.orElseGet(() -> Json.toJson(new String[] { "" }));
- String typeText = types.elements().next().textValue();
- return Json.toJson(ImmutableMap.of(//
- "label", label.orElseGet(() -> Json.toJson("")), //
- "id", id.orElseGet(() -> label.orElseGet(() -> Json.toJson(""))), //
- "category",
- typeText.equals("BibliographicResource")
- ? Lobid.typeLabel(Json.fromJson(types, List.class))
- : typeText));
- });
+ return callback != null
+ ? ok(String.format("/**/%s(%s)", callback[0], json))
+ .as("application/javascript; charset=utf-8")
+ : ok(Json.prettyPrint(json)).as("application/json; charset=utf-8");
+ }
+
+ /** Builds the suggestions (label, id, category) for the documents in `json`, as a JSON array string. */
+ static String toSuggestions(JsonNode json, String labelFields) {
+ // `suggest` selects the default label fields, anything else is used as a comma-separated field list:
+ Stream<String> defaultFields =
+ Stream.of("title", "contribution", "medium", "startDate-endDate");
+ String fields = labelFields.equals("suggest")
+ ? defaultFields.collect(Collectors.joining(",")) : labelFields;
+ Stream<JsonNode> documents = Lists.newArrayList(json.elements()).stream();
+ Stream<JsonNode> suggestions = documents.map((JsonNode document) -> {
+ Optional<JsonNode> id = getOptional(document, "id");
+ Optional<JsonNode> type = getOptional(document, "type");
+ // One label part per configured field; multiple values of one field are joined with the delimiter:
+ Stream<String> labels = Arrays.asList(fields.split(",")).stream()
+ .map(String::trim).map(field -> fieldValues(field, document)
+ .collect(Collectors.joining(VALUE_DELIMITER)));
+ // A missing `type` yields no categories; `BibliographicResource` is excluded from the categories:
+ List<String> categories =
+ Lists.newArrayList(type.orElseGet(() -> Json.toJson(new String[0])).elements())
+ .stream().map(JsonNode::asText)
+ .filter(t -> !t.equals("BibliographicResource"))
+ .collect(Collectors.toList());
+ return Json.toJson(toSuggestionsMap(id, labels, categories));
});
- return Json.toJson(suggestions.collect(Collectors.toList()));
+ // `distinct` drops documents that result in an identical suggestion:
+ return Json.toJson(suggestions.distinct().collect(Collectors.toList())).toString();
+ }
+
+ /** Creates one suggestion entry: joined non-blank labels, the ID (or ""), and the sorted, joined categories. */
+ private static Map<String, Object> toSuggestionsMap(Optional<JsonNode> id,
+ Stream<String> labels, List<String> categories) {
+ // Plain map instead of double-brace initialization: no anonymous HashMap subclass, no `serial` warning
+ Map<String, Object> suggestion = new HashMap<>();
+ // Blank label parts (fields without values) are skipped before joining:
+ suggestion.put("label", labels.filter(t -> !t.trim().isEmpty())
+ .collect(Collectors.joining(" | ")));
+ suggestion.put("id", id.orElseGet(() -> Json.toJson("")));
+ suggestion.put("category",
+ categories.stream().sorted().collect(Collectors.joining(" | ")));
+ return suggestion;
}
- private static Stream fieldValues(String field, JsonNode document) {
- return document.findValues(field).stream().flatMap((node) -> {
- return node.isArray()
- ? StreamSupport.stream(
- Spliterators.spliteratorUnknownSize(node.elements(), 0), false)
- : Arrays.asList(node).stream();
- });
+ /** Values for a field spec: a plain field name, `label_fieldName` templates, or a `from-to` date range. */
+ private static Stream<String> fieldValues(String f, JsonNode document) {
+ String field = f;
+ // standard case: `field` is a plain field name, use that:
+ List<String> result = flatStrings(document.findValues(field));
+ if (result.isEmpty()) {
+ // `label_fieldName` template, e.g. `since_startDate`: replaced by label and values, or dropped if no values
+ if (field.contains("_")) {
+ Matcher matcher = Pattern.compile("([^_]+)_([A-Za-z]+)").matcher(field);
+ while (matcher.find()) {
+ String label = matcher.group(1);
+ String fieldName = matcher.group(2);
+ List<JsonNode> findValues = document.findValues(fieldName);
+ if (!findValues.isEmpty()) {
+ String values = flatStrings(findValues).stream()
+ .collect(Collectors.joining(VALUE_DELIMITER));
+ field = field.replace(matcher.group(), label + " " + values);
+ } else {
+ field = field.replace(matcher.group(), "");
+ }
+ }
+ result = field.trim().isEmpty() ? Arrays.asList() : Arrays.asList(field);
+ }
+ // date ranges, e.g. `startDate-endDate`; limit -1 keeps a trailing empty part, so `startDate-` cannot fail on fields[1]
+ else if (field.contains("-")) {
+ String[] fields = field.split("-", -1);
+ String v1 = year(document.findValue(fields[0]));
+ String v2 = year(document.findValue(fields[1]));
+ result = v1.isEmpty() && v2.isEmpty() ? Lists.newArrayList()
+ : Arrays.asList(String.format("%s–%s", v1, v2));
+ }
+ }
+ return result.stream();
+ }
+
+ /** Flattens the given nodes (arrays are expanded to their elements) and maps each node to its string value. */
+ private static List<String> flatStrings(List<JsonNode> values) {
+ return values.stream().flatMap(node -> toArray(node)).map(node -> toString(node)).collect(Collectors.toList());
+ }
+
+ /** Streams the elements of an array node, or just the node itself if it is not an array. */
+ private static Stream<JsonNode> toArray(JsonNode node) {
+ return node.isArray() ? Lists.newArrayList(node.elements()).stream() : Stream.of(node);
+ }
+
+ private static String toString(JsonNode node) { // string value of a node, with full dates reduced by year(String)
+ return year((node.isTextual() ? Optional.ofNullable(node) // textual node: use its own text
+ : Optional.ofNullable(node.findValue("label"))) // otherwise: the `label` value found via findValue, if any
+ .orElseGet(() -> Json.toJson("")).asText()); // no label found: fall back to the empty string
+ }
+
+ private static String year(JsonNode node) { // null-safe variant of year(String) for JSON nodes
+ if (node == null || !node.isTextual()) {
+ return ""; // missing or non-textual value: nothing to show
+ }
+ return year(node.asText()); // textual value: delegate to the String variant
}
- private static Optional findValueOptional(JsonNode json,
- String field) {
- return Optional.ofNullable(json.findValue(field));
+ private static String year(String text) { // `dddd-dd-dd` dates are cut to the part before the first `-`, other text is returned as is
+ return text.matches("\\d{4}-\\d{2}-\\d{2}") ? text.split("-")[0] : text;
}
private static Optional getOptional(JsonNode json, String field) {
diff --git a/web/app/views/api.scala.html b/web/app/views/api.scala.html
index ca5291725..69f9f01b0 100644
--- a/web/app/views/api.scala.html
+++ b/web/app/views/api.scala.html
@@ -91,10 +91,11 @@
Autovervollständigung
- Die API unterstützt ein spezielles Antwortformat mit Vorschlägen zur Vervollständigung aus einem angegebenen Feld:
- @desc("Titel vorschlagen: \"format=json:title\"", resources.routes.Application.query("title:Werth", format="json:title"))
- @desc("Mitwirkende vorschlagen: \"format=json:agent\"", resources.routes.Application.query("contribution.agent.label:Hein", format="json:agent"))
- @desc("Schlagwort vorschlagen: \"format=json:subject\"", resources.routes.Application.query("subject.componentList.label:Pferd", format="json:subject"))
+ Die API unterstützt ein spezielles Antwortformat mit Vorschlägen zur Vervollständigung.
+ @desc("Standardformat für Vorschläge verwenden: \"format=json:suggest\"", resources.routes.Application.query("Twain", format="json:suggest"))
+ @desc("Bestimmtes Feld für Vorschläge verwenden: \"format=json:title\"", resources.routes.Application.query("Twain", format="json:title"))
+ @desc("Vorschläge aus mehreren Feldern zusammenbauen: \"format=json:title,contribution\"", resources.routes.Application.query("Twain", format="json:title,contribution"))
+ @desc("Feld-Templates zur Anpassung und Gruppierung: \"format=json:title,Erschienen_startDate bei_publishedBy\"", resources.routes.Application.query("Twain", format="json:title,Erschienen_startDate bei_publishedBy"))
Damit kann z.B. eine Autovervollständigung umgesetzt werden, bei der zur Suche an Stelle des gewählten Labels die entsprechende ID verwendet werden kann: