From b84f125ed5d1bd88b5459fb86e99c89f0d005038 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Tue, 31 Jul 2018 14:10:56 +0200 Subject: [PATCH 1/3] Add RSS support for search requests via format=rss or accept header See https://github.com/hbz/lobid-resources/issues/887 --- web/app/controllers/resources/Accept.java | 12 ++++- .../controllers/resources/Application.java | 48 ++++++++++++++----- web/app/views/rss.scala.html | 18 +++++++ web/conf/resources.conf | 1 + web/test/tests/AcceptIntegrationTest.java | 2 + web/test/tests/AcceptUnitTest.java | 3 ++ 6 files changed, 71 insertions(+), 13 deletions(-) create mode 100644 web/app/views/rss.scala.html diff --git a/web/app/controllers/resources/Accept.java b/web/app/controllers/resources/Accept.java index e902cc924b..21d3b5ae7c 100644 --- a/web/app/controllers/resources/Accept.java +++ b/web/app/controllers/resources/Accept.java @@ -24,7 +24,8 @@ enum Format { HTML("html", "text/html"), // RDF_XML("rdf", "application/rdf+xml", "application/xml", "text/xml"), // N_TRIPLE("nt", "application/n-triples", "text/plain"), // - TURTLE("ttl", "text/turtle", "application/x-turtle"); + TURTLE("ttl", "text/turtle", "application/x-turtle"), // + RSS("rss", "application/rss+xml"); String[] types; String queryParamString; @@ -33,6 +34,15 @@ private Format(String format, String... 
types) { this.queryParamString = format; this.types = types; } + + public static Format of(String format) { + for (Format f : Format.values()) { + if (format.equals(f.queryParamString)) { + return f; + } + } + return Format.JSON_LD; + } } /** diff --git a/web/app/controllers/resources/Application.java b/web/app/controllers/resources/Application.java index ac2f4d0a66..ce4a0ff958 100644 --- a/web/app/controllers/resources/Application.java +++ b/web/app/controllers/resources/Application.java @@ -7,6 +7,8 @@ import java.io.UnsupportedEncodingException; import java.net.URLEncoder; import java.text.Collator; +import java.text.DateFormat; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -69,6 +71,7 @@ import views.html.details_item; import views.html.index; import views.html.query; +import views.html.rss; import views.html.stars; /** @@ -106,6 +109,14 @@ public class Application extends Controller { /** The number of seconds in one day. */ public static final int ONE_DAY = 24 * ONE_HOUR; + /** Date format used in RSS feeds. */ + public static final DateFormat RSS_DATE_FORMAT = + new SimpleDateFormat("E, dd MMM yyyy HH:mm:ss Z"); + + /** Date format used in lobid-resources describedBy.dateCreated field. */ + public static final DateFormat LOBID_DATE_FORMAT = + new SimpleDateFormat("yyyyMMdd"); + /** * @return The index page. */ @@ -217,21 +228,34 @@ public static Promise query(final String q, final String agent, result = bulkResult(q, nested, owner, index); } else { result = Promise.promise(() -> { + // TODO: avoid redundant call here if RSS is requested Search queryResources = index.queryResources(); boolean returnSuggestions = responseFormat.startsWith("json:"); - JsonNode json = returnSuggestions - ? toSuggestions(queryResources.getResult(), format.split(":")[1]) - : queryResources.getResult(); + JsonNode json = + returnSuggestions ? 
toSuggestions(queryResources.getResult(), + responseFormat.split(":")[1]) : queryResources.getResult(); String s = json.toString(); - boolean htmlRequested = - responseFormat.equals(Accept.Format.HTML.queryParamString); - return htmlRequested - ? ok(query.render(s, q, agent, name, subject, id, publisher, issued, - medium, from, size, queryResources.getTotal(), owner, t, sort, - word)) - : (returnSuggestions ? withCallback(json) - : responseFor(withQueryMetadata(json, index), - Accept.Format.JSON_LD.queryParamString)); + switch (Format.of(responseFormat)) { + case HTML: + return ok(query.render(s, q, agent, name, subject, id, publisher, + issued, medium, from, size, queryResources.getTotal(), owner, t, + sort, word)); + case RSS: + // TODO: link from html (see https://ar.al/2018/06/29/reclaiming-rss/) + JsonNode jsonForRss = + new Search.Builder().query(queryBuilder).from(from).size(size) + .sort("newest").build().queryResources().getResult(); + String[] segments = request().uri().split("/"); + String queryDetails = Arrays.asList(segments).get(segments.length - 1) + .replace("search?", "").replaceAll("&?format=rss", ""); + return ok(rss.render(jsonForRss.toString(), + request().uri().replaceAll("&?format=rss", ""), queryDetails)) + .as("application/rss+xml"); + default: + return returnSuggestions ? withCallback(json) + : responseFor(withQueryMetadata(json, index), + Accept.Format.JSON_LD.queryParamString); + } }); } cacheOnRedeem(cacheId, result, ONE_HOUR); diff --git a/web/app/views/rss.scala.html b/web/app/views/rss.scala.html new file mode 100644 index 0000000000..8af27b43fb --- /dev/null +++ b/web/app/views/rss.scala.html @@ -0,0 +1,18 @@ +@* Copyright 2018 Fabian Steeg, hbz. 
Licensed under the GPLv2 *@ +@(result: String, uri: String, query: String)@defining(play.api.libs.json.Json.parse(result).asOpt[Seq[play.api.libs.json.JsValue]].getOrElse(Seq()).zipWithIndex) { hits => + + + lobid-resources @query + @controllers.resources.Application.CONFIG.getString("host")@uri + hbz union catalogue query @query + @for((doc,i) <- hits; id = (doc\\"hbzId")(0).as[String]) { + + @((doc\"title").asOpt[String].getOrElse("")) + @controllers.resources.Application.CONFIG.getString("host")@resources.routes.Application.resource(id, null) + @controllers.resources.Application.RSS_DATE_FORMAT.format(controllers.resources.Application.LOBID_DATE_FORMAT.parse((doc\"describedBy"\"dateCreated").asOpt[String].getOrElse(""))) + + + } + + +} diff --git a/web/conf/resources.conf b/web/conf/resources.conf index 4a6d08dcbb..04880d7602 100644 --- a/web/conf/resources.conf +++ b/web/conf/resources.conf @@ -1,3 +1,4 @@ +host="http://lobid.org" hbz01.api="http://lobid.org/hbz01" orgs.api="http://lobid.org/organisations/" diff --git a/web/test/tests/AcceptIntegrationTest.java b/web/test/tests/AcceptIntegrationTest.java index 45aebd8719..ef27ccf453 100644 --- a/web/test/tests/AcceptIntegrationTest.java +++ b/web/test/tests/AcceptIntegrationTest.java @@ -43,9 +43,11 @@ public static Collection data() { { fakeRequest(GET, "/resources/search?q=*").header("Accept", "text/plain"), /*->*/ "application/json" }, // search, others formats as query param: { fakeRequest(GET, "/resources/search?q=*&format=html"), /*->*/ "text/html" }, + { fakeRequest(GET, "/resources/search?q=*&format=rss"), /*->*/ "application/rss+xml" }, // search, others formats via header: { fakeRequest(GET, "/resources/search?q=*").header("Accept", "application/json"), /*->*/ "application/json" }, { fakeRequest(GET, "/resources/search?q=*").header("Accept", "text/html"), /*->*/ "text/html" }, + { fakeRequest(GET, "/resources/search?q=*").header("Accept", "application/rss+xml"), /*->*/ "application/rss+xml" }, // get, 
default format: JSON { fakeRequest(GET, "/resources/HT018907266"), /*->*/ "application/json" }, { fakeRequest(GET, "/resources/HT018907266?format="), /*->*/ "application/json" }, diff --git a/web/test/tests/AcceptUnitTest.java b/web/test/tests/AcceptUnitTest.java index 368ec8b10a..db26785c77 100644 --- a/web/test/tests/AcceptUnitTest.java +++ b/web/test/tests/AcceptUnitTest.java @@ -40,9 +40,11 @@ public static Collection data() { // no header, just format parameter: { fakeRequest(), "html", /*->*/ "html" }, { fakeRequest(), "json", /*->*/ "json" }, + { fakeRequest(), "json:title", /*->*/ "json:title" }, { fakeRequest(), "rdf", /*->*/ "rdf" }, { fakeRequest(), "ttl", /*->*/ "ttl" }, { fakeRequest(), "nt", /*->*/ "nt" }, + { fakeRequest(), "rss", /*->*/ "rss" }, // supported content types, no format parameter given: { fakeRequest().header("Accept", "text/html"), null, /*->*/ "html" }, { fakeRequest().header("Accept", "application/json"), null, /*->*/ "json" }, @@ -54,6 +56,7 @@ public static Collection data() { { fakeRequest().header("Accept", "application/xml"), null, /*->*/ "rdf" }, { fakeRequest().header("Accept", "application/rdf+xml"), null, /*->*/ "rdf" }, { fakeRequest().header("Accept", "text/xml"), null, /*->*/ "rdf" }, + { fakeRequest().header("Accept", "application/rss+xml"), null, /*->*/ "rss" }, // we pick the preferred content type: { fakeRequest().header("Accept", "text/html,application/json"), null, /*->*/"html" }, { fakeRequest().header("Accept", "application/json,text/html"), null, /*->*/ "json" }, From 136eba3c5fe61e9e4fbd702a2613dfc2daacca02 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Wed, 1 Aug 2018 11:40:03 +0200 Subject: [PATCH 2/3] Avoid redundant index query for RSS requests, split up query method See https://github.com/hbz/lobid-resources/issues/887 --- .../controllers/resources/Application.java | 136 +++++++++++------- web/test/tests/IntegrationTests.java | 2 + 2 files changed, 86 insertions(+), 52 deletions(-) diff --git 
a/web/app/controllers/resources/Application.java b/web/app/controllers/resources/Application.java index ce4a0ff958..5acdf5b5c2 100644 --- a/web/app/controllers/resources/Application.java +++ b/web/app/controllers/resources/Application.java @@ -188,6 +188,7 @@ public static Promise query(final String q, final String agent, String filter) { // bulk -> jsonl, see https://github.com/hbz/lobid-resources/issues/861 final String format = f != null && f.equals("bulk") ? "jsonl" : f; + final String aggregations = aggs == null ? "" : aggs; if (!aggregations.isEmpty() && !Search.SUPPORTED_AGGREGATIONS .containsAll(Arrays.asList(aggregations.split(",")))) { @@ -195,70 +196,52 @@ public static Promise query(final String q, final String agent, String.format("Unsupported aggregations: %s (supported: %s)", aggregations, Search.SUPPORTED_AGGREGATIONS))); } - addCorsHeader(); - String uuid = session("uuid"); - if (uuid == null) { - uuid = UUID.randomUUID().toString(); - session("uuid", uuid); - } + String responseFormat = Accept.formatFor(format, request().acceptedTypes()); - boolean isBulkRequest = - responseFormat.equals(Accept.Format.BULK.queryParamString); - if (isBulkRequest) { - response().setHeader("Content-Disposition", - String.format( - "attachment; filename=\"lobid-resources-bulk-%s.jsonl\"", - System.currentTimeMillis())); - } - String cacheId = String.format("%s-%s-%s-%s", uuid, request().uri(), - Accept.formatFor(format, request().acceptedTypes()), starredIds()); + addResponseHeaders(responseFormat); + + String cacheId = createCacheId(format); @SuppressWarnings("unchecked") Promise cachedResult = (Promise) Cache.get(cacheId); if (cachedResult != null) return cachedResult; + Logger.debug("Not cached: {}, will cache for one hour", cacheId); QueryBuilder queryBuilder = new Queries.Builder().q(q).agent(agent) .name(name).subject(subject).id(id).publisher(publisher).issued(issued) .medium(medium).t(t).owner(owner).nested(nested).location(location) 
.filter(filter).word(word).build(); + String sortBy = + responseFormat.equals(Accept.Format.RSS.queryParamString) ? "newest" + : sort; Search index = new Search.Builder().query(queryBuilder).from(from) - .size(size).sort(sort).aggs(aggregations).build(); - Promise result; - if (isBulkRequest) { - result = bulkResult(q, nested, owner, index); - } else { - result = Promise.promise(() -> { - // TODO: avoid redundant call here if RSS is requested - Search queryResources = index.queryResources(); - boolean returnSuggestions = responseFormat.startsWith("json:"); - JsonNode json = - returnSuggestions ? toSuggestions(queryResources.getResult(), - responseFormat.split(":")[1]) : queryResources.getResult(); - String s = json.toString(); - switch (Format.of(responseFormat)) { - case HTML: - return ok(query.render(s, q, agent, name, subject, id, publisher, - issued, medium, from, size, queryResources.getTotal(), owner, t, - sort, word)); - case RSS: - // TODO: link from html (see https://ar.al/2018/06/29/reclaiming-rss/) - JsonNode jsonForRss = - new Search.Builder().query(queryBuilder).from(from).size(size) - .sort("newest").build().queryResources().getResult(); - String[] segments = request().uri().split("/"); - String queryDetails = Arrays.asList(segments).get(segments.length - 1) - .replace("search?", "").replaceAll("&?format=rss", ""); - return ok(rss.render(jsonForRss.toString(), - request().uri().replaceAll("&?format=rss", ""), queryDetails)) - .as("application/rss+xml"); - default: - return returnSuggestions ? 
withCallback(json) - : responseFor(withQueryMetadata(json, index), - Accept.Format.JSON_LD.queryParamString); - } - }); - } + .size(size).sort(sortBy).aggs(aggregations).build(); + + Promise result = + createResult(q, agent, name, subject, id, publisher, issued, medium, + from, size, owner, t, sort, word, nested, responseFormat, index); cacheOnRedeem(cacheId, result, ONE_HOUR); + + return resultOrError(q, agent, name, subject, id, publisher, issued, medium, + from, size, owner, t, sort, word, result); + } + + private static String createCacheId(final String format) { + String uuid = session("uuid"); + if (uuid == null) { + uuid = UUID.randomUUID().toString(); + session("uuid", uuid); + } + String cacheId = String.format("%s-%s-%s-%s", uuid, request().uri(), + Accept.formatFor(format, request().acceptedTypes()), starredIds()); + return cacheId; + } + + private static Promise resultOrError(final String q, + final String agent, final String name, final String subject, + final String id, final String publisher, final String issued, + final String medium, final int from, final int size, final String owner, + String t, String sort, String word, Promise result) { return result.recover((Throwable throwable) -> { Html html = query.render("[]", q, agent, name, subject, id, publisher, issued, medium, from, size, 0L, owner, t, sort, word); @@ -274,6 +257,55 @@ public static Promise query(final String q, final String agent, }); } + private static Promise createResult(final String q, + final String agent, final String name, final String subject, + final String id, final String publisher, final String issued, + final String medium, final int from, final int size, final String owner, + String t, String sort, String word, String nested, String responseFormat, + Search index) { + Promise result = + responseFormat.equals(Accept.Format.BULK.queryParamString) + ? 
bulkResult(q, nested, owner, index) + : Promise.promise(() -> { + Search queryResources = index.queryResources(); + JsonNode json = queryResources.getResult(); + String s = json.toString(); + switch (Format.of(responseFormat)) { + case HTML: + return ok(query.render(s, q, agent, name, subject, id, + publisher, issued, medium, from, size, + queryResources.getTotal(), owner, t, sort, word)); + case RSS: + // TODO: link from html (see + // https://ar.al/2018/06/29/reclaiming-rss/) + String[] segments = request().uri().split("/"); + String queryDetails = + Arrays.asList(segments).get(segments.length - 1) + .replace("search?", "").replaceAll("&?format=rss", ""); + return ok(rss.render(s, + request().uri().replaceAll("&?format=rss", ""), + queryDetails)).as("application/rss+xml"); + default: + return responseFormat.startsWith("json:") + ? withCallback( + toSuggestions(json, responseFormat.split(":")[1])) + : responseFor(withQueryMetadata(json, index), + Accept.Format.JSON_LD.queryParamString); + } + }); + return result; + } + + private static void addResponseHeaders(String responseFormat) { + addCorsHeader(); + if (responseFormat.equals(Accept.Format.BULK.queryParamString)) { + response().setHeader("Content-Disposition", + String.format( + "attachment; filename=\"lobid-resources-bulk-%s.jsonl\"", + System.currentTimeMillis())); + } + } + private static Promise bulkResult(final String q, final String nested, final String owner, Search index) { return Promise.promise(() -> { diff --git a/web/test/tests/IntegrationTests.java b/web/test/tests/IntegrationTests.java index e7f8768d31..3b3fff1189 100644 --- a/web/test/tests/IntegrationTests.java +++ b/web/test/tests/IntegrationTests.java @@ -104,6 +104,8 @@ private static void bulkRequestWith(String param) { assertThat(result.contentType()).isEqualTo("application/x-jsonlines"); String text = Helpers.contentAsString(result); assertThat(text.split("\\n").length).isGreaterThanOrEqualTo(10); + 
assertThat(result.header(Http.HeaderNames.CONTENT_DISPOSITION)) + .isNotNull().isNotEmpty().contains("attachment; filename="); }); } From 15c95d2cdadd3eef737a5964e29055a20d5c29f0 Mon Sep 17 00:00:00 2001 From: Fabian Steeg Date: Wed, 1 Aug 2018 13:19:19 +0200 Subject: [PATCH 3/3] Add links to RSS feed from HTML, update API documentation See https://github.com/hbz/lobid-resources/issues/887 --- web/app/controllers/resources/Application.java | 2 -- web/app/views/api.scala.html | 7 +++++-- web/app/views/main.scala.html | 17 +++++++++-------- web/app/views/query.scala.html | 11 +++++++++-- 4 files changed, 23 insertions(+), 14 deletions(-) diff --git a/web/app/controllers/resources/Application.java b/web/app/controllers/resources/Application.java index 5acdf5b5c2..2d704ea762 100644 --- a/web/app/controllers/resources/Application.java +++ b/web/app/controllers/resources/Application.java @@ -276,8 +276,6 @@ private static Promise createResult(final String q, publisher, issued, medium, from, size, queryResources.getTotal(), owner, t, sort, word)); case RSS: - // TODO: link from html (see - // https://ar.al/2018/06/29/reclaiming-rss/) String[] segments = request().uri().split("/"); String queryDetails = Arrays.asList(segments).get(segments.length - 1) diff --git a/web/app/views/api.scala.html b/web/app/views/api.scala.html index 019f29ae2f..77577b5712 100644 --- a/web/app/views/api.scala.html +++ b/web/app/views/api.scala.html @@ -66,11 +66,14 @@

Inhaltstypen Standardmäßig liefert dieser Dienst strukturierte API-Antworten (als JSON):

curl http://lobid.org@resources.routes.Application.resource("HT018472857")

-

Er unterstützt Content-Negotiation über den Accept-Header für JSON (application/json), JSON lines (application/x-jsonlines) oder HTML (text/html):

+

Er unterstützt Content-Negotiation über den Accept-Header für JSON (application/json), JSON lines (application/x-jsonlines), RSS (application/rss+xml) und HTML (text/html):

curl --header "Accept: application/json" http://lobid.org@resources.routes.Application.query("kunst")

+

curl --header "Accept: application/rss+xml" http://lobid.org@resources.routes.Application.query("kunst")

curl --header "Accept: application/x-jsonlines" http://lobid.org@resources.routes.Application.query("kunst") > kunst.jsonl

-

Der Query-Parameter "format" (Werte: html,json,jsonl) kann verwendet werden, um den Accept-Header aufzuheben, z.B. zur Anzeige von JSON im Browser:

+

Der Query-Parameter "format" (Werte: html,json,jsonl,rss) kann verwendet werden, um den Accept-Header aufzuheben, z.B. zur Anzeige von JSON im Browser:

@resources.routes.Application.resource("HT018472857", format="json")

+

Oder zum Abonnieren eines RSS-Feeds:

+

@resources.routes.Application.query("kunst", format="rss")

Der Wert des Format-Parameters kann für Einzeltreffer auch in URLs als Dateiendung verwendet werden:

@resources.routes.Application.resourceDotFormat("HT018472857", format="json")

Für größere Anfragen kann die Antwort als gzip komprimiert werden:

diff --git a/web/app/views/main.scala.html b/web/app/views/main.scala.html index d78d8f0cfb..24acd5efc5 100644 --- a/web/app/views/main.scala.html +++ b/web/app/views/main.scala.html @@ -1,6 +1,6 @@ @* Copyright 2014 Fabian Steeg, hbz. Licensed under the GPLv2 *@ -@(q: String, title: String)(content: Html) +@(q: String, title: String, rss: Option[String] = None)(content: Html) @import controllers.resources.Lobid @@ -9,12 +9,13 @@ @title - - - - - - + + + + + + + @for(rssPath <- rss){} @@ -63,7 +64,7 @@ - @if(request.uri.toString() != resources.routes.Application.advanced().toString()){@tags.search_form(q)} + @if(request.uri.toString() != resources.routes.Application.advanced().toString() && !title.contains("API")){@tags.search_form(q)} @content