From b55bbbf508b2cd484abfa4221649589c06c9db7d Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 13 Jun 2022 10:30:49 +0200 Subject: [PATCH 01/29] Add skos lookup (metafacture/metafacture-core#415) Works like fix function 'lookup', also using a Map. The Map is built dynamically by querying an RDF model. --- README.md | 10 + build.gradle | 1 + metafix/build.gradle | 3 + .../org/metafacture/metafix/FixMethod.java | 60 ++++ .../org/metafacture/metafix/maps/RdfMap.java | 297 ++++++++++++++++++ .../metafix/MetafixLookupTest.java | 95 ++++++ .../org/metafacture/metafix/maps/test.ttl | 12 + 7 files changed, 478 insertions(+) create mode 100644 metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java create mode 100644 metafix/src/test/resources/org/metafacture/metafix/maps/test.ttl diff --git a/README.md b/README.md index bcb96f861..1b429fcf8 100644 --- a/README.md +++ b/README.md @@ -606,6 +606,16 @@ lookup("path.to.field", "map-name", __default: "NA") lookup("path.to.field", "map-name", print_unknown: "true", destination: "unknown.txt") ``` +##### `lookup_rdf` + +Looks up matching values in an RDF resource and replaces the field value with this match. A file as well as an HTTP(S) resource can be used. + +```perl +lookup_rdf("", "", target: "") +lookup_rdf("", "", target: "", target_language: "") +lookup_rdf("", "", target: "", __default: "NA") +``` + ##### `prepend` Adds a string at the beginning of a field value. 
diff --git a/build.gradle b/build.gradle index 18f8a0aa1..96a42e589 100644 --- a/build.gradle +++ b/build.gradle @@ -38,6 +38,7 @@ subprojects { 'equalsverifier': '3.8.2', 'jackson': '2.13.3', 'jetty': '9.4.14.v20181114', + 'jena': '3.17.0', 'jquery': '3.3.1-1', 'junit_jupiter': '5.8.2', 'junit_platform': '1.4.2', diff --git a/metafix/build.gradle b/metafix/build.gradle index 4dcb74faf..f90c61a70 100644 --- a/metafix/build.gradle +++ b/metafix/build.gradle @@ -13,9 +13,12 @@ dependencies { implementation "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" implementation "com.fasterxml.jackson.core:jackson-databind:${versions.jackson}" implementation "com.google.guava:guava:${versions.guava}" + implementation "org.apache.jena:jena-core:${versions.jena}" + implementation "org.apache.jena:jena-arq:${versions.jena}" implementation "org.eclipse.emf:org.eclipse.emf.ecore:${versions.xtext}" // Workaround for hbz/lobid-resources#1462 implementation "org.eclipse.xtext:org.eclipse.xtext.xbase:${versions.xtext}" implementation "org.eclipse.xtext:org.eclipse.xtext:${versions.xtext}" + implementation "org.eclipse.xtext:org.eclipse.xtext.xbase:${versions.xtext}" implementation "org.slf4j:slf4j-api:${versions.slf4j}" testImplementation "org.junit.jupiter:junit-jupiter-api:${versions.junit_jupiter}" diff --git a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java index f3f5512c9..de7800b61 100644 --- a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java +++ b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java @@ -17,6 +17,7 @@ package org.metafacture.metafix; import org.metafacture.metafix.api.FixFunction; +import org.metafacture.metafix.maps.RdfMap; import org.metafacture.metamorph.api.Maps; import org.metafacture.metamorph.functions.ISBN; import org.metafacture.metamorph.functions.Timestamp; @@ -92,6 +93,24 @@ public void apply(final Metafix metafix, final Record record, final 
List metafix.putMap(params.get(0), options); } }, + put_rdfmap { + @Override + public void apply(final Metafix metafix, final Record record, final List params, final Map options) { + final String fileName = params.get(0); + final RdfMap rdf = new RdfMap(); + rdf.setFile(metafix.resolvePath(fileName)); + if (options.containsKey("target_language")) { + rdf.setTargetLanguage(options.get("target_language")); + } + if (options.containsKey("target")) { + rdf.setTarget(options.get("target")); + } + if (options.containsKey(Maps.DEFAULT_MAP_KEY)) { + rdf.setDefault(options.get(Maps.DEFAULT_MAP_KEY)); + } + metafix.putMap(params.size() > 1 ? params.get(1) : fileName, rdf); + } + }, put_var { @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { @@ -470,6 +489,7 @@ public void apply(final Metafix metafix, final Record record, final List @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { +<<<<<<< HEAD final Map map; if (params.size() <= 1) { @@ -517,6 +537,16 @@ public void apply(final Metafix metafix, final Record record, final List } } }, + lookup_rdf { + @Override + public void apply(final Metafix metafix, final Record record, final List params, final Map options) { + final Map map = extracted(metafix, record, params, options, KIND_OF_RDFMAP); + record.transform(params.get(0), oldValue -> { + final String newValue = map.get(oldValue); + return newValue != null ? newValue : getBoolean(options, "delete") ? 
null : oldValue; + }); + } + }, prepend { @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { @@ -639,6 +669,8 @@ public void apply(final Metafix metafix, final Record record, final List } }; + public static final String KIND_OF_RDFMAP = "rdfmap"; + public static final String KIND_OF_FILEMAP = "filemap"; private static final Pattern NAMED_GROUP_PATTERN = Pattern.compile("\\(\\?<(.+?)>"); private static final String FILEMAP_SEPARATOR_OPTION = "sep_char"; @@ -647,5 +679,33 @@ public void apply(final Metafix metafix, final Record record, final List private static final String ERROR_STRING_OPTION = "error_string"; private static final Random RANDOM = new Random(); + private static String defaultValue; + + private static Map extracted(final Metafix metafix, final Record record, final List params, final Map options, final String kindOfMap) { + final Map map; + if (params.size() <= 1) { + map = options; + } + else { + final String mapName = params.get(1); + + if (!metafix.getMapNames().contains(mapName)) { + if (mapName.contains(".") || mapName.contains(File.separator)) { + if (kindOfMap.equals(KIND_OF_FILEMAP)) { + put_filemap.apply(metafix, record, Arrays.asList(mapName), options); + } + if (kindOfMap.equals(KIND_OF_RDFMAP)) { + put_rdfmap.apply(metafix, record, Arrays.asList(mapName), options); + } + } + else { + // Probably an unknown internal map? Log a warning? 
+ } + } + map = metafix.getMap(mapName); + } + defaultValue = map.get(Maps.DEFAULT_MAP_KEY); // TODO: Catmandu uses 'default' + return map; + } } diff --git a/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java b/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java new file mode 100644 index 000000000..6e76e2584 --- /dev/null +++ b/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java @@ -0,0 +1,297 @@ +/* + * Copyright 2013, 2014, 2021 Deutsche Nationalbibliothek et al + * + * Licensed under the Apache License, Version 2.0 the "License"; + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.metafacture.metafix.maps; + +import org.metafacture.metafix.FixExecutionException; +import org.metafacture.metamorph.api.Maps; +import org.metafacture.metamorph.api.helpers.AbstractReadOnlyMap; + +import org.apache.jena.rdf.model.Model; +import org.apache.jena.rdf.model.Property; +import org.apache.jena.rdf.model.ResIterator; +import org.apache.jena.rdf.model.Resource; +import org.apache.jena.rdf.model.ResourceFactory; +import org.apache.jena.rdf.model.Statement; +import org.apache.jena.rdf.model.StmtIterator; +import org.apache.jena.riot.RDFDataMgr; +import org.apache.jena.riot.RiotNotFoundException; +import org.apache.jena.shared.PropertyNotFoundException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.io.UncheckedIOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.NoSuchElementException; +import java.util.Optional; +import java.util.Set; + +/** + * Provides a dynamically build {@link Map} based on an RDF resource. Can be one file or a comma separated list of RDF + * files or an HTTP(S) URI. + * The resources are supposed to be UTF-8 encoded. + *

+ * + * Important: When using a list of files make sure to set the proper separator. All lines that are not + * split in two parts by the separator are ignored! + * + * @author Markus Michael Geipel + * @author Pascal Christoph (dr0i) + * @see org.metafacture.metamorph.maps.FileMap + */ +public final class RdfMap extends AbstractReadOnlyMap { + private static String targetLanguage = ""; + private static String target; + private static final Logger LOG = LoggerFactory.getLogger(RdfMap.class); + private Model model; + private boolean isUninitialized = true; + private final ArrayList filenames = new ArrayList<>(); + private final Map map = new HashMap<>(); + + /** + * Creates an instance of {@link RdfMap}. + */ + public RdfMap() { + targetLanguage = ""; + } + + private void init() { + loadFiles(); + if (!map.containsKey(Maps.DEFAULT_MAP_KEY)) { + setDefault(Maps.DEFAULT_MAP_KEY); + } + final String[] nsPrefixAndProperty = target.split(":"); + if (nsPrefixAndProperty.length == 2) { + target = model.getNsPrefixURI(nsPrefixAndProperty[0]) + nsPrefixAndProperty[1]; + } + isUninitialized = false; + } + + /** + * Sets a comma separated list of files which provides the {@link Model}. + * + * @param files a comma separated list of files + */ + public void setFiles(final String files) { + Collections.addAll(filenames, files.split("\\s*,\\s*")); + } + + /** + * Sets a file which provides the {@link Model}. 
+ * + * @param file the file + */ + public void setFile(final String file) { + Collections.addAll(filenames, file); + } + + private void loadFiles() { + filenames.forEach(this::loadFile); + } + + private void loadFile(final String file) { + try { + if (model == null) { + model = RDFDataMgr.loadModel(file); + } + else { + RDFDataMgr.read(model, file); + } + } + catch (final RiotNotFoundException e) { + throw new FixExecutionException("rdf file: cannot read file", e); + } + } + + private InputStream openStream(final String file) { + return openAsFile(file).orElseGet(() -> openAsResource(file).orElseGet(() -> openAsUrl(file).orElseThrow(() -> new FixExecutionException("File not found: " + file)))); + } + + private Optional openAsFile(final String file) { + try { + return Optional.of(new FileInputStream(file)); + } + catch (final FileNotFoundException e) { + return Optional.empty(); + } + } + + private Optional openAsResource(final String file) { + return Optional.ofNullable(Thread.currentThread().getContextClassLoader().getResourceAsStream(file)); + } + + private Optional openAsUrl(final String file) { + final URL url; + try { + url = new URL(file); + } + catch (final MalformedURLException e) { + return Optional.empty(); + } + try { + return Optional.of(url.openStream()); + } + catch (final IOException e) { + throw new UncheckedIOException(e); + } + } + + /** + * Builds a Map dynamically by querying an RDF model based on a key and a targeted Property + * (to be set in {@link RdfMap#setTarget(String)}) and an optional language tag (to be set in + * {@link RdfMap#setTargetLanguage}). + *
+ * The Map acts as a cache. + *

+ * To minimize the need of parameters three different querying modes are gone through. If one fails, the next one is + * tried: + *

+ * 1. Get the value of the targeted Property of a Subject + *
+ * 2. Get the Subject matching a targeted Property value + *
+ * 3. Get the value of a Property using the value of a targeted Property + * + * @param key the Property value, or a Subject, to be looked up + */ + @Override + public String get(final Object key) { + if (isUninitialized) { + init(); + } + String ret = Maps.DEFAULT_MAP_KEY; + if (map.containsKey(key.toString())) { + ret = map.get(key.toString()); + } + else { + final Resource resource = ResourceFactory.createResource(key.toString()); + final Property targetProperty = ResourceFactory.createProperty(target); + try { + //first try to get LITERAL using SUBJECT and PROPERTY + if (!RdfMap.targetLanguage.isEmpty()) { + ret = model.getRequiredProperty(resource, targetProperty, RdfMap.targetLanguage).getString(); + } + else { + ret = model.getRequiredProperty(resource, targetProperty).getString(); + } + } + catch (final PropertyNotFoundException | NullPointerException | NoSuchElementException e) { + //second try to get SUBJECT using PROPERTY and LITERAL + ret = getSubjectUsingPropertyAndLiteral(key, targetProperty); + //third try: get LITERAL of PREDICATE A using PREDICATE B + if (ret == Maps.DEFAULT_MAP_KEY) { + ret = getLiteralOfPredicateUsingOtherPredicate(key, targetProperty); + } + else { + LOG.info("Could not lookup:'" + key + "@" + (RdfMap.targetLanguage.isEmpty() ? RdfMap.targetLanguage : "") + " for " + target + "'. 
Going with default value."); + } + } + map.put(key.toString(), ret); + } + return ret; + } + + private String getLiteralOfPredicateUsingOtherPredicate(final Object key, final Property targetProperty) { + Resource resource; + final ResIterator iter; + String ret = map.get(Maps.DEFAULT_MAP_KEY); + iter = model.listSubjectsWithProperty(targetProperty); + while (iter.hasNext()) { + resource = iter.nextResource(); + if (resource.getProperty(targetProperty).getString().equals(key.toString())) { + Statement stmt = resource.getProperty(targetProperty); + final StmtIterator iterProp = resource.listProperties(targetProperty); + while (iterProp.hasNext()) { + stmt = iterProp.nextStatement(); + if (stmt.getLanguage().equals(RdfMap.targetLanguage) && !stmt.getString().equals(key)) { + ret = stmt.getString(); + } + } + } + } + return ret; + } + + private String getSubjectUsingPropertyAndLiteral(final Object key, final Property targetProperty) { + Resource resource; + String ret = map.get(Maps.DEFAULT_MAP_KEY); + final ResIterator iter = model.listSubjectsWithProperty(targetProperty); + while (iter.hasNext()) { + resource = iter.nextResource(); + if (resource.getProperty(targetProperty).getString().equals(key.toString())) { + if (!RdfMap.targetLanguage.isEmpty()) { + if (resource.getProperty(targetProperty).getLanguage().equals(RdfMap.targetLanguage)) { + ret = resource.getURI(); + } + } + else { + ret = resource.getURI(); + } + } + } + return ret; + } + + @Override + public Set keySet() { + if (isUninitialized) { + init(); + } + return Collections.unmodifiableSet(map.keySet()); + } + + /** + * Sets the language of the target Property which is queried in the RDF. Valid values are defined by BCP47. + *
+ * Setting the language of the target Property is optional. + * + * @param targetLanguage the language of the target Property to be queried + */ + public void setTargetLanguage(final String targetLanguage) { + RdfMap.targetLanguage = targetLanguage; + } + + /** + * Sets the target Property which is queried in the RDF. Namespaces are allowed. + *
+ * Setting a target Property is mandatory. + * + * @param target the Property to be queried + */ + public void setTarget(final String target) { + RdfMap.target = target; + } + + /** + * Sets the default value returned if the key couldn't be found. + *
+ * Default value: {@link Maps#DEFAULT_MAP_KEY} + * + * @param defaultValue the default value returned + */ + public void setDefault(final String defaultValue) { + map.put(Maps.DEFAULT_MAP_KEY, defaultValue); + } +} diff --git a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java index 2b0f3e842..b3e1b3355 100644 --- a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java +++ b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java @@ -37,9 +37,11 @@ public class MetafixLookupTest { private static final String CSV_MAP = "src/test/resources/org/metafacture/metafix/maps/test.csv"; + private static final String RDF_MAP = "src/test/resources/org/metafacture/metafix/maps/test.ttl"; private static final String TSV_MAP = "src/test/resources/org/metafacture/metafix/maps/test.tsv"; private static final String LOOKUP = "lookup('title.*',"; + private static final String LOOKUP_IN_RDF = "lookup_in_rdf('prefLabel.*',"; @Mock private StreamReceiver streamReceiver; @@ -956,6 +958,99 @@ public void shouldPrintUnknownToFileWithoutAppend() throws IOException { MetafixTestHelpers.assertTempFile("you\ntoo\n", p -> shouldPrintUnknown(", destination: '" + p + "', append: 'false'", null, "")); } + @Test + public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() { + MetafixTestHelpers.assertFix(streamReceiver, + Arrays.asList("lookup_rdf('created'," + " '" + RDF_MAP + "', target:\"created\", __default:\"0000-01-01\")" + ), + i -> { + i.startRecord("1"); + i.literal("created", "https://w3id.org/kim/hochschulfaechersystematik/n4"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("created", "0000-01-01"); + o.get().endRecord(); + } + ); + } + + @Test + public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicate() { + MetafixTestHelpers.assertFix(streamReceiver, + Arrays.asList("lookup_rdf('notation'," + " '" + RDF_MAP + 
"', target:\"skos:notation\")" + ), + i -> { + i.startRecord("1"); + i.literal("notation", "https://w3id.org/kim/hochschulfaechersystematik/n4"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("notation", "4"); + o.get().endRecord(); + } + ); + } + + @Test //Scenario 1: + public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicateOfSpecificLanguage() { + MetafixTestHelpers.assertFix(streamReceiver, + Arrays.asList("set_array('prefLabel', 'https://w3id.org/kim/hochschulfaechersystematik/n4')", + "lookup_rdf('prefLabel.*'," + " '" + RDF_MAP + "', target:\"skos:prefLabel\", target_language:\"de\" )" + ), + i -> { + i.startRecord("1"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("prefLabel", "Mathematik, Naturwissenschaften"); + o.get().endRecord(); + } + ); + } + + @Test //Scenario 2: + public void shouldLookupInExternalRdfMapGetSubjectWithTargetedPredicateOfSpecificLanguage() { + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList("set_array('id', 'Mathematics, Natural Sciences')", + "lookup_rdf('id.*'," + " '" + RDF_MAP + "', target:\"skos:prefLabel\", " + + "target_language:\"en\" )" + ), + i -> { + i.startRecord("1"); + i.literal("prefLabel", "Mathematics, Natural Science"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("prefLabel", "Mathematics, Natural Science"); + o.get().literal("id", "https://w3id.org/kim/hochschulfaechersystematik/n4"); + o.get().endRecord(); + } + ); + } + + @Test //Scenario 3: + public void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage() { + MetafixTestHelpers.assertFix(streamReceiver, + Arrays.asList("set_array('prefLabel', 'Mathematics, Natural Sciences')", + "lookup_rdf('prefLabel.*'," + " '" + RDF_MAP + "', target:\"skos:prefLabel\", " + + "target_language:\"de\" )" + ), + i -> { + i.startRecord("1"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + 
o.get().literal("prefLabel", "Mathematik, Naturwissenschaften"); + o.get().endRecord(); + } + ); + } + private void assertMap(final String... fixDef) { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(fixDef), i -> { diff --git a/metafix/src/test/resources/org/metafacture/metafix/maps/test.ttl b/metafix/src/test/resources/org/metafacture/metafix/maps/test.ttl new file mode 100644 index 000000000..6229e9ea4 --- /dev/null +++ b/metafix/src/test/resources/org/metafacture/metafix/maps/test.ttl @@ -0,0 +1,12 @@ +@base . +@prefix dct: . +@prefix skos: . +@prefix schema: . +@prefix vann: . + + a skos:Concept ; + skos:prefLabel "Mathematik, Naturwissenschaften"@de, "Mathematics, Natural Sciences"@en ; + skos:narrower , , , , , , , ; + skos:notation "4" ; + skos:topConceptOf . + From 2cba8491d82a1a631f979a697d8a155e9d2786fb Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Mon, 5 Sep 2022 17:14:45 +0200 Subject: [PATCH 02/29] Fix usage without namespace (metafacture-core#415) --- .../org/metafacture/metafix/maps/RdfMap.java | 49 ++----------------- .../metafix/MetafixLookupTest.java | 37 ++++++++------ 2 files changed, 27 insertions(+), 59 deletions(-) diff --git a/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java b/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java index 6e76e2584..06afef654 100644 --- a/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java +++ b/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java @@ -1,5 +1,5 @@ /* - * Copyright 2013, 2014, 2021 Deutsche Nationalbibliothek et al + * Copyright 2022 hbz * * Licensed under the Apache License, Version 2.0 the "License"; * you may not use this file except in compliance with the License. 
@@ -33,19 +33,11 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStream; -import java.io.UncheckedIOException; -import java.net.MalformedURLException; -import java.net.URL; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.NoSuchElementException; -import java.util.Optional; import java.util.Set; /** @@ -82,9 +74,9 @@ private void init() { if (!map.containsKey(Maps.DEFAULT_MAP_KEY)) { setDefault(Maps.DEFAULT_MAP_KEY); } - final String[] nsPrefixAndProperty = target.split(":"); - if (nsPrefixAndProperty.length == 2) { - target = model.getNsPrefixURI(nsPrefixAndProperty[0]) + nsPrefixAndProperty[1]; + if (!target.toLowerCase().startsWith("http")) { + final String[] nsPrefixAndProperty = target.split(":"); + target = nsPrefixAndProperty.length == 2 ? model.getNsPrefixURI(nsPrefixAndProperty[0]) + nsPrefixAndProperty[1] : nsPrefixAndProperty[0]; } isUninitialized = false; } @@ -125,39 +117,6 @@ private void loadFile(final String file) { } } - private InputStream openStream(final String file) { - return openAsFile(file).orElseGet(() -> openAsResource(file).orElseGet(() -> openAsUrl(file).orElseThrow(() -> new FixExecutionException("File not found: " + file)))); - } - - private Optional openAsFile(final String file) { - try { - return Optional.of(new FileInputStream(file)); - } - catch (final FileNotFoundException e) { - return Optional.empty(); - } - } - - private Optional openAsResource(final String file) { - return Optional.ofNullable(Thread.currentThread().getContextClassLoader().getResourceAsStream(file)); - } - - private Optional openAsUrl(final String file) { - final URL url; - try { - url = new URL(file); - } - catch (final MalformedURLException e) { - return Optional.empty(); - } - try { - return Optional.of(url.openStream()); - } - catch (final IOException e) 
{ - throw new UncheckedIOException(e); - } - } - /** * Builds a Map dynamically by querying an RDF model based on a key and a targeted Property * (to be set in {@link RdfMap#setTarget(String)}) and an optional language tag (to be set in diff --git a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java index b3e1b3355..78748c8ff 100644 --- a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java +++ b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java @@ -1033,21 +1033,30 @@ public void shouldLookupInExternalRdfMapGetSubjectWithTargetedPredicateOfSpecifi } @Test //Scenario 3: - public void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage() { + public void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguageUsingNamespace() { + shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage("skos:prefLabel"); + } + + @Test //Scenario 3 without namespace : + public void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguageWithoutNamespace() { + shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage("http://www.w3.org/2004/02/skos/core#prefLabel"); + } + + private void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage(final String target) { MetafixTestHelpers.assertFix(streamReceiver, - Arrays.asList("set_array('prefLabel', 'Mathematics, Natural Sciences')", - "lookup_rdf('prefLabel.*'," + " '" + RDF_MAP + "', target:\"skos:prefLabel\", " + - "target_language:\"de\" )" - ), - i -> { - i.startRecord("1"); - i.endRecord(); - }, - o -> { - o.get().startRecord("1"); - o.get().literal("prefLabel", "Mathematik, Naturwissenschaften"); - o.get().endRecord(); - } + Arrays.asList("set_array('prefLabel', 'Mathematics, Natural Sciences')", + "lookup_rdf('prefLabel.*'," + " '" + RDF_MAP + "', target:\"" + target + "\", " + + 
"target_language:\"de\" )" + ), + i -> { + i.startRecord("1"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("prefLabel", "Mathematik, Naturwissenschaften"); + o.get().endRecord(); + } ); } From 1819aff50a053afb7b25a90e97081e5769c9e705 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 6 Sep 2022 16:08:39 +0200 Subject: [PATCH 03/29] Add follow URL redirections (metafacture-core#415) --- .../org/metafacture/metafix/maps/RdfMap.java | 80 +++++++++++++++---- .../metafix/MetafixLookupTest.java | 21 ++++- 2 files changed, 82 insertions(+), 19 deletions(-) diff --git a/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java b/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java index 06afef654..97304c75e 100644 --- a/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java +++ b/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java @@ -28,21 +28,23 @@ import org.apache.jena.rdf.model.Statement; import org.apache.jena.rdf.model.StmtIterator; import org.apache.jena.riot.RDFDataMgr; -import org.apache.jena.riot.RiotNotFoundException; import org.apache.jena.shared.PropertyNotFoundException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLConnection; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.Map; import java.util.NoSuchElementException; -import java.util.Set; /** * Provides a dynamically build {@link Map} based on an RDF resource. Can be one file or a comma separated list of RDF - * files or an HTTP(S) URI. + * files or an HTTP(S) URI. Redirections of HTTP(S) URIs are followed. * The resources are supposed to be UTF-8 encoded. *

* @@ -57,6 +59,10 @@ public final class RdfMap extends AbstractReadOnlyMap { private static String targetLanguage = ""; private static String target; private static final Logger LOG = LoggerFactory.getLogger(RdfMap.class); + private static final int MAX_REDIRECTIONS = 10; + private static final int MIN_HTTP_STATUS_CODE = 299; + private static final int MAX_HTTP_STATUS_CODE = 400; + private Model model; private boolean isUninitialized = true; private final ArrayList filenames = new ArrayList<>(); @@ -104,15 +110,19 @@ private void loadFiles() { } private void loadFile(final String file) { + String f = file; try { + if (file.toLowerCase().startsWith("http")) { + f = read(file); + } if (model == null) { - model = RDFDataMgr.loadModel(file); + model = RDFDataMgr.loadModel(f); } else { - RDFDataMgr.read(model, file); + RDFDataMgr.read(model, f); } } - catch (final RiotNotFoundException e) { + catch (final IOException e) { throw new FixExecutionException("rdf file: cannot read file", e); } } @@ -140,7 +150,7 @@ public String get(final Object key) { if (isUninitialized) { init(); } - String ret = Maps.DEFAULT_MAP_KEY; + String ret; if (map.containsKey(key.toString())) { ret = map.get(key.toString()); } @@ -160,7 +170,7 @@ public String get(final Object key) { //second try to get SUBJECT using PROPERTY and LITERAL ret = getSubjectUsingPropertyAndLiteral(key, targetProperty); //third try: get LITERAL of PREDICATE A using PREDICATE B - if (ret == Maps.DEFAULT_MAP_KEY) { + if (ret.equals(Maps.DEFAULT_MAP_KEY)) { ret = getLiteralOfPredicateUsingOtherPredicate(key, targetProperty); } else { @@ -180,7 +190,7 @@ private String getLiteralOfPredicateUsingOtherPredicate(final Object key, final while (iter.hasNext()) { resource = iter.nextResource(); if (resource.getProperty(targetProperty).getString().equals(key.toString())) { - Statement stmt = resource.getProperty(targetProperty); + Statement stmt; final StmtIterator iterProp = resource.listProperties(targetProperty); while 
(iterProp.hasNext()) { stmt = iterProp.nextStatement(); @@ -213,14 +223,6 @@ private String getSubjectUsingPropertyAndLiteral(final Object key, final Propert return ret; } - @Override - public Set keySet() { - if (isUninitialized) { - init(); - } - return Collections.unmodifiableSet(map.keySet()); - } - /** * Sets the language of the target Property which is queried in the RDF. Valid values are defined by BCP47. *
@@ -253,4 +255,48 @@ public void setTarget(final String target) { public void setDefault(final String defaultValue) { map.put(Maps.DEFAULT_MAP_KEY, defaultValue); } + + /** + * Gets a redirected URL, if any redirection takes place. Adapted predated code from org.apache.jena.rdfxml.xmlinput.JenaReader. + * + * @Deprecated Using newer jena version (needs java 11) this method would be obsolete. + * @param url the URL to resolve + * @return the (redirected) URL + * @throws IOException if any IO error occurs + */ + private String read(final String url) throws IOException { + String connectionURL = url; + try { + int count = 0; + URLConnection conn; + while (true) { + final URLConnection conn2 = new URL(connectionURL).openConnection(); + if (!(conn2 instanceof HttpURLConnection)) { + conn = conn2; + break; + } + count += 1; + if (count > MAX_REDIRECTIONS) { + throw new IOException("Too many redirects followed for " + url); + } + final HttpURLConnection httpURLConnection = (HttpURLConnection) conn2; + conn2.setRequestProperty("accept", "*/*"); + final int statusCode = httpURLConnection.getResponseCode(); + if (statusCode <= MIN_HTTP_STATUS_CODE || statusCode >= MAX_HTTP_STATUS_CODE) { + conn = conn2; + break; + } + // Redirect + connectionURL = conn2.getHeaderField("Location"); + if (connectionURL == null || url.equals(connectionURL)) { + throw new IOException("Failed to follow redirects for " + url); + } + } + connectionURL = conn.getURL().toString(); + } + catch (final IOException e) { + throw new IOException(e); + } + return connectionURL; + } } diff --git a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java index 78748c8ff..18617b93d 100644 --- a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java +++ b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java @@ -38,10 +38,9 @@ public class MetafixLookupTest { private static final String CSV_MAP = 
"src/test/resources/org/metafacture/metafix/maps/test.csv"; private static final String RDF_MAP = "src/test/resources/org/metafacture/metafix/maps/test.ttl"; + private static final String RDF_URL = "http://purl.org/lobid/rpb"; private static final String TSV_MAP = "src/test/resources/org/metafacture/metafix/maps/test.tsv"; - private static final String LOOKUP = "lookup('title.*',"; - private static final String LOOKUP_IN_RDF = "lookup_in_rdf('prefLabel.*',"; @Mock private StreamReceiver streamReceiver; @@ -994,6 +993,24 @@ public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicate( ); } + @Test + public void shouldLookupRdfUrlWithRedirection() { + MetafixTestHelpers.assertFix(streamReceiver, + Arrays.asList("lookup_rdf('prefLabel'," + " '" + RDF_URL + "', target:\"skos:prefLabel\")" + ), + i -> { + i.startRecord("1"); + i.literal("prefLabel", "http://purl.org/lobid/rpb#n882022"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("prefLabel", "Presserecht"); + o.get().endRecord(); + } + ); + } + @Test //Scenario 1: public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicateOfSpecificLanguage() { MetafixTestHelpers.assertFix(streamReceiver, From 5dea28eba070d48d97871eb56924f6e76fbec53f Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Thu, 8 Sep 2022 16:34:38 +0200 Subject: [PATCH 04/29] Clean up formatting for SKOS lookup tests. 
(#229) --- .../metafix/MetafixLookupTest.java | 60 +++++++++---------- 1 file changed, 30 insertions(+), 30 deletions(-) diff --git a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java index 18617b93d..e4e7ebcc1 100644 --- a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java +++ b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java @@ -40,6 +40,7 @@ public class MetafixLookupTest { private static final String RDF_MAP = "src/test/resources/org/metafacture/metafix/maps/test.ttl"; private static final String RDF_URL = "http://purl.org/lobid/rpb"; private static final String TSV_MAP = "src/test/resources/org/metafacture/metafix/maps/test.tsv"; + private static final String LOOKUP = "lookup('title.*',"; @Mock @@ -959,8 +960,8 @@ public void shouldPrintUnknownToFileWithoutAppend() throws IOException { @Test public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() { - MetafixTestHelpers.assertFix(streamReceiver, - Arrays.asList("lookup_rdf('created'," + " '" + RDF_MAP + "', target:\"created\", __default:\"0000-01-01\")" + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( + "lookup_rdf('created', '" + RDF_MAP + "', target: 'created', __default: '0000-01-01')" ), i -> { i.startRecord("1"); @@ -977,8 +978,8 @@ public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() { @Test public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicate() { - MetafixTestHelpers.assertFix(streamReceiver, - Arrays.asList("lookup_rdf('notation'," + " '" + RDF_MAP + "', target:\"skos:notation\")" + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( + "lookup_rdf('notation', '" + RDF_MAP + "', target: 'skos:notation')" ), i -> { i.startRecord("1"); @@ -995,8 +996,8 @@ public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicate( @Test public void shouldLookupRdfUrlWithRedirection() { - 
MetafixTestHelpers.assertFix(streamReceiver, - Arrays.asList("lookup_rdf('prefLabel'," + " '" + RDF_URL + "', target:\"skos:prefLabel\")" + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( + "lookup_rdf('prefLabel', '" + RDF_URL + "', target: 'skos:prefLabel')" ), i -> { i.startRecord("1"); @@ -1011,11 +1012,11 @@ public void shouldLookupRdfUrlWithRedirection() { ); } - @Test //Scenario 1: + @Test // Scenario 1 public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicateOfSpecificLanguage() { - MetafixTestHelpers.assertFix(streamReceiver, - Arrays.asList("set_array('prefLabel', 'https://w3id.org/kim/hochschulfaechersystematik/n4')", - "lookup_rdf('prefLabel.*'," + " '" + RDF_MAP + "', target:\"skos:prefLabel\", target_language:\"de\" )" + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( + "set_array('prefLabel', 'https://w3id.org/kim/hochschulfaechersystematik/n4')", + "lookup_rdf('prefLabel.*', '" + RDF_MAP + "', target: 'skos:prefLabel', target_language: 'de')" ), i -> { i.startRecord("1"); @@ -1029,11 +1030,11 @@ public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicateO ); } - @Test //Scenario 2: + @Test // Scenario 2 public void shouldLookupInExternalRdfMapGetSubjectWithTargetedPredicateOfSpecificLanguage() { - MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList("set_array('id', 'Mathematics, Natural Sciences')", - "lookup_rdf('id.*'," + " '" + RDF_MAP + "', target:\"skos:prefLabel\", " + - "target_language:\"en\" )" + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( + "set_array('id', 'Mathematics, Natural Sciences')", + "lookup_rdf('id.*', '" + RDF_MAP + "', target: 'skos:prefLabel', target_language: 'en')" ), i -> { i.startRecord("1"); @@ -1049,31 +1050,30 @@ public void shouldLookupInExternalRdfMapGetSubjectWithTargetedPredicateOfSpecifi ); } - @Test //Scenario 3: + @Test // Scenario 3 public void 
shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguageUsingNamespace() { shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage("skos:prefLabel"); } - @Test //Scenario 3 without namespace : + @Test // Scenario 3 without namespace public void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguageWithoutNamespace() { shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage("http://www.w3.org/2004/02/skos/core#prefLabel"); } private void shouldLookupInExternalRdfMapGetObjectWithTargetedPredicateOfSpecificLanguage(final String target) { - MetafixTestHelpers.assertFix(streamReceiver, - Arrays.asList("set_array('prefLabel', 'Mathematics, Natural Sciences')", - "lookup_rdf('prefLabel.*'," + " '" + RDF_MAP + "', target:\"" + target + "\", " + - "target_language:\"de\" )" - ), - i -> { - i.startRecord("1"); - i.endRecord(); - }, - o -> { - o.get().startRecord("1"); - o.get().literal("prefLabel", "Mathematik, Naturwissenschaften"); - o.get().endRecord(); - } + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( + "set_array('prefLabel', 'Mathematics, Natural Sciences')", + "lookup_rdf('prefLabel.*', '" + RDF_MAP + "', target: '" + target + "', target_language: 'de')" + ), + i -> { + i.startRecord("1"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("prefLabel", "Mathematik, Naturwissenschaften"); + o.get().endRecord(); + } ); } From 78260629beaaf5830eb23569a4a8af73ef6ae379 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 15 Sep 2022 11:52:31 +0200 Subject: [PATCH 05/29] Use camelCase (metafacture-core#415) --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 1b429fcf8..894cd98fd 100644 --- a/README.md +++ b/README.md @@ -611,9 +611,9 @@ lookup("path.to.field", "map-name", print_unknown: "true", destination: "unknown Looks up matching values in an RDF resource and replaces the field 
value with this match. A file as well as an HTTP(S) resource can be used. ```perl -lookup_rdf("", "", target: "") -lookup_rdf("", "", target: "", target_language: "") -lookup_rdf("", "", target: "", __default: "NA") +lookup_rdf("", "", target: "") +lookup_rdf("", "", target: "", target_language: "") +lookup_rdf("", "", target: "", __default: "NA") ``` ##### `prepend` From 4b341789f9a4395b2c28a95fae8d544ad55c7cf2 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 15 Sep 2022 11:56:51 +0200 Subject: [PATCH 06/29] Use withOption(...) Co-authored-by: Jens Wille --- .../main/java/org/metafacture/metafix/FixMethod.java | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java index de7800b61..f9c6aebac 100644 --- a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java +++ b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java @@ -99,15 +99,9 @@ public void apply(final Metafix metafix, final Record record, final List final String fileName = params.get(0); final RdfMap rdf = new RdfMap(); rdf.setFile(metafix.resolvePath(fileName)); - if (options.containsKey("target_language")) { - rdf.setTargetLanguage(options.get("target_language")); - } - if (options.containsKey("target")) { - rdf.setTarget(options.get("target")); - } - if (options.containsKey(Maps.DEFAULT_MAP_KEY)) { - rdf.setDefault(options.get(Maps.DEFAULT_MAP_KEY)); - } + withOption(options, "target", rdf::setTarget); + withOption(options, "target_language", rdf::setTargetLanguage); + withOption(options, Maps.DEFAULT_MAP_KEY, rdf::setDefault); metafix.putMap(params.size() > 1 ? 
params.get(1) : fileName, rdf); } }, From 1d347309782d6d5661240dbb5eef1697ec35214a Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 16 Sep 2022 15:52:40 +0200 Subject: [PATCH 07/29] Rename method (metafacture-core#415) --- metafix/src/main/java/org/metafacture/metafix/FixMethod.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java index f9c6aebac..8684de650 100644 --- a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java +++ b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java @@ -483,7 +483,6 @@ public void apply(final Metafix metafix, final Record record, final List @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { -<<<<<<< HEAD final Map map; if (params.size() <= 1) { @@ -534,7 +533,7 @@ public void apply(final Metafix metafix, final Record record, final List lookup_rdf { @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { - final Map map = extracted(metafix, record, params, options, KIND_OF_RDFMAP); + final Map map = getMap(metafix, record, params, options, KIND_OF_RDFMAP); record.transform(params.get(0), oldValue -> { final String newValue = map.get(oldValue); return newValue != null ? newValue : getBoolean(options, "delete") ? 
null : oldValue; @@ -675,7 +674,7 @@ public void apply(final Metafix metafix, final Record record, final List private static final Random RANDOM = new Random(); private static String defaultValue; - private static Map extracted(final Metafix metafix, final Record record, final List params, final Map options, final String kindOfMap) { + private static Map getMap(final Metafix metafix, final Record record, final List params, final Map options, final String kindOfMap) { final Map map; if (params.size() <= 1) { map = options; From b11a77ee94b24d5702c6540c3bf3607451ebced9 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 16 Sep 2022 16:10:17 +0200 Subject: [PATCH 08/29] Set default value if not defined (metafacture-core#415) --- .../org/metafacture/metafix/FixMethod.java | 2 +- .../metafix/MetafixLookupTest.java | 20 ++++++++++++++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java index 8684de650..ccec7c15b 100644 --- a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java +++ b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java @@ -535,7 +535,7 @@ public void apply(final Metafix metafix, final Record record, final List public void apply(final Metafix metafix, final Record record, final List params, final Map options) { final Map map = getMap(metafix, record, params, options, KIND_OF_RDFMAP); record.transform(params.get(0), oldValue -> { - final String newValue = map.get(oldValue); + final String newValue = map.getOrDefault(oldValue, defaultValue); return newValue != null ? newValue : getBoolean(options, "delete") ? 
null : oldValue; }); } diff --git a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java index e4e7ebcc1..72cad7646 100644 --- a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java +++ b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java @@ -959,7 +959,7 @@ public void shouldPrintUnknownToFileWithoutAppend() throws IOException { } @Test - public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() { + public void shouldLookupInExternalRdfUseDefinedDefaultValueIfNotFound() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( "lookup_rdf('created', '" + RDF_MAP + "', target: 'created', __default: '0000-01-01')" ), @@ -976,6 +976,24 @@ public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() { ); } + @Test + public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() { + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( + "lookup_rdf('created', '" + RDF_MAP + "', target: 'created')" + ), + i -> { + i.startRecord("1"); + i.literal("created", "https://w3id.org/kim/hochschulfaechersystematik/n4"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("created", "__default"); + o.get().endRecord(); + } + ); + } + @Test public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicate() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( From 234ab5ada08ee0f62eac2d328224532946446a55 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 20 Sep 2022 16:55:57 +0200 Subject: [PATCH 09/29] Fix put_rdfmap (metafacture-core#415) - add tests - add to README --- README.md | 22 ++++-- .../org/metafacture/metafix/FixMethod.java | 22 +++--- .../org/metafacture/metafix/maps/RdfMap.java | 58 ++++++++++----- .../metafix/MetafixLookupTest.java | 71 +++++++++++++++---- 4 files changed, 129 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 894cd98fd..fc573d856 
100644 --- a/README.md +++ b/README.md @@ -200,6 +200,17 @@ Options: - `key_column`: Defines the column to be used for keys. Uses zero index. (Default: `0`) - `value_column`: Defines the column to be used for values. Uses zero index. (Default: `1`) +##### `put_rdfmap` + +Defines an external RDF map for lookup from a file or an HTTP(S) resource. +As the RDF map reduces RDF triples to a key/value map, it is mandatory to set the target. +The targeted RDF property can optionally be bound by an RDF language tag. + +```perl +put_rdfmap("", "", target: "") +put_rdfmap("", "", target: "", target_language: "") +``` + ##### `put_map` Defines an internal map for [lookup](#lookup) from key/value pairs. @@ -608,12 +619,15 @@ lookup("path.to.field", "map-name", print_unknown: "true", destination: "unknown ##### `lookup_rdf` -Looks up matching values in an RDF resource and replaces the field value with this match. A file as well as an HTTP(S) resource can be used. +Looks up matching values in an RDF resource and replaces the field value with a match defined by a targeted RDF property. External files or HTTP(S) resources as well as internal RDF maps can be used. +The targeted RDF property can optionally be bound by an RDF language tag.
```perl -lookup_rdf("", "", target: "") -lookup_rdf("", "", target: "", target_language: "") -lookup_rdf("", "", target: "", __default: "NA") +lookup_rdf("", "", target: "") +lookup_rdf("", "", target: "") +lookup_rdf("", "", target: "") +lookup_rdf("", "", target: "", target_language: "") +lookup_rdf("", "", target: "", __default: "NA") ``` ##### `prepend` diff --git a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java index ccec7c15b..22c029e36 100644 --- a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java +++ b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java @@ -96,13 +96,14 @@ public void apply(final Metafix metafix, final Record record, final List put_rdfmap { @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { - final String fileName = params.get(0); + final String resourceName = params.get(0); final RdfMap rdf = new RdfMap(); - rdf.setFile(metafix.resolvePath(fileName)); - withOption(options, "target", rdf::setTarget); - withOption(options, "target_language", rdf::setTargetLanguage); + rdf.setResource(metafix.resolvePath(resourceName)); + withOption(options, RdfMap.TARGET, rdf::setTarget); + withOption(options, RdfMap.TARGET_LANGUAGE, rdf::setTargetLanguage); withOption(options, Maps.DEFAULT_MAP_KEY, rdf::setDefault); - metafix.putMap(params.size() > 1 ? params.get(1) : fileName, rdf); + final String mapName = (params.size() > 1 ? 
params.get(1) : params.get(0)) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, ""); + metafix.putMap(mapName, rdf); } }, put_var { @@ -680,15 +681,20 @@ private static Map getMap(final Metafix metafix, final Record re map = options; } else { - final String mapName = params.get(1); - + final String mapName; + if (kindOfMap.equals(KIND_OF_FILEMAP)) { + mapName = params.get(1); + } + else { + mapName = params.get(1) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, ""); + } if (!metafix.getMapNames().contains(mapName)) { if (mapName.contains(".") || mapName.contains(File.separator)) { if (kindOfMap.equals(KIND_OF_FILEMAP)) { put_filemap.apply(metafix, record, Arrays.asList(mapName), options); } if (kindOfMap.equals(KIND_OF_RDFMAP)) { - put_rdfmap.apply(metafix, record, Arrays.asList(mapName), options); + put_rdfmap.apply(metafix, record, Arrays.asList(params.get(1)), options); } } else { diff --git a/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java b/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java index 97304c75e..5bcc69b18 100644 --- a/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java +++ b/metafix/src/main/java/org/metafacture/metafix/maps/RdfMap.java @@ -56,30 +56,30 @@ * @see org.metafacture.metamorph.maps.FileMap */ public final class RdfMap extends AbstractReadOnlyMap { - private static String targetLanguage = ""; - private static String target; - private static final Logger LOG = LoggerFactory.getLogger(RdfMap.class); + public static final String TARGET = "target"; + public static final String TARGET_LANGUAGE = "target_language"; private static final int MAX_REDIRECTIONS = 10; private static final int MIN_HTTP_STATUS_CODE = 299; private static final int MAX_HTTP_STATUS_CODE = 400; - + private static final Logger LOG = LoggerFactory.getLogger(RdfMap.class); private Model model; private boolean isUninitialized = true; private final ArrayList filenames = new 
ArrayList<>(); private final Map map = new HashMap<>(); + private String targetLanguage = ""; + private String target; /** * Creates an instance of {@link RdfMap}. */ public RdfMap() { targetLanguage = ""; + setDefault(Maps.DEFAULT_MAP_KEY); + } private void init() { loadFiles(); - if (!map.containsKey(Maps.DEFAULT_MAP_KEY)) { - setDefault(Maps.DEFAULT_MAP_KEY); - } if (!target.toLowerCase().startsWith("http")) { final String[] nsPrefixAndProperty = target.split(":"); target = nsPrefixAndProperty.length == 2 ? model.getNsPrefixURI(nsPrefixAndProperty[0]) + nsPrefixAndProperty[1] : nsPrefixAndProperty[0]; @@ -101,7 +101,7 @@ public void setFiles(final String files) { * * @param file the file */ - public void setFile(final String file) { + public void setResource(final String file) { Collections.addAll(filenames, file); } @@ -147,20 +147,20 @@ private void loadFile(final String file) { */ @Override public String get(final Object key) { - if (isUninitialized) { - init(); - } String ret; if (map.containsKey(key.toString())) { ret = map.get(key.toString()); } else { + if (isUninitialized) { + init(); + } final Resource resource = ResourceFactory.createResource(key.toString()); final Property targetProperty = ResourceFactory.createProperty(target); try { //first try to get LITERAL using SUBJECT and PROPERTY - if (!RdfMap.targetLanguage.isEmpty()) { - ret = model.getRequiredProperty(resource, targetProperty, RdfMap.targetLanguage).getString(); + if (!targetLanguage.isEmpty()) { + ret = model.getRequiredProperty(resource, targetProperty, targetLanguage).getString(); } else { ret = model.getRequiredProperty(resource, targetProperty).getString(); @@ -174,7 +174,7 @@ public String get(final Object key) { ret = getLiteralOfPredicateUsingOtherPredicate(key, targetProperty); } else { - LOG.info("Could not lookup:'" + key + "@" + (RdfMap.targetLanguage.isEmpty() ? RdfMap.targetLanguage : "") + " for " + target + "'. 
Going with default value."); + LOG.info("Could not lookup:'" + key + (targetLanguage.isEmpty() ? "@" + targetLanguage : "") + " for " + target + "'. Going with default value."); } } map.put(key.toString(), ret); @@ -194,7 +194,7 @@ private String getLiteralOfPredicateUsingOtherPredicate(final Object key, final final StmtIterator iterProp = resource.listProperties(targetProperty); while (iterProp.hasNext()) { stmt = iterProp.nextStatement(); - if (stmt.getLanguage().equals(RdfMap.targetLanguage) && !stmt.getString().equals(key)) { + if (stmt.getLanguage().equals(targetLanguage) && !stmt.getString().equals(key)) { ret = stmt.getString(); } } @@ -210,8 +210,8 @@ private String getSubjectUsingPropertyAndLiteral(final Object key, final Propert while (iter.hasNext()) { resource = iter.nextResource(); if (resource.getProperty(targetProperty).getString().equals(key.toString())) { - if (!RdfMap.targetLanguage.isEmpty()) { - if (resource.getProperty(targetProperty).getLanguage().equals(RdfMap.targetLanguage)) { + if (!this.targetLanguage.isEmpty()) { + if (resource.getProperty(targetProperty).getLanguage().equals(targetLanguage)) { ret = resource.getURI(); } } @@ -223,6 +223,16 @@ private String getSubjectUsingPropertyAndLiteral(final Object key, final Propert return ret; } + /** + * Gets the language of the target Property which is queried in the RDF. Valid values are defined by BCP47. + *
+ * + * @return the targeted language + */ + public String getTargetLanguage() { + return targetLanguage; + } + /** * Sets the language of the target Property which is queried in the RDF. Valid values are defined by BCP47. *
@@ -231,7 +241,17 @@ private String getSubjectUsingPropertyAndLiteral(final Object key, final Propert * @param targetLanguage the language of the target Property to be queried */ public void setTargetLanguage(final String targetLanguage) { - RdfMap.targetLanguage = targetLanguage; + this.targetLanguage = targetLanguage; + } + + /** + * Gets the target Property which is queried in the RDF. Namespaces are allowed. + *
+ * + * @return the target Property to be queried + */ + public String getTarget() { + return target; } /** @@ -242,7 +262,7 @@ public void setTargetLanguage(final String targetLanguage) { * @param target the Property to be queried */ public void setTarget(final String target) { - RdfMap.target = target; + this.target = target; } /** diff --git a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java index 72cad7646..2ff1d429c 100644 --- a/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java +++ b/metafix/src/test/java/org/metafacture/metafix/MetafixLookupTest.java @@ -958,6 +958,30 @@ public void shouldPrintUnknownToFileWithoutAppend() throws IOException { MetafixTestHelpers.assertTempFile("you\ntoo\n", p -> shouldPrintUnknown(", destination: '" + p + "', append: 'false'", null, "")); } + @Test + public void shouldLookupInSeparateExternalRdfFileMapWithName() { + assertRdfMap( + "put_rdfmap('" + RDF_MAP + "', 'testMapSkosNotation', target: 'skos:notation')", + "lookup_rdf('notation', 'testMapSkosNotation', target: 'skos:notation')" + ); + } + + @Test + public void shouldLookupInSeparateExternalRdfFileMapWithDifferentTargets() { + assertRdfMapWithDifferentTargets( + "put_rdfmap('" + RDF_MAP + "', 'testRdfMapSkosNotation', target: 'skos:notation')", + "put_rdfmap('" + RDF_MAP + "', 'testRdfMapCreated', target: 'created')", + "lookup_rdf('notation', 'testRdfMapSkosNotation', target: 'skos:notation')", + "lookup_rdf('created', 'testRdfMapCreated', target: 'created')"); + } + + @Test + public void shouldLookupInExternalRdfWithDifferentTargets() { + assertRdfMapWithDifferentTargets( + "lookup_rdf('notation', '" + RDF_MAP + "', target: 'skos:notation')", + "lookup_rdf('created', '" + RDF_MAP + "', target: 'created')"); + } + @Test public void shouldLookupInExternalRdfUseDefinedDefaultValueIfNotFound() { MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( @@ -996,19 
+1020,8 @@ public void shouldLookupInExternalRdfUseDefaultValueIfNotFound() { @Test public void shouldLookupInExternalRdfMapGetObjectOfSubjectWithTargetedPredicate() { - MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList( - "lookup_rdf('notation', '" + RDF_MAP + "', target: 'skos:notation')" - ), - i -> { - i.startRecord("1"); - i.literal("notation", "https://w3id.org/kim/hochschulfaechersystematik/n4"); - i.endRecord(); - }, - o -> { - o.get().startRecord("1"); - o.get().literal("notation", "4"); - o.get().endRecord(); - } + assertRdfMap( + "lookup_rdf('notation', '" + RDF_MAP + "', target: 'skos:notation')" ); } @@ -1114,4 +1127,36 @@ private void assertMap(final String... fixDef) { ); } + private void assertRdfMap(final String... fixDef) { + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(fixDef), + i -> { + i.startRecord("1"); + i.literal("notation", "https://w3id.org/kim/hochschulfaechersystematik/n4"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("notation", "4"); + o.get().endRecord(); + } + ); + } + + private void assertRdfMapWithDifferentTargets(final String... fixDef) { + MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(fixDef), + i -> { + i.startRecord("1"); + i.literal("notation", "https://w3id.org/kim/hochschulfaechersystematik/n4"); + i.literal("created", "https://w3id.org/kim/hochschulfaechersystematik/n4"); + i.endRecord(); + }, + o -> { + o.get().startRecord("1"); + o.get().literal("notation", "4"); + o.get().literal("created", "__default"); + o.get().endRecord(); + } + ); + } + } From c3418a4a892036f0c3e523b53b2d135e8d4b3c09 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 20 Sep 2022 17:02:23 +0200 Subject: [PATCH 10/29] Remove static defaultValue (metafacture-core#415) This was overriding every defaultValue, but maps should have the possibility to have different default values. 
--- metafix/src/main/java/org/metafacture/metafix/FixMethod.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java index 22c029e36..4524d5421 100644 --- a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java +++ b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java @@ -536,7 +536,7 @@ public void apply(final Metafix metafix, final Record record, final List public void apply(final Metafix metafix, final Record record, final List params, final Map options) { final Map map = getMap(metafix, record, params, options, KIND_OF_RDFMAP); record.transform(params.get(0), oldValue -> { - final String newValue = map.getOrDefault(oldValue, defaultValue); + final String newValue = map.getOrDefault(oldValue, map.get(Maps.DEFAULT_MAP_KEY)); return newValue != null ? newValue : getBoolean(options, "delete") ? null : oldValue; }); } @@ -673,7 +673,6 @@ public void apply(final Metafix metafix, final Record record, final List private static final String ERROR_STRING_OPTION = "error_string"; private static final Random RANDOM = new Random(); - private static String defaultValue; private static Map getMap(final Metafix metafix, final Record record, final List params, final Map options, final String kindOfMap) { final Map map; @@ -703,7 +702,6 @@ private static Map getMap(final Metafix metafix, final Record re } map = metafix.getMap(mapName); } - defaultValue = map.get(Maps.DEFAULT_MAP_KEY); // TODO: Catmandu uses 'default' return map; } From 94ae82a5412bc0253d7b14fea18c8d96afab7795 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Tue, 20 Sep 2022 18:12:26 +0200 Subject: [PATCH 11/29] Extract lookup into FixFunction (metafacture-core#415) --- .../org/metafacture/metafix/FixMethod.java | 62 +++++-------------- .../metafacture/metafix/api/FixFunction.java | 32 ++++++++++ 2 files changed, 48 insertions(+), 46 deletions(-) diff 
--git a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java index 4524d5421..58927828e 100644 --- a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java +++ b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java @@ -23,7 +23,6 @@ import org.metafacture.metamorph.functions.Timestamp; import org.metafacture.metamorph.maps.FileMap; -import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.Collections; @@ -31,6 +30,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Random; import java.util.concurrent.atomic.LongAdder; import java.util.function.Consumer; @@ -96,14 +96,19 @@ public void apply(final Metafix metafix, final Record record, final List put_rdfmap { @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { - final String resourceName = params.get(0); - final RdfMap rdf = new RdfMap(); - rdf.setResource(metafix.resolvePath(resourceName)); - withOption(options, RdfMap.TARGET, rdf::setTarget); - withOption(options, RdfMap.TARGET_LANGUAGE, rdf::setTargetLanguage); - withOption(options, Maps.DEFAULT_MAP_KEY, rdf::setDefault); - final String mapName = (params.size() > 1 ? params.get(1) : params.get(0)) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, ""); - metafix.putMap(mapName, rdf); + final String rdfMapName = params.size() == 1 ? 
params.get(0) : params.get(1) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, ""); + final String replaceTargets = options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, ""); + final String resourceName = Optional.ofNullable(params.get(0)) + .map(str -> str.replaceAll(replaceTargets + "$", "")) + .orElse(params.get(0)); + final RdfMap rdfMap = new RdfMap(); + + rdfMap.setResource(metafix.resolvePath(resourceName)); + withOption(options, RdfMap.TARGET, rdfMap::setTarget); + withOption(options, RdfMap.TARGET_LANGUAGE, rdfMap::setTargetLanguage); + withOption(options, Maps.DEFAULT_MAP_KEY, rdfMap::setDefault); + + metafix.putMap(rdfMapName, rdfMap); } }, put_var { @@ -484,6 +489,7 @@ public void apply(final Metafix metafix, final Record record, final List @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { +<<<<<<< HEAD final Map map; if (params.size() <= 1) { @@ -534,11 +540,7 @@ public void apply(final Metafix metafix, final Record record, final List lookup_rdf { @Override public void apply(final Metafix metafix, final Record record, final List params, final Map options) { - final Map map = getMap(metafix, record, params, options, KIND_OF_RDFMAP); - record.transform(params.get(0), oldValue -> { - final String newValue = map.getOrDefault(oldValue, map.get(Maps.DEFAULT_MAP_KEY)); - return newValue != null ? newValue : getBoolean(options, "delete") ? 
null : oldValue; - }); + lookup(metafix, record, params, options, put_rdfmap); } }, prepend { @@ -663,8 +665,6 @@ public void apply(final Metafix metafix, final Record record, final List } }; - public static final String KIND_OF_RDFMAP = "rdfmap"; - public static final String KIND_OF_FILEMAP = "filemap"; private static final Pattern NAMED_GROUP_PATTERN = Pattern.compile("\\(\\?<(.+?)>"); private static final String FILEMAP_SEPARATOR_OPTION = "sep_char"; @@ -674,35 +674,5 @@ public void apply(final Metafix metafix, final Record record, final List private static final Random RANDOM = new Random(); - private static Map getMap(final Metafix metafix, final Record record, final List params, final Map options, final String kindOfMap) { - final Map map; - if (params.size() <= 1) { - map = options; - } - else { - final String mapName; - if (kindOfMap.equals(KIND_OF_FILEMAP)) { - mapName = params.get(1); - } - else { - mapName = params.get(1) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, ""); - } - if (!metafix.getMapNames().contains(mapName)) { - if (mapName.contains(".") || mapName.contains(File.separator)) { - if (kindOfMap.equals(KIND_OF_FILEMAP)) { - put_filemap.apply(metafix, record, Arrays.asList(mapName), options); - } - if (kindOfMap.equals(KIND_OF_RDFMAP)) { - put_rdfmap.apply(metafix, record, Arrays.asList(params.get(1)), options); - } - } - else { - // Probably an unknown internal map? Log a warning? 
- } - } - map = metafix.getMap(mapName); - } - return map; - } } diff --git a/metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java b/metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java index 458a02335..0e3b6ff56 100644 --- a/metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java +++ b/metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java @@ -21,7 +21,11 @@ import org.metafacture.metafix.Metafix; import org.metafacture.metafix.Record; import org.metafacture.metafix.Value; +import org.metafacture.metafix.maps.RdfMap; +import org.metafacture.metamorph.api.Maps; +import java.io.File; +import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Map; @@ -107,4 +111,32 @@ default Stream flatten(final Stream stream) { )); } + default void lookup(final Metafix metafix, final Record record, final List params, final Map options, final FixFunction kindOfMap) { + final Map map = getMap(metafix, record, params, options, kindOfMap); + record.transform(params.get(0), oldValue -> { + final String newValue = map.getOrDefault(oldValue, map.get(Maps.DEFAULT_MAP_KEY)); + return newValue != null ? newValue : getBoolean(options, "delete") ? null : oldValue; + }); + } + + static Map getMap(final Metafix metafix, final Record record, final List params, final Map options, final FixFunction kindOfMap) { + final Map map; + if (params.size() <= 1) { + map = options; + } + else { + final String mapName = kindOfMap.toString().equals("put_rdfmap") ? (params.size() > 1 ? params.get(1) : params.get(0)) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, "") : params.get(1); + if (!metafix.getMapNames().contains(mapName)) { + if (mapName.contains(".") || mapName.contains(File.separator)) { + kindOfMap.apply(metafix, record, Arrays.asList(mapName), options); + } + else { + // Probably an unknown internal map? Log a warning? 
+ } + } + map = metafix.getMap(mapName); + } + return map; + } + } From 62da8a0ccaf8f07b9f2b853331e16d9bd9933a07 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Thu, 22 Sep 2022 15:41:47 +0200 Subject: [PATCH 12/29] Add slf4j-log4j12 dependency Fixes the "Failed to load class org.slf4j.impl.StaticLoggerBinder" message when building. --- metafix/build.gradle | 1 + 1 file changed, 1 insertion(+) diff --git a/metafix/build.gradle b/metafix/build.gradle index f90c61a70..750e5ae1a 100644 --- a/metafix/build.gradle +++ b/metafix/build.gradle @@ -20,6 +20,7 @@ dependencies { implementation "org.eclipse.xtext:org.eclipse.xtext:${versions.xtext}" implementation "org.eclipse.xtext:org.eclipse.xtext.xbase:${versions.xtext}" implementation "org.slf4j:slf4j-api:${versions.slf4j}" + implementation "org.slf4j:slf4j-log4j12:${versions.slf4j}" testImplementation "org.junit.jupiter:junit-jupiter-api:${versions.junit_jupiter}" testImplementation "org.junit.platform:junit-platform-launcher:${versions.junit_platform}" From 29d136027bf70e187a15164bc2b2f557779bf771 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 23 Sep 2022 09:29:08 +0200 Subject: [PATCH 13/29] Fix order; rename variable; remove empty line (metafacture-core#415) --- README.md | 14 +++++++++++++- .../java/org/metafacture/metafix/FixMethod.java | 1 - .../org/metafacture/metafix/api/FixFunction.java | 10 +++++----- 3 files changed, 18 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index fc573d856..01f78ff5d 100644 --- a/README.md +++ b/README.md @@ -190,6 +190,7 @@ The separator (`sep_char`) will vary depending on the source file, e.g.: | CSV | `,` or `;` | | TSV | `\t` | +<<<<<<< HEAD Options: - `allow_empty_values`: Sets whether to allow empty values in the filemap or to ignore these entries. (Default: `false`) @@ -223,6 +224,17 @@ put_map("", ) ``` +##### `put_rdfmap` + +Defines an external RDF map for lookup from a file or an HTTP(S) resource. 
+As the RDF map is reducing RDF triples to a key/value map it is mandatory to set the target. +The targeted RDF property can optionally be bound by an RDF language tag. + +```perl +put_rdfmap("", "", target: "") +put_rdfmap("", "", target: ", target_language: "") +``` + ##### `put_var` Defines a single global variable that can be referenced with `$[]`. @@ -620,7 +632,7 @@ lookup("path.to.field", "map-name", print_unknown: "true", destination: "unknown ##### `lookup_rdf` Looks up matching values in an RDF resource and replaces the field value with a match defined by a targeted RDF property. External files or HTTP(S) resources as well as internal RDF maps can be used. -The targeted RDF property can optional be bound by an RDF language tag. +The targeted RDF property can optionalyl be bound by an RDF language tag. ```perl lookup_rdf("", "", target: "") diff --git a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java index 58927828e..d6c37dce4 100644 --- a/metafix/src/main/java/org/metafacture/metafix/FixMethod.java +++ b/metafix/src/main/java/org/metafacture/metafix/FixMethod.java @@ -674,5 +674,4 @@ public void apply(final Metafix metafix, final Record record, final List private static final Random RANDOM = new Random(); - } diff --git a/metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java b/metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java index 0e3b6ff56..0994fbd1f 100644 --- a/metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java +++ b/metafix/src/main/java/org/metafacture/metafix/api/FixFunction.java @@ -111,24 +111,24 @@ default Stream flatten(final Stream stream) { )); } - default void lookup(final Metafix metafix, final Record record, final List params, final Map options, final FixFunction kindOfMap) { - final Map map = getMap(metafix, record, params, options, kindOfMap); + default void lookup(final Metafix metafix, final Record record, final 
List params, final Map options, final FixFunction mapFunction) { + final Map map = getMap(metafix, record, params, options, mapFunction); record.transform(params.get(0), oldValue -> { final String newValue = map.getOrDefault(oldValue, map.get(Maps.DEFAULT_MAP_KEY)); return newValue != null ? newValue : getBoolean(options, "delete") ? null : oldValue; }); } - static Map getMap(final Metafix metafix, final Record record, final List params, final Map options, final FixFunction kindOfMap) { + static Map getMap(final Metafix metafix, final Record record, final List params, final Map options, final FixFunction mapFunction) { final Map map; if (params.size() <= 1) { map = options; } else { - final String mapName = kindOfMap.toString().equals("put_rdfmap") ? (params.size() > 1 ? params.get(1) : params.get(0)) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, "") : params.get(1); + final String mapName = mapFunction.toString().equals("put_rdfmap") ? (params.size() > 1 ? params.get(1) : params.get(0)) + options.get(RdfMap.TARGET) + options.getOrDefault(RdfMap.TARGET_LANGUAGE, "") : params.get(1); if (!metafix.getMapNames().contains(mapName)) { if (mapName.contains(".") || mapName.contains(File.separator)) { - kindOfMap.apply(metafix, record, Arrays.asList(mapName), options); + mapFunction.apply(metafix, record, Arrays.asList(mapName), options); } else { // Probably an unknown internal map? Log a warning? From 5d70f14eaee20a13faac0671bc87c7be42c5f4c7 Mon Sep 17 00:00:00 2001 From: Pascal Christoph Date: Fri, 23 Sep 2022 10:33:15 +0200 Subject: [PATCH 14/29] Fix typos (metafacture-core#415) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 01f78ff5d..360ef8d4e 100644 --- a/README.md +++ b/README.md @@ -232,7 +232,7 @@ The targeted RDF property can optionally be bound by an RDF language tag. 
```perl put_rdfmap("", "", target: "") -put_rdfmap("", "", target: ", target_language: "") +put_rdfmap("", "", target: "", target_language: "") ``` ##### `put_var` @@ -632,7 +632,7 @@ lookup("path.to.field", "map-name", print_unknown: "true", destination: "unknown ##### `lookup_rdf` Looks up matching values in an RDF resource and replaces the field value with a match defined by a targeted RDF property. External files or HTTP(S) resources as well as internal RDF maps can be used. -The targeted RDF property can optionalyl be bound by an RDF language tag. +The targeted RDF property can optionally be bound by an RDF language tag. ```perl lookup_rdf("", "", target: "") From 9dd9762ce085946e6ba6033d54738e8597d9fa4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20B=C3=BClte?= Date: Thu, 29 Sep 2022 18:37:42 +0200 Subject: [PATCH 15/29] Add integration tests for Skos-Lookup https://github.com/metafacture/metafacture-core/issues/415 --- .../expected.json | 16 +++ .../hcrt.ttl | 134 ++++++++++++++++++ .../input.json | 16 +++ .../test.fix | 4 + .../test.flux | 8 ++ .../expected.json | 16 +++ .../hcrt.ttl | 134 ++++++++++++++++++ .../input.json | 16 +++ .../test.fix | 4 + .../test.flux | 8 ++ .../lookupRdfPropertyToProperty/expected.json | 12 ++ .../lookupRdfPropertyToProperty/hcrt.ttl | 134 ++++++++++++++++++ .../lookupRdfPropertyToProperty/input.json | 12 ++ .../lookupRdfPropertyToProperty/test.fix | 1 + .../lookupRdfPropertyToProperty/test.flux | 8 ++ .../lookupRdfPropertyToSubject/expected.json | 12 ++ .../lookupRdfPropertyToSubject/hcrt.ttl | 134 ++++++++++++++++++ .../lookupRdfPropertyToSubject/input.json | 12 ++ .../lookupRdfPropertyToSubject/test.fix | 1 + .../lookupRdfPropertyToSubject/test.flux | 8 ++ .../lookupRdfSubjectToProperty/expected.json | 12 ++ .../lookupRdfSubjectToProperty/hcrt.ttl | 134 ++++++++++++++++++ .../lookupRdfSubjectToProperty/input.json | 12 ++ .../lookupRdfSubjectToProperty/test.fix | 2 + .../lookupRdfSubjectToProperty/test.flux | 8 ++ 25 
files changed, 858 insertions(+) create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/expected.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/hcrt.ttl create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/input.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/test.fix create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/test.flux create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToSubject/expected.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToSubject/hcrt.ttl create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToSubject/input.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToSubject/test.fix create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToSubject/test.flux create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToProperty/expected.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToProperty/hcrt.ttl create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToProperty/input.json create mode 100644 
metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToProperty/test.fix create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToProperty/test.flux create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToSubject/expected.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToSubject/hcrt.ttl create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToSubject/input.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToSubject/test.fix create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfPropertyToSubject/test.flux create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfSubjectToProperty/expected.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfSubjectToProperty/hcrt.ttl create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfSubjectToProperty/input.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfSubjectToProperty/test.fix create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfSubjectToProperty/test.flux diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/expected.json b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/expected.json new file mode 100644 
index 000000000..b5e3f2335 --- /dev/null +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/expected.json @@ -0,0 +1,16 @@ +{ + "name": "Jake", + "a": "https://w3id.org/kim/hcrt/application" +} +{ + "name": "Blacky", + "a": "https://w3id.org/kim/hcrt/index" +} +{ + "name": "Noone", + "a": "cat" +} +{ + "name" : "Noone_2", + "a" : "Assessment" +} diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/hcrt.ttl b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/hcrt.ttl new file mode 100644 index 000000000..0a77874de --- /dev/null +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/lookup/fromJson/toJson/lookupRdfDefinedPropertyToProperty/hcrt.ttl @@ -0,0 +1,134 @@ +@base . +@prefix dct: . +@prefix skos: . +@prefix vann: . + + + a skos:ConceptScheme; + dct:title "Hochschulcampus Ressourcentypen"@de, "Higher Education Resource Types"@en, "Типи ресурсів вищої освіти"@uk ; + dct:description "Eine Wertelliste für Typen von Lernressourcen (Learning Resource Type), entstanden im Kontext des Metadatenschemas \"LOM for Higher Education OER Repositories\" (https://w3id.org/dini-ag-kim/hs-oer-lom-profil/latest/)."@de ; + dct:publisher ; + dct:issued "2020-02-07" ; + vann:preferredNamespaceUri "https://w3id.org/kim/hcrt/" ; + vann:preferredNamespacePrefix "hcrt" ; + dct:license ; + skos:hasTopConcept , ,