Skip to content
This repository has been archived by the owner on Jan 27, 2025. It is now read-only.

Add support for Metamorph wildcards. #97

Merged
merged 5 commits into from
Dec 22, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion metafix/src/main/java/org/metafacture/metafix/Metafix.java
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,6 @@ public class Metafix implements StreamPipe<StreamReceiver>, Maps { // checkstyle
private final Map<String, Map<String, String>> maps = new HashMap<>();
private final StreamFlattener flattener = new StreamFlattener();

// TODO: Use SimpleRegexTrie / WildcardTrie for wildcard, alternation and character class support
private Record currentRecord = new Record();
private Fix fix;
private Map<String, String> vars = new HashMap<>();
Expand Down
120 changes: 107 additions & 13 deletions metafix/src/main/java/org/metafacture/metafix/Value.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@

package org.metafacture.metafix;

import org.metafacture.commons.tries.SimpleRegexTrie;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
Expand All @@ -24,7 +26,9 @@
import java.util.Map;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.function.UnaryOperator;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
Expand All @@ -37,6 +41,8 @@ public class Value {
private static final String LAST_FIELD = "$last";
private static final String ASTERISK = "*";

private static final String UNEXPECTED = "expected array or hash, got ";

private final Array array;
private final Hash hash;
private final String string;
Expand Down Expand Up @@ -236,6 +242,19 @@ private static String[] tail(final String[] fields) {
return Arrays.copyOfRange(fields, 1, fields.length);
}

/**
 * Forwards the transformation to the container held by this value.
 *
 * @param fields   the path segments that are still to be resolved
 * @param operator the operator handed on to the container unchanged
 * @throws IllegalStateException if this value is neither an array nor a hash
 */
private void transformFields(final String[] fields, final UnaryOperator<String> operator) {
    switch (type) {
        case Hash:
            asHash().transformFields(fields, operator);
            return;
        case Array:
            asArray().transformFields(fields, operator);
            return;
        default:
            // Only containers can be descended into.
            throw new IllegalStateException(UNEXPECTED + type);
    }
}

enum Type {
Array,
Hash,
Expand Down Expand Up @@ -396,6 +415,45 @@ private Value findInValue(final String[] path, final Value value) {
return result;
}

/**
 * Applies the operator to the elements of this array that are addressed by
 * the given path segments.
 *
 * <ul>
 * <li>An empty path or a leading {@code ASTERISK} addresses every element.</li>
 * <li>A leading number is treated as a 1-based index; an index outside the
 * array is ignored.</li>
 * <li>Any other leading segment is passed on unchanged to every element.</li>
 * </ul>
 *
 * Elements that end up as {@code null} are removed from the list afterwards.
 *
 * @param fields   the path segments that are still to be resolved, may be empty
 * @param operator the operator applied to string elements; if {@code null},
 *                 addressed string elements are removed
 */
private void transformFields(final String[] fields, final UnaryOperator<String> operator) {
    final int size = size();

    if (fields.length == 0) {
        // Check for the empty path BEFORE touching fields[0] or tail(fields):
        // both fail on an empty array, which made this case unreachable.
        for (int i = 0; i < size; ++i) {
            transformFields(i, fields, operator);
        }
    }
    else {
        final String field = fields[0];
        final String[] remainingFields = tail(fields);

        if (field.equals(ASTERISK)) {
            for (int i = 0; i < size; ++i) {
                transformFields(i, remainingFields, operator);
            }
        }
        else if (isNumber(field)) {
            final int index = Integer.parseInt(field) - 1; // TODO: 0-based Catmandu vs. 1-based Metafacture
            if (index >= 0 && index < size) {
                transformFields(index, remainingFields, operator);
            }
        }
        // TODO: WDCD? copy_field('your.name','author[].name'), where name is an array
        else {
            // The segment names a field inside the elements, so keep the full path.
            for (int i = 0; i < size; ++i) {
                transformFields(i, fields, operator);
            }
        }
    }

    // Elements are set to null when they are to be dropped; purge them in one pass.
    list.removeIf(v -> v == null);
}

/**
 * Transforms the single element at the given position.
 *
 * A string element is replaced by the operator's result, or by {@code null}
 * when no operator is given; any other element resolves the remaining path
 * itself. A {@code null} element is left alone.
 *
 * @param index    the position of the element within this array
 * @param fields   the path segments handed on to a non-string element
 * @param operator the operator applied to a string element, may be {@code null}
 */
private void transformFields(final int index, final String[] fields, final UnaryOperator<String> operator) {
    final Value element = get(index);

    if (element == null) {
        return;
    }

    if (!element.isString()) {
        element.transformFields(fields, operator);
        return;
    }

    final Value replacement = operator == null ? null : new Value(operator.apply(element.asString()));
    set(index, replacement);
}

private void insert(final InsertMode mode, final String[] fields, final String newValue) {
switch (fields[0]) {
case ASTERISK:
Expand Down Expand Up @@ -448,8 +506,6 @@ public static class Hash extends AbstractValueType {

private static final String FIELD_PATH_SEPARATOR = "\\.";

private static final String UNEXPECTED = "expected array or hash, got ";

private final Map<String, Value> map = new LinkedHashMap<>();

/**
Expand All @@ -465,7 +521,7 @@ protected Hash() {
* @return true if this hash contains the metadata field, false otherwise
*/
public boolean containsField(final String field) {
return map.containsKey(field);
return map.keySet().stream().anyMatch(fieldMatcher(field));
}

/**
Expand Down Expand Up @@ -526,7 +582,9 @@ public Value append(final String fieldPath, final String newValue) {
* @return the metadata value
*/
public Value get(final String field) {
return map.get(field);
// TODO: special treatment (only) for exact matches?
final List<Value> list = findFields(field).map(map::get).collect(Collectors.toList());
return list.isEmpty() ? null : list.size() == 1 ? list.get(0) : new Value(list);
}

public Value find(final String fieldPath) {
Expand Down Expand Up @@ -642,7 +700,7 @@ private Value insert(final InsertMode mode, final String[] fields, final String
* @param field the field name
*/
public void remove(final String field) {
map.remove(field);
modifyFields(field, map::remove);
}

public void removeNested(final String fieldPath) {
Expand Down Expand Up @@ -706,14 +764,35 @@ private void appendValue(final String[] newName, final Value v) {
}

public void transformFields(final List<String> params, final UnaryOperator<String> operator) {
final String field = params.get(0);
final Value value = find(field);
if (value != null) {
removeNested(field);
if (operator != null) {
value.asList(a -> a.forEach(v -> append(field, operator.apply(v.toString()))));
}
transformFields(split(params.get(0)), operator);
}

/**
 * Applies the operator to the values addressed by the given path segments,
 * starting at this hash.
 *
 * A leading {@code ASTERISK} is skipped and the rest of the path is resolved
 * against this same hash. Otherwise the first segment is matched against the
 * field names: on the last segment each matching field is removed and, if an
 * operator is given, its transformed values are appended again under the
 * same name; on earlier segments the matching values resolve the remaining
 * path themselves.
 *
 * @param fields   the path segments that are still to be resolved; an empty
 *                 path addresses nothing in a hash and is ignored
 * @param operator the operator applied to the addressed values; if
 *                 {@code null}, the addressed fields are only removed
 */
private void transformFields(final String[] fields, final UnaryOperator<String> operator) {
    if (fields.length == 0) {
        // Reached when ASTERISK was the last segment (tail(fields) is then
        // empty); without this guard fields[0] below would throw.
        return;
    }

    final String field = fields[0];
    final String[] remainingFields = tail(fields);

    if (field.equals(ASTERISK)) {
        // TODO: search in all elements of value.asHash()?
        transformFields(remainingFields, operator);
        return;
    }

    modifyFields(field, f -> {
        final Value value = map.get(f);

        if (value != null) {
            if (remainingFields.length == 0) {
                // Remove first, then re-append, so the field holds only transformed values.
                map.remove(f);

                if (operator != null) {
                    value.asList(a -> a.forEach(v -> append(f, operator.apply(v.toString()))));
                }
            }
            else {
                value.transformFields(remainingFields, operator);
            }
        }
    });
}

/**
Expand All @@ -722,7 +801,7 @@ public void transformFields(final List<String> params, final UnaryOperator<Strin
* @param fields the field names
*/
public void retainFields(final Collection<String> fields) {
map.keySet().retainAll(fields);
map.keySet().retainAll(fields.stream().flatMap(this::findFields).collect(Collectors.toSet()));
}

/**
Expand Down Expand Up @@ -752,6 +831,21 @@ public String asString() {
return map.toString();
}

/**
 * Passes the name of every field matching the pattern to the consumer.
 *
 * The matching names are collected into a set before the consumer runs.
 *
 * @param pattern  the field name pattern
 * @param consumer the action invoked once per matching field name
 */
private void modifyFields(final String pattern, final Consumer<String> consumer) {
    final Collection<String> matchingNames = findFields(pattern).collect(Collectors.toSet());

    for (final String name : matchingNames) {
        consumer.accept(name);
    }
}

/**
 * Streams the names of the fields in this hash that match the pattern.
 *
 * @param pattern the field name pattern
 * @return a stream over the matching keys of the underlying map
 */
private Stream<String> findFields(final String pattern) {
    final Predicate<String> matcher = fieldMatcher(pattern);

    return map.keySet().stream().filter(matcher);
}

/**
 * Builds a predicate that tests field names against the pattern.
 *
 * The pattern is registered in a fresh {@code SimpleRegexTrie} as both key
 * and value; a name matches when looking it up in the trie yields the pattern.
 *
 * @param pattern the field name pattern
 * @return a predicate that is true for field names matching the pattern
 */
private Predicate<String> fieldMatcher(final String pattern) {
    final SimpleRegexTrie<String> patternTrie = new SimpleRegexTrie<>();
    patternTrie.put(pattern, pattern);

    return name -> {
        return patternTrie.get(name).contains(pattern);
    };
}

}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@

import com.google.common.collect.ImmutableMap;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.Disabled;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.junit.jupiter.api.extension.RegisterExtension;
Expand Down Expand Up @@ -393,7 +392,6 @@ public void parseTextQuotedGroups() {
}

@Test
@Disabled("Use SimpleRegexTrie/WildcardTrie")
public void alternation() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"trim('title-1|title-2')"),
Expand All @@ -413,8 +411,8 @@ public void alternation() {
o.get().endRecord();

o.get().startRecord("2");
o.get().literal("title-2", "marc");
o.get().literal("title-1", "json");
o.get().literal("title-1", "marc");
o.get().literal("title-2", "json");
o.get().endRecord();

o.get().startRecord("3");
Expand All @@ -423,7 +421,6 @@ public void alternation() {
}

@Test
@Disabled("Use SimpleRegexTrie/WildcardTrie")
public void wildcard() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"trim('title-?')"),
Expand All @@ -443,8 +440,8 @@ public void wildcard() {
o.get().endRecord();

o.get().startRecord("2");
o.get().literal("title-2", "marc");
o.get().literal("title-1", "json");
o.get().literal("title-1", "marc");
o.get().literal("title-2", "json");
o.get().endRecord();

o.get().startRecord("3");
Expand All @@ -453,7 +450,6 @@ public void wildcard() {
}

@Test
@Disabled("Use SimpleRegexTrie")
public void characterClass() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"trim('title-[12]')"),
Expand All @@ -473,8 +469,8 @@ public void characterClass() {
o.get().endRecord();

o.get().startRecord("2");
o.get().literal("title-2", "marc");
o.get().literal("title-1", "json");
o.get().literal("title-1", "marc");
o.get().literal("title-2", "json");
o.get().endRecord();

o.get().startRecord("3");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1174,7 +1174,7 @@ public void accessArrayImplicit() {
}

@Test
@Disabled("TODO: WDCD? explicit * for array fields?")
// TODO: WDCD? explicit * for array fields?
public void accessArrayByWildcard() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"upcase('name.*')"),
Expand Down Expand Up @@ -1246,7 +1246,7 @@ public void accessArrayOfObjectsByIndex() {
}

@Test
@Disabled("TODO: implement implicit iteration?")
// TODO: implement implicit iteration?
public void accessArrayOfObjectsByWildcard() {
MetafixTestHelpers.assertFix(streamReceiver, Arrays.asList(
"upcase('author.*.name')",
Expand Down