Skip to content
This repository has been archived by the owner on Jan 27, 2025. It is now read-only.

Commit

Permalink
Optimize path wildcard matching for wildcard field names. (#97, f64cbde)
Browse files Browse the repository at this point in the history
Still a hotspot so we employ a bunch of additional strategies:

- Avoid `Stream` API in hot path.
- Sidestep `WildcardTrie` w.r.t. alternations.
- Guard `SimpleRegexTrie` matching with prefix match.
  • Loading branch information
blackwinter committed May 13, 2022
1 parent 46fb034 commit 7df9a55
Showing 1 changed file with 77 additions and 26 deletions.
103 changes: 77 additions & 26 deletions metafix/src/main/java/org/metafacture/metafix/Value.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,21 @@
package org.metafacture.metafix;

import org.metafacture.commons.tries.SimpleRegexTrie;
import org.metafacture.commons.tries.WildcardTrie;

import java.util.ArrayList;
import java.util.Collection;
import java.util.ConcurrentModificationException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Predicate;
import java.util.regex.Matcher;
Expand Down Expand Up @@ -210,6 +211,7 @@ public Value asList(final Consumer<Array> consumer) {
if (path != null) {
path = FixPath.RESERVED_FIELD_PATTERN.matcher(newName).replaceAll(Matcher.quoteReplacement(split(path)[0]));
}

return this;
}

Expand All @@ -219,6 +221,7 @@ public Value asList(final Consumer<Array> consumer) {
final String lastSegment = pathSegments[pathSegments.length - 1];
this.path = container.path + "." + lastSegment;
}

return this;
}

Expand All @@ -229,6 +232,7 @@ public Value asList(final Consumer<Array> consumer) {
else {
path = fallback + suffix;
}

return this;
}

Expand Down Expand Up @@ -471,7 +475,10 @@ public void remove(final int index) {
public static class Hash extends AbstractValueType {

// NOTE: Keep in sync with `WildcardTrie`/`SimpleRegexTrie` implementation in metafacture-core.
// NOTE(review): this is a diff view without +/- markers; the two `PATTERN_MATCHER` declarations below
// appear to be the pre-commit version (character class includes `|`) and the post-commit version
// (without `|`) -- confirm against the repository.
private static final Matcher PATTERN_MATCHER = Pattern.compile("[*?|]|\\[[^\\]]").matcher("");
// Splits a field pattern on the alternation separator; `Pattern.LITERAL` makes the separator match verbatim.
private static final Pattern ALTERNATION_PATTERN = Pattern.compile(WildcardTrie.OR_STRING, Pattern.LITERAL);
// Finds wildcard syntax in a pattern: `*`, `?`, or a `[` followed by a character other than `]`.
// NOTE(review): one shared, stateful `Matcher` instance is reused via `reset(...)`; `Matcher` is not
// safe for concurrent use -- presumably access is single-threaded, confirm.
private static final Matcher PATTERN_MATCHER = Pattern.compile("[*?]|\\[[^\\]]").matcher("");

// Per pattern: the literal text preceding the first wildcard, or `null` for patterns without wildcard syntax.
private static final Map<String, String> PREFIX_CACHE = new HashMap<>();

// Per wildcard pattern: remembered match result for every field name that has already been tested against it.
private static final Map<String, Map<String, Boolean>> TRIE_CACHE = new HashMap<>();
private static final SimpleRegexTrie<String> TRIE = new SimpleRegexTrie<>();
Expand All @@ -491,7 +498,7 @@ protected Hash() {
* @return true if this hash contains the metadata field, false otherwise
*/
public boolean containsField(final String field) {
// NOTE(review): diff view without +/- markers -- the `matchFields` line appears to be the pre-commit
// statement and the `findFields` line its replacement; presumably only one of them exists in any
// given revision of the file. Confirm against the repository.
return matchFields(field, Stream::anyMatch);
// True when `findFields` yields at least one field name for the given name or pattern.
return !findFields(field).isEmpty();
}

public boolean containsPath(final String fieldPath) {
Expand Down Expand Up @@ -575,22 +582,29 @@ public Value get(final String field) {

/*package-private*/ Value get(final String field, final boolean enforceStringValue) { // TODO use Type.String etc.?
// TODO: special treatment (only) for exact matches?
// NOTE(review): diff view without +/- markers -- the `Stream`/`Collectors` based statements working on
// `list` appear to be the pre-commit version; the statements working on `set` appear to be their
// replacement. Confirm against the repository.
final List<Value> list = findFields(field).map(actualField -> {
final Value value = getField(actualField);
if (enforceStringValue) {
value.asString();
}
return value;
}).collect(Collectors.toList());
return list.isEmpty() ? null : list.size() == 1 ? list.get(0) : newArray(a -> list.forEach(v -> v.matchType()
.ifArray(b -> b.forEach(a::add))
.orElse(a::add)));
final Set<String> set = findFields(field);

// No matching field: null. Exactly one: that field's value. Several: a new array built from all
// matched values, where (presumably) array values contribute their elements one by one via
// `ifArray` and all other values are added as they are via `orElse`.
return set.isEmpty() ? null : set.size() == 1 ? getField(set.iterator().next(), enforceStringValue) :
newArray(a -> set.forEach(f -> getField(f, enforceStringValue).matchType()
.ifArray(b -> b.forEach(a::add))
.orElse(a::add)
));
}

/**
 * Looks up the value stored under exactly the given field name.
 * The name is used as the map key as-is; this method does no pattern matching itself.
 *
 * @param field the field name used as key
 * @return the result of {@code map.get(field)}
 */
public Value getField(final String field) {
return map.get(field);
}

/**
 * Fetches the value of a field via {@code getField(field)} and, if requested,
 * calls {@code asString()} on it before returning the value itself.
 *
 * NOTE(review): the result of {@code asString()} is discarded, so the call is presumably
 * made only to enforce that the value is a string -- confirm what it does for other types.
 * The callers visible in this diff only pass names obtained from {@code findFields}.
 *
 * @param field              the field name used as key
 * @param enforceStringValue whether {@code asString()} is invoked on the value
 * @return the value returned by {@code getField(field)}
 */
private Value getField(final String field, final boolean enforceStringValue) {
final Value value = getField(field);

if (enforceStringValue) {
value.asString();
}

return value;
}

/**
 * Resolves the field via {@code get(field)} and hands the result, together with
 * the given consumer, to {@code asList}.
 *
 * @param field    the field name or pattern passed to {@code get}
 * @param consumer the consumer forwarded to {@code asList}
 * @return whatever {@code asList} returns
 */
public Value getList(final String field, final Consumer<Array> consumer) {
return asList(get(field), consumer);
}
Expand All @@ -612,13 +626,15 @@ public void addAll(final Hash hash) {
*/
public void add(final String field, final Value newValue) {
final Value oldValue = new FixPath(field).findIn(this);

if (oldValue == null) {
put(field, newValue);
}
else {
if (!oldValue.isArray()) { // repeated field: convert single val to first in array
oldValue.updatePathAppend(".1", field);
}

put(field, oldValue.asList(oldVals -> newValue.asList(newVals -> {
for (int i = 0; i < newVals.size(); ++i) {
oldVals.add(newVals.get(i).updatePathAppend("." + (i + 1 + oldVals.size()), field));
Expand All @@ -634,6 +650,7 @@ public void add(final String field, final Value newValue) {
*/
public void remove(final String field) {
final FixPath fixPath = new FixPath(field);

if (fixPath.size() > 1) {
fixPath.removeNestedFrom(this);
}
Expand All @@ -652,7 +669,10 @@ public void removeField(final String field) {
* @param fields the field names
*/
public void retainFields(final Collection<String> fields) {
// NOTE(review): diff view without +/- markers -- the `stream().flatMap(...)` statement appears to be
// the pre-commit version; the `retainFields` set built below appears to be its replacement. Confirm.
map.keySet().retainAll(fields.stream().flatMap(this::findFields).collect(Collectors.toSet()));
// Collect every existing field name matched by any of the given names/patterns ...
final Set<String> retainFields = new HashSet<>();
fields.forEach(f -> retainFields.addAll(findFields(f)));

// ... and drop all other keys from the map through its key set view.
map.keySet().retainAll(retainFields);
}

/**
Expand Down Expand Up @@ -702,24 +722,55 @@ public String toString() {
* @param consumer the action to be performed for each value
*/
/*package-private*/ void modifyFields(final String pattern, final Consumer<String> consumer) {
// NOTE(review): diff view without +/- markers -- the `collect(Collectors.toSet())` statement appears
// to be the pre-commit version and the plain `forEach` its replacement. Confirm.
findFields(pattern).collect(Collectors.toSet()).forEach(consumer);
// `findFields` returns a freshly built set, so the consumer iterates over a copy of the matching
// names rather than over the map's own key set.
findFields(pattern).forEach(consumer);
}

// NOTE(review): diff view without +/- markers -- the `Stream<String>` signature and its `matchFields`
// call appear to be the pre-commit version of this method; the `Set<String>` version below appears
// to be its replacement. Confirm against the repository.
private Stream<String> findFields(final String pattern) {
return matchFields(pattern, Stream::filter);
/**
 * Collects the names of all fields matching the given pattern.
 * The pattern is split into its alternatives with {@code ALTERNATION_PATTERN}; each
 * alternative is matched separately and adds its hits to one shared set.
 *
 * @param pattern a field name or pattern, possibly consisting of several alternatives
 * @return a new {@code LinkedHashSet} of matching field names; empty if nothing matched
 */
private Set<String> findFields(final String pattern) {
final Set<String> fieldSet = new LinkedHashSet<>();

for (final String term : ALTERNATION_PATTERN.split(pattern)) {
findFields(term, fieldSet);
}

return fieldSet;
}

// NOTE(review): diff view without +/- markers -- `matchFields` (and every statement using `function`,
// `matcher` or `Stream`) appears to be the pre-commit implementation; `findFields(pattern, fieldSet)`
// appears to be its replacement, with lines of both interleaved below. Confirm against the repository.
private <T> T matchFields(final String pattern, final BiFunction<Stream<String>, Predicate<String>, T> function) {
if (PATTERN_MATCHER.reset(pattern).find()) {
final Map<String, Boolean> matcher = TRIE_CACHE.computeIfAbsent(pattern, k -> {
TRIE.put(k, k);
return new HashMap<>();
});
// Adds to `fieldSet` the names of all fields of this hash that match a single pattern term.
private void findFields(final String pattern, final Set<String> fieldSet) {
// `containsKey` rather than a null check: `null` is itself stored as the value for patterns
// without wildcard syntax, so a null value does not mean "not analysed yet".
if (!PREFIX_CACHE.containsKey(pattern)) {
final Matcher patternMatcher = PATTERN_MATCHER.reset(pattern);

if (patternMatcher.find()) {
// First time this wildcard pattern is seen: register it in the trie and create its result cache.
TRIE.put(pattern, pattern);
TRIE_CACHE.put(pattern, new HashMap<>());

return function.apply(map.keySet().stream(), f -> matcher.computeIfAbsent(f, k -> TRIE.get(k).contains(pattern)));
// Remember the literal text in front of the first wildcard construct.
PREFIX_CACHE.put(pattern, pattern.substring(0, patternMatcher.start()));
}
else {
PREFIX_CACHE.put(pattern, null);
}
}
else {
return function.apply(Stream.of(pattern), map::containsKey);

final String prefix = PREFIX_CACHE.get(pattern);

if (prefix != null) {
final Map<String, Boolean> fieldCache = TRIE_CACHE.get(pattern);

for (final String field : map.keySet()) {
if (!fieldCache.containsKey(field)) {
// The `startsWith` test comes first so that `TRIE.get` is only evaluated for field names
// sharing the pattern's literal prefix (`&&` short-circuits).
final boolean matches = field.startsWith(prefix) && TRIE.get(field).contains(pattern);
fieldCache.put(field, matches);

if (matches) {
fieldSet.add(field);
}
}
else if (fieldCache.get(field)) {
fieldSet.add(field);
}
}
}
// No wildcard syntax: the term is treated as an exact field name.
else if (map.containsKey(pattern)) {
fieldSet.add(pattern);
}
}

Expand Down

0 comments on commit 7df9a55

Please sign in to comment.