From 36bcec92cd856195bd22b092318fbd6f1eeb7d48 Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Thu, 3 Dec 2020 17:48:33 +0100 Subject: [PATCH 1/2] Discard/replace "null" values by pattern. Fixes metafacture/metafacture-fix#34. --- .../org/metafacture/mangling/NullFilter.java | 22 ++++++-- .../metafacture/mangling/NullFilterTest.java | 52 +++++++++++++++++++ 2 files changed, 71 insertions(+), 3 deletions(-) diff --git a/metafacture-mangling/src/main/java/org/metafacture/mangling/NullFilter.java b/metafacture-mangling/src/main/java/org/metafacture/mangling/NullFilter.java index 421ce36d7..08e17b1f6 100644 --- a/metafacture-mangling/src/main/java/org/metafacture/mangling/NullFilter.java +++ b/metafacture-mangling/src/main/java/org/metafacture/mangling/NullFilter.java @@ -21,9 +21,12 @@ import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.ForwardingStreamPipe; +import java.util.function.Predicate; +import java.util.regex.Pattern; + /** - * Replaces null values with replacement string, or, if replacement - * string is null (default), discards null values entirely. + * Replaces "null" values with replacement string, or, if replacement + * string is null (default), discards them entirely. * * @author Jens Wille * @@ -33,8 +36,21 @@ @Out(StreamReceiver.class) public final class NullFilter extends ForwardingStreamPipe { + private String pattern = null; + + private Predicate predicate = v -> false; + private String replacement = null; + public void setPattern(final String pattern) { + this.pattern = pattern; + this.predicate = Pattern.compile(pattern).asPredicate(); + } + + public String getPattern() { + return pattern; + } + public void setReplacement(final String replacement) { this.replacement = replacement; } @@ -45,7 +61,7 @@ public String getReplacement() { @Override public void literal(final String name, final String value) { - if (value != null) { + if (value != null && !predicate.test(value)) { getReceiver().literal(name, value); } else if (replacement != null) { getReceiver().literal(name, replacement); diff --git a/metafacture-mangling/src/test/java/org/metafacture/mangling/NullFilterTest.java b/metafacture-mangling/src/test/java/org/metafacture/mangling/NullFilterTest.java index da6c31df4..318bbd4af 100644 --- a/metafacture-mangling/src/test/java/org/metafacture/mangling/NullFilterTest.java +++ b/metafacture-mangling/src/test/java/org/metafacture/mangling/NullFilterTest.java @@ -16,6 +16,7 @@ package org.metafacture.mangling; import static org.mockito.Mockito.inOrder; +import static org.mockito.Mockito.times; import static org.mockito.Mockito.verifyNoMoreInteractions; import org.junit.After; @@ -37,6 +38,7 @@ public final class NullFilterTest { private static final String RECORD_ID = "id"; private static final String ENTITY_NAME = "entity-name"; private static final String LITERAL_NAME = "literal-name"; + private static final String LITERAL_NULL = "literal-NULL"; private static final String LITERAL_VALUE = "literal-value"; private NullFilter nullFilter; @@ -79,6 +81,7 @@ public void shouldDiscardNullValues() { nullFilter.startRecord(RECORD_ID); nullFilter.startEntity(ENTITY_NAME); nullFilter.literal(LITERAL_NAME, LITERAL_VALUE); + nullFilter.literal(LITERAL_NAME, LITERAL_NULL); nullFilter.literal(LITERAL_NAME, null); nullFilter.endEntity(); nullFilter.endRecord(); @@ -87,6 +90,7 @@ public void shouldDiscardNullValues() { ordered.verify(receiver).startRecord(RECORD_ID); ordered.verify(receiver).startEntity(ENTITY_NAME); ordered.verify(receiver).literal(LITERAL_NAME, LITERAL_VALUE); + ordered.verify(receiver).literal(LITERAL_NAME, LITERAL_NULL); ordered.verify(receiver).endEntity(); ordered.verify(receiver).endRecord(); @@ -100,6 +104,7 @@ public void shouldReplaceNullValues() { nullFilter.startRecord(RECORD_ID); nullFilter.startEntity(ENTITY_NAME); nullFilter.literal(LITERAL_NAME, LITERAL_VALUE); + nullFilter.literal(LITERAL_NAME, LITERAL_NULL); nullFilter.literal(LITERAL_NAME, null); nullFilter.endEntity(); nullFilter.endRecord(); @@ -108,6 +113,7 @@ public void shouldReplaceNullValues() { ordered.verify(receiver).startRecord(RECORD_ID); ordered.verify(receiver).startEntity(ENTITY_NAME); ordered.verify(receiver).literal(LITERAL_NAME, LITERAL_VALUE); + ordered.verify(receiver).literal(LITERAL_NAME, LITERAL_NULL); ordered.verify(receiver).literal(LITERAL_NAME, "replacement"); ordered.verify(receiver).endEntity(); ordered.verify(receiver).endRecord(); @@ -115,4 +121,50 @@ public void shouldReplaceNullValues() { verifyNoMoreInteractions(receiver); } + @Test + public void shouldDiscardNullValuesByPattern() { + nullFilter.setPattern("NULL"); + + nullFilter.startRecord(RECORD_ID); + nullFilter.startEntity(ENTITY_NAME); + nullFilter.literal(LITERAL_NAME, LITERAL_VALUE); + nullFilter.literal(LITERAL_NAME, LITERAL_NULL); + nullFilter.literal(LITERAL_NAME, null); + nullFilter.endEntity(); + nullFilter.endRecord(); + + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord(RECORD_ID); + ordered.verify(receiver).startEntity(ENTITY_NAME); + ordered.verify(receiver).literal(LITERAL_NAME, LITERAL_VALUE); + ordered.verify(receiver).endEntity(); + ordered.verify(receiver).endRecord(); + + verifyNoMoreInteractions(receiver); + } + + @Test + public void shouldReplaceNullValuesByPattern() { + nullFilter.setPattern("NULL"); + nullFilter.setReplacement("replacement"); + + nullFilter.startRecord(RECORD_ID); + nullFilter.startEntity(ENTITY_NAME); + nullFilter.literal(LITERAL_NAME, LITERAL_VALUE); + nullFilter.literal(LITERAL_NAME, LITERAL_NULL); + nullFilter.literal(LITERAL_NAME, null); + nullFilter.endEntity(); + nullFilter.endRecord(); + + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord(RECORD_ID); + ordered.verify(receiver).startEntity(ENTITY_NAME); + ordered.verify(receiver).literal(LITERAL_NAME, LITERAL_VALUE); + ordered.verify(receiver, times(2)).literal(LITERAL_NAME, "replacement"); + ordered.verify(receiver).endEntity(); + ordered.verify(receiver).endRecord(); + + verifyNoMoreInteractions(receiver); + } + } From 69cdf754d674fbaaa79c5c273bce35f8bef4746c Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Thu, 3 Dec 2020 17:52:30 +0100 Subject: [PATCH 2/2] Add Flux command for NullFilter. --- .../src/main/java/org/metafacture/mangling/NullFilter.java | 2 ++ .../src/main/resources/flux-commands.properties | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/metafacture-mangling/src/main/java/org/metafacture/mangling/NullFilter.java b/metafacture-mangling/src/main/java/org/metafacture/mangling/NullFilter.java index 08e17b1f6..30057acbd 100644 --- a/metafacture-mangling/src/main/java/org/metafacture/mangling/NullFilter.java +++ b/metafacture-mangling/src/main/java/org/metafacture/mangling/NullFilter.java @@ -15,6 +15,7 @@ */ package org.metafacture.mangling; +import org.metafacture.framework.FluxCommand; import org.metafacture.framework.StreamReceiver; import org.metafacture.framework.annotations.Description; import org.metafacture.framework.annotations.In; @@ -34,6 +35,7 @@ @Description("Discards or replaces null values") @In(StreamReceiver.class) @Out(StreamReceiver.class) +@FluxCommand("filter-nulls") public final class NullFilter extends ForwardingStreamPipe { private String pattern = null; diff --git a/metafacture-mangling/src/main/resources/flux-commands.properties b/metafacture-mangling/src/main/resources/flux-commands.properties index 1480a1d99..bcc15efd0 100644 --- a/metafacture-mangling/src/main/resources/flux-commands.properties +++ b/metafacture-mangling/src/main/resources/flux-commands.properties @@ -13,10 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # +change-id org.metafacture.mangling.RecordIdChanger +discard-events org.metafacture.mangling.StreamEventDiscarder filter-duplicate-objects org.metafacture.mangling.DuplicateObjectFilter +filter-nulls org.metafacture.mangling.NullFilter +flatten org.metafacture.mangling.StreamFlattener literal-to-object org.metafacture.mangling.LiteralToObject object-to-literal org.metafacture.mangling.ObjectToLiteral -change-id org.metafacture.mangling.RecordIdChanger record-to-entity org.metafacture.mangling.RecordToEntity -discard-events org.metafacture.mangling.StreamEventDiscarder -flatten org.metafacture.mangling.StreamFlattener