diff --git a/metafacture-biblio/build.gradle b/metafacture-biblio/build.gradle index 7faf64733..87bbff635 100644 --- a/metafacture-biblio/build.gradle +++ b/metafacture-biblio/build.gradle @@ -31,3 +31,10 @@ dependencies { testImplementation 'junit:junit:4.12' testImplementation 'org.mockito:mockito-core:2.5.5' } + +test { + testLogging { + showStandardStreams = true + exceptionFormat = 'full' + } +} diff --git a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java index 7de7fb1ef..98d7d082e 100644 --- a/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java +++ b/metafacture-biblio/src/main/java/org/metafacture/biblio/marc21/MarcXmlHandler.java @@ -45,6 +45,8 @@ public final class MarcXmlHandler extends DefaultXmlPipe { private static final String NAMESPACE = "http://www.loc.gov/MARC21/slim"; private static final String LEADER = "leader"; private static final String TYPE = "type"; + + private String attributeMarker = DEFAULT_ATTRIBUTE_MARKER; private String currentTag = ""; private String namespace = NAMESPACE; private StringBuilder builder = new StringBuilder(); @@ -60,6 +62,14 @@ private boolean checkNamespace(final String uri) { return namespace == null || namespace.equals(uri); } + public void setAttributeMarker(final String attributeMarker) { + this.attributeMarker = attributeMarker; + } + + public String getAttributeMarker() { + return attributeMarker; + } + @Override public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException { if (SUBFIELD.equals(localName)) { @@ -75,7 +85,7 @@ else if (CONTROLFIELD.equals(localName)) { } else if (RECORD.equals(localName) && checkNamespace(uri)) { getReceiver().startRecord(""); - getReceiver().literal(TYPE, attributes.getValue(TYPE)); + getReceiver().literal(attributeMarker + TYPE, 
attributes.getValue(TYPE)); } else if (LEADER.equals(localName)) { builder = new StringBuilder(); @@ -87,18 +97,15 @@ else if (LEADER.equals(localName)) { public void endElement(final String uri, final String localName, final String qName) throws SAXException { if (SUBFIELD.equals(localName)) { getReceiver().literal(currentTag, builder.toString().trim()); - } else if (DATAFIELD.equals(localName)) { getReceiver().endEntity(); } else if (CONTROLFIELD.equals(localName)) { getReceiver().literal(currentTag, builder.toString().trim()); - } else if (RECORD.equals(localName) && checkNamespace(uri)) { getReceiver().endRecord(); - } else if (LEADER.equals(localName)) { getReceiver().literal(currentTag, builder.toString()); diff --git a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlHandlerTest.java b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlHandlerTest.java index 094b4547e..3902a6041 100644 --- a/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlHandlerTest.java +++ b/metafacture-biblio/src/test/java/org/metafacture/biblio/marc21/MarcXmlHandlerTest.java @@ -23,7 +23,9 @@ import org.junit.Before; import org.junit.Test; import org.metafacture.framework.StreamReceiver; +import org.mockito.InOrder; import org.mockito.Mock; +import org.mockito.Mockito; import org.mockito.MockitoAnnotations; import org.xml.sax.SAXException; import org.xml.sax.helpers.AttributesImpl; @@ -130,4 +132,39 @@ public void issue330ShouldOptionallyRecognizeRecordsWithoutNamespace() verifyNoMoreInteractions(receiver); } + @Test + public void shouldNotEncodeTypeAttributeAsMarkedLiteral() throws SAXException { + final AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute(NAMESPACE, "type", "type", "CDATA", "bibliographic"); + + marcXmlHandler.startElement(NAMESPACE, RECORD, "", attributes); + marcXmlHandler.endElement(NAMESPACE, RECORD, ""); + + final InOrder ordered = Mockito.inOrder(receiver); + 
ordered.verify(receiver).startRecord(""); + ordered.verify(receiver).literal(TYPE, "bibliographic"); + ordered.verify(receiver).endRecord(); + ordered.verifyNoMoreInteractions(); + verifyNoMoreInteractions(receiver); + } + + @Test + public void issue336_shouldEncodeTypeAttributeAsLiteralWithConfiguredMarker() throws SAXException { + final String marker = "~"; + marcXmlHandler.setAttributeMarker(marker); + + final AttributesImpl attributes = new AttributesImpl(); + attributes.addAttribute(NAMESPACE, "type", "type", "CDATA", "bibliographic"); + + marcXmlHandler.startElement(NAMESPACE, RECORD, "", attributes); + marcXmlHandler.endElement(NAMESPACE, RECORD, ""); + + final InOrder ordered = Mockito.inOrder(receiver); + ordered.verify(receiver).startRecord(""); + ordered.verify(receiver).literal(marker + TYPE, "bibliographic"); + ordered.verify(receiver).endRecord(); + ordered.verifyNoMoreInteractions(); + verifyNoMoreInteractions(receiver); + } + } diff --git a/metafacture-framework/src/main/java/org/metafacture/framework/helpers/DefaultXmlPipe.java b/metafacture-framework/src/main/java/org/metafacture/framework/helpers/DefaultXmlPipe.java index 773bcdad4..a429fd094 100644 --- a/metafacture-framework/src/main/java/org/metafacture/framework/helpers/DefaultXmlPipe.java +++ b/metafacture-framework/src/main/java/org/metafacture/framework/helpers/DefaultXmlPipe.java @@ -38,6 +38,11 @@ */ public class DefaultXmlPipe extends DefaultSender implements XmlPipe { + public static final String DEFAULT_ATTRIBUTE_MARKER = ""; + public static final String DEFAULT_RECORD_TAG = "record"; + public static final String DEFAULT_ROOT_TAG = "records"; + public static final String DEFAULT_VALUE_TAG = "value"; + public DefaultXmlPipe() { } diff --git a/metafacture-xml/build.gradle b/metafacture-xml/build.gradle index c91c82c8e..7949ccd4d 100644 --- a/metafacture-xml/build.gradle +++ b/metafacture-xml/build.gradle @@ -26,3 +26,10 @@ dependencies { testImplementation 
'org.mockito:mockito-core:2.5.5' testRuntimeOnly 'org.slf4j:slf4j-simple:1.7.21' } + +test { + testLogging { + showStandardStreams = true + exceptionFormat = 'full' + } +} diff --git a/metafacture-xml/src/main/java/org/metafacture/xml/GenericXmlHandler.java b/metafacture-xml/src/main/java/org/metafacture/xml/GenericXmlHandler.java index abc3aaeb9..a2813f6fa 100644 --- a/metafacture-xml/src/main/java/org/metafacture/xml/GenericXmlHandler.java +++ b/metafacture-xml/src/main/java/org/metafacture/xml/GenericXmlHandler.java @@ -40,13 +40,13 @@ @FluxCommand("handle-generic-xml") public final class GenericXmlHandler extends DefaultXmlPipe { - public static final String DEFAULT_RECORD_TAG = "record"; - public static final boolean EMIT_NAMESPACE = false; private static final Pattern TABS = Pattern.compile("\t+"); + private String attributeMarker = DEFAULT_ATTRIBUTE_MARKER; private String recordTagName = DEFAULT_RECORD_TAG; + private String valueTagName = DEFAULT_VALUE_TAG; private boolean inRecord; private StringBuilder valueBuffer = new StringBuilder(); @@ -92,6 +92,14 @@ public String getRecordTagName() { return recordTagName; } + public void setValueTagName(final String valueTagName) { + this.valueTagName = valueTagName; + } + + public String getValueTagName() { + return valueTagName; + } + /** * Triggers namespace awareness. If set to "true" input data like "foo:bar" * will be passed through as "foo:bar". 
For backward compatibility the default @@ -110,6 +118,14 @@ public boolean getEmitNamespace() { return this.emitNamespace; } + public void setAttributeMarker(final String attributeMarker) { + this.attributeMarker = attributeMarker; + } + + public String getAttributeMarker() { + return attributeMarker; + } + @Override public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) { if (inRecord) { @@ -159,7 +175,7 @@ public void characters(final char[] chars, final int start, final int length) { private void writeValue() { final String value = valueBuffer.toString(); if (!value.trim().isEmpty()) { - getReceiver().literal("value", value.replace('\n', ' ')); + getReceiver().literal(valueTagName, value.replace('\n', ' ')); } valueBuffer = new StringBuilder(); } @@ -170,7 +186,7 @@ private void writeAttributes(final Attributes attributes) { for (int i = 0; i < length; ++i) { final String name = emitNamespace ? attributes.getQName(i) : attributes.getLocalName(i); final String value = attributes.getValue(i); - getReceiver().literal(name, value); + getReceiver().literal(attributeMarker + name, value); } } diff --git a/metafacture-xml/src/main/java/org/metafacture/xml/SimpleXmlEncoder.java b/metafacture-xml/src/main/java/org/metafacture/xml/SimpleXmlEncoder.java index 4cfb69c28..cff74368e 100644 --- a/metafacture-xml/src/main/java/org/metafacture/xml/SimpleXmlEncoder.java +++ b/metafacture-xml/src/main/java/org/metafacture/xml/SimpleXmlEncoder.java @@ -26,6 +26,7 @@ import org.metafacture.framework.annotations.In; import org.metafacture.framework.annotations.Out; import org.metafacture.framework.helpers.DefaultStreamPipe; +import org.metafacture.framework.helpers.DefaultXmlPipe; import java.io.IOException; import java.net.URL; @@ -53,9 +54,6 @@ public final class SimpleXmlEncoder extends DefaultStreamPipe namespaces = new HashMap(); private boolean writeRootTag = true; private boolean writeXmlHeader = true; @@ -96,6 
+96,14 @@ public void setRecordTag(final String tag) { recordTag = tag; } + public void setValueTag(final String valueTag) { + this.valueTag = valueTag; + } + + public String getValueTag() { + return valueTag; + } + public void setNamespaceFile(final String file) { final Properties properties; try { @@ -146,6 +154,14 @@ public void setNamespaces(final Map namespaces) { this.namespaces = namespaces; } + public void setAttributeMarker(final String attributeMarker) { + this.attributeMarker = attributeMarker; + } + + public String getAttributeMarker() { + return attributeMarker; + } + @Override public void startRecord(final String identifier) { if (separateRoots) { @@ -192,11 +208,11 @@ public void endEntity() { @Override public void literal(final String name, final String value) { - if (name.isEmpty()) { + if (name.equals(valueTag)) { element.setText(value); } - else if (name.startsWith(ATTRIBUTE_MARKER)) { - element.addAttribute(name.substring(1), value); + else if (name.startsWith(attributeMarker)) { + element.addAttribute(name.substring(attributeMarker.length()), value); } else { element.createChild(name).setText(value); diff --git a/metafacture-xml/src/test/java/org/metafacture/xml/GenericXMLHandlerTest.java b/metafacture-xml/src/test/java/org/metafacture/xml/GenericXMLHandlerTest.java index e376c24b1..ff35baad8 100644 --- a/metafacture-xml/src/test/java/org/metafacture/xml/GenericXMLHandlerTest.java +++ b/metafacture-xml/src/test/java/org/metafacture/xml/GenericXMLHandlerTest.java @@ -25,6 +25,7 @@ import org.metafacture.framework.StreamReceiver; import org.mockito.InOrder; import org.mockito.Mock; +import org.mockito.Mockito; import org.mockito.MockitoAnnotations; import org.xml.sax.helpers.AttributesImpl; @@ -132,6 +133,28 @@ public void shouldEmitPCDataAsALiteralNamedValue() { ordered.verify(receiver).literal("value", "char-data"); } + @Test + public void shouldEmitPCDataAsALiteralWithConfiguredValueTagName() { + final String name = "data"; + 
genericXmlHandler.setValueTagName(name); + + final char[] charData = "char-data".toCharArray(); + genericXmlHandler.startElement("", "record", "record", attributes); + genericXmlHandler.startElement("", "entity", "entity", attributes); + genericXmlHandler.characters(charData, 0, charData.length); + genericXmlHandler.endElement("", "entity", "entity"); + genericXmlHandler.endElement("", "record", "record"); + + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord(""); + ordered.verify(receiver).startEntity("entity"); + ordered.verify(receiver).literal(name, "char-data"); + ordered.verify(receiver).endEntity(); + ordered.verify(receiver).endRecord(); + ordered.verifyNoMoreInteractions(); + Mockito.verifyNoMoreInteractions(receiver); + } + @Test public void shouldEmitNamespaceOnEntityElementAndAttribute() { genericXmlHandler.setEmitNamespace(true); @@ -141,6 +164,47 @@ public void shouldEmitNamespaceOnEntityElementAndAttribute() { final InOrder ordered = inOrder(receiver); ordered.verify(receiver).startEntity("ns:entity"); - ordered.verify(receiver).literal("ns:attr","attr-value"); + ordered.verify(receiver).literal("ns:attr", "attr-value"); + } + + @Test + public void shouldNotEncodeAttributesAsMarkedLiterals() { + attributes.addAttribute("", "attr", "attr", "CDATA", "attr-value"); + genericXmlHandler.startElement("", "record", "record", attributes); + genericXmlHandler.endElement("", "record", "record"); + + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord(""); + ordered.verify(receiver).literal("attr", "attr-value"); + ordered.verify(receiver).endRecord(); + ordered.verifyNoMoreInteractions(); + Mockito.verifyNoMoreInteractions(receiver); + } + + @Test + public void issue379_shouldEncodeAttributesAsLiteralsWithConfiguredMarker() { + final String marker = "~"; + genericXmlHandler.setAttributeMarker(marker); + + genericXmlHandler.startElement("", "record", "record", attributes); + 
attributes.addAttribute("", "authority", "authority", "CDATA", "marcrelator"); + attributes.addAttribute("", "type", "type", "CDATA", "text"); + genericXmlHandler.startElement("", "roleTerm", "roleTerm", attributes); + final char[] charData = "Author".toCharArray(); + genericXmlHandler.characters(charData, 0, charData.length); + genericXmlHandler.endElement("", "roleTerm", "roleTerm"); + genericXmlHandler.endElement("", "record", "record"); + + final InOrder ordered = inOrder(receiver); + ordered.verify(receiver).startRecord(""); + ordered.verify(receiver).startEntity("roleTerm"); + ordered.verify(receiver).literal(marker + "authority", "marcrelator"); + ordered.verify(receiver).literal(marker + "type", "text"); + ordered.verify(receiver).literal("value", "Author"); + ordered.verify(receiver).endEntity(); + ordered.verify(receiver).endRecord(); + ordered.verifyNoMoreInteractions(); + Mockito.verifyNoMoreInteractions(receiver); } + } diff --git a/metafacture-xml/src/test/java/org/metafacture/xml/SimpleXmlEncoderTest.java b/metafacture-xml/src/test/java/org/metafacture/xml/SimpleXmlEncoderTest.java index 7ea246f0e..cf7a3d8d6 100644 --- a/metafacture-xml/src/test/java/org/metafacture/xml/SimpleXmlEncoderTest.java +++ b/metafacture-xml/src/test/java/org/metafacture/xml/SimpleXmlEncoderTest.java @@ -179,6 +179,157 @@ public void shouldAddNamespaceWithEmptyKeyFromPropertiesFileAsDefaultNamespaceTo getResultXml()); } + @Test + public void testShouldEncodeUnnamedLiteralsAsText() { + simpleXmlEncoder.startRecord(""); + simpleXmlEncoder.literal("", VALUE); + simpleXmlEncoder.endRecord(); + simpleXmlEncoder.closeStream(); + + assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + "<records>" + + "<record>" + + "value" + + "</record>" + + "</records>", + getResultXml()); + } + + @Test + public void testShouldStillEncodeUnnamedLiteralsAsTextWithConfiguredValueTagName() { + simpleXmlEncoder.setValueTag("data"); + + simpleXmlEncoder.startRecord(""); + simpleXmlEncoder.literal("", VALUE); + simpleXmlEncoder.endRecord(); + 
simpleXmlEncoder.closeStream(); + + // SimpleXmlEncoder.Element.writeElement() does not write child elements with empty name + assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + "<records>" + + "<record>" + + "value" + + "</record>" + + "</records>", + getResultXml()); + } + + @Test + public void testShouldNotEncodeLiteralsWithDifferentValueTagNameAsText() { + simpleXmlEncoder.setValueTag("data"); + + simpleXmlEncoder.startRecord(""); + simpleXmlEncoder.literal(TAG, VALUE); + simpleXmlEncoder.endRecord(); + simpleXmlEncoder.closeStream(); + + assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + "<records>" + + "<record>" + + "<tag>value</tag>" + + "</record>" + + "</records>", + getResultXml()); + } + + @Test + public void issue379_testShouldEncodeConfiguredValueLiteralsAsText() { + final String name = "data"; + simpleXmlEncoder.setValueTag(name); + + simpleXmlEncoder.startRecord(""); + simpleXmlEncoder.literal(name, VALUE); + simpleXmlEncoder.endRecord(); + simpleXmlEncoder.closeStream(); + + assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + "<records>" + + "<record>" + + "value" + + "</record>" + + "</records>", + getResultXml()); + } + + @Test + public void testShouldEncodeMarkedLiteralsAsAttributes() { + simpleXmlEncoder.startRecord(""); + simpleXmlEncoder.literal(TAG, VALUE); + simpleXmlEncoder.literal("~attr", VALUE); + simpleXmlEncoder.endRecord(); + simpleXmlEncoder.closeStream(); + + assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + "<records>" + + "<record attr=\"value\">" + + "<tag>value</tag>" + + "</record>" + + "</records>", + getResultXml()); + } + + @Test + public void testShouldNotEncodeMarkedEntitiesAsAttributes() { + simpleXmlEncoder.setAttributeMarker("*"); + + simpleXmlEncoder.startRecord(""); + simpleXmlEncoder.startEntity("~entity"); + simpleXmlEncoder.literal(TAG, VALUE); + simpleXmlEncoder.endEntity(); + simpleXmlEncoder.endRecord(); + simpleXmlEncoder.closeStream(); + + assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + "<records>" + + "<record>" + + "<~entity>" + + "<tag>value</tag>" + + "</~entity>" + + "</record>" + + "</records>", + getResultXml()); + } + + @Test + public void testShouldNotEncodeLiteralsWithDifferentMarkerAsAttributes() { + simpleXmlEncoder.setAttributeMarker("*"); + + simpleXmlEncoder.startRecord(""); + simpleXmlEncoder.literal(TAG, VALUE); + simpleXmlEncoder.literal("~attr", VALUE); + 
simpleXmlEncoder.endRecord(); + simpleXmlEncoder.closeStream(); + + assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + "<records>" + + "<record>" + + "<tag>value</tag>" + + "<~attr>value</~attr>" + + "</record>" + + "</records>", + getResultXml()); + } + + @Test + public void testShouldEncodeMarkedLiteralsWithConfiguredMarkerAsAttributes() { + final String marker = "**"; + simpleXmlEncoder.setAttributeMarker(marker); + + simpleXmlEncoder.startRecord(""); + simpleXmlEncoder.literal(TAG, VALUE); + simpleXmlEncoder.literal(marker + "attr", VALUE); + simpleXmlEncoder.endRecord(); + simpleXmlEncoder.closeStream(); + + assertEquals("<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + + "<records>" + + "<record attr=\"value\">" + + "<tag>value</tag>" + + "</record>" + + "</records>", + getResultXml()); + } + private void emitTwoRecords() { simpleXmlEncoder.startRecord("X"); simpleXmlEncoder.literal(TAG, VALUE);