Skip to content

Commit

Permalink
Merge pull request #394 from metafacture/379-improveHandlingOfXmlAttr…
Browse files Browse the repository at this point in the history
…ibutes

Improve handling of XML attributes and element values.
  • Loading branch information
blackwinter authored Sep 28, 2021
2 parents 8af6c69 + d6e68ff commit a3fd2cd
Show file tree
Hide file tree
Showing 9 changed files with 327 additions and 17 deletions.
7 changes: 7 additions & 0 deletions metafacture-biblio/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,10 @@ dependencies {
testImplementation 'junit:junit:4.12'
testImplementation 'org.mockito:mockito-core:2.5.5'
}

test {
testLogging {
showStandardStreams = true
exceptionFormat = 'full'
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ public final class MarcXmlHandler extends DefaultXmlPipe<StreamReceiver> {
private static final String NAMESPACE = "http://www.loc.gov/MARC21/slim";
private static final String LEADER = "leader";
private static final String TYPE = "type";

private String attributeMarker = DEFAULT_ATTRIBUTE_MARKER;
private String currentTag = "";
private String namespace = NAMESPACE;
private StringBuilder builder = new StringBuilder();
Expand All @@ -60,6 +62,14 @@ private boolean checkNamespace(final String uri) {
return namespace == null || namespace.equals(uri);
}

public void setAttributeMarker(final String attributeMarker) {
this.attributeMarker = attributeMarker;
}

public String getAttributeMarker() {
return attributeMarker;
}

@Override
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) throws SAXException {
if (SUBFIELD.equals(localName)) {
Expand All @@ -75,7 +85,7 @@ else if (CONTROLFIELD.equals(localName)) {
}
else if (RECORD.equals(localName) && checkNamespace(uri)) {
getReceiver().startRecord("");
getReceiver().literal(TYPE, attributes.getValue(TYPE));
getReceiver().literal(attributeMarker + TYPE, attributes.getValue(TYPE));
}
else if (LEADER.equals(localName)) {
builder = new StringBuilder();
Expand All @@ -87,18 +97,15 @@ else if (LEADER.equals(localName)) {
public void endElement(final String uri, final String localName, final String qName) throws SAXException {
if (SUBFIELD.equals(localName)) {
getReceiver().literal(currentTag, builder.toString().trim());

}
else if (DATAFIELD.equals(localName)) {
getReceiver().endEntity();
}
else if (CONTROLFIELD.equals(localName)) {
getReceiver().literal(currentTag, builder.toString().trim());

}
else if (RECORD.equals(localName) && checkNamespace(uri)) {
getReceiver().endRecord();

}
else if (LEADER.equals(localName)) {
getReceiver().literal(currentTag, builder.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,9 @@
import org.junit.Before;
import org.junit.Test;
import org.metafacture.framework.StreamReceiver;
import org.mockito.InOrder;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.MockitoAnnotations;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.AttributesImpl;
Expand Down Expand Up @@ -130,4 +132,39 @@ public void issue330ShouldOptionallyRecognizeRecordsWithoutNamespace()
verifyNoMoreInteractions(receiver);
}

@Test
public void shouldNotEncodeTypeAttributeAsMarkedLiteral() throws SAXException {
final AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(NAMESPACE, "type", "type", "CDATA", "bibliographic");

marcXmlHandler.startElement(NAMESPACE, RECORD, "", attributes);
marcXmlHandler.endElement(NAMESPACE, RECORD, "");

final InOrder ordered = Mockito.inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).literal(TYPE, "bibliographic");
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
verifyNoMoreInteractions(receiver);
}

@Test
public void issue336_shouldEncodeTypeAttributeAsLiteralWithConfiguredMarker() throws SAXException {
final String marker = "~";
marcXmlHandler.setAttributeMarker(marker);

final AttributesImpl attributes = new AttributesImpl();
attributes.addAttribute(NAMESPACE, "type", "type", "CDATA", "bibliographic");

marcXmlHandler.startElement(NAMESPACE, RECORD, "", attributes);
marcXmlHandler.endElement(NAMESPACE, RECORD, "");

final InOrder ordered = Mockito.inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).literal(marker + TYPE, "bibliographic");
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
verifyNoMoreInteractions(receiver);
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
*/
public class DefaultXmlPipe<R extends Receiver> extends DefaultSender<R> implements XmlPipe<R> {

public static final String DEFAULT_ATTRIBUTE_MARKER = "";
public static final String DEFAULT_RECORD_TAG = "record";
public static final String DEFAULT_ROOT_TAG = "records";
public static final String DEFAULT_VALUE_TAG = "value";

public DefaultXmlPipe() {
}

Expand Down
7 changes: 7 additions & 0 deletions metafacture-xml/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,10 @@ dependencies {
testImplementation 'org.mockito:mockito-core:2.5.5'
testRuntimeOnly 'org.slf4j:slf4j-simple:1.7.21'
}

test {
testLogging {
showStandardStreams = true
exceptionFormat = 'full'
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,13 @@
@FluxCommand("handle-generic-xml")
public final class GenericXmlHandler extends DefaultXmlPipe<StreamReceiver> {

public static final String DEFAULT_RECORD_TAG = "record";

public static final boolean EMIT_NAMESPACE = false;

private static final Pattern TABS = Pattern.compile("\t+");

private String attributeMarker = DEFAULT_ATTRIBUTE_MARKER;
private String recordTagName = DEFAULT_RECORD_TAG;
private String valueTagName = DEFAULT_VALUE_TAG;

private boolean inRecord;
private StringBuilder valueBuffer = new StringBuilder();
Expand Down Expand Up @@ -92,6 +92,14 @@ public String getRecordTagName() {
return recordTagName;
}

public void setValueTagName(final String valueTagName) {
this.valueTagName = valueTagName;
}

public String getValueTagName() {
return valueTagName;
}

/**
* Triggers namespace awareness. If set to "true" input data like "foo:bar"
* will be passed through as "foo:bar". For backward compatibility the default
Expand All @@ -110,6 +118,14 @@ public boolean getEmitNamespace() {
return this.emitNamespace;
}

public void setAttributeMarker(final String attributeMarker) {
this.attributeMarker = attributeMarker;
}

public String getAttributeMarker() {
return attributeMarker;
}

@Override
public void startElement(final String uri, final String localName, final String qName, final Attributes attributes) {
if (inRecord) {
Expand Down Expand Up @@ -159,7 +175,7 @@ public void characters(final char[] chars, final int start, final int length) {
private void writeValue() {
final String value = valueBuffer.toString();
if (!value.trim().isEmpty()) {
getReceiver().literal("value", value.replace('\n', ' '));
getReceiver().literal(valueTagName, value.replace('\n', ' '));
}
valueBuffer = new StringBuilder();
}
Expand All @@ -170,7 +186,7 @@ private void writeAttributes(final Attributes attributes) {
for (int i = 0; i < length; ++i) {
final String name = emitNamespace ? attributes.getQName(i) : attributes.getLocalName(i);
final String value = attributes.getValue(i);
getReceiver().literal(name, value);
getReceiver().literal(attributeMarker + name, value);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.metafacture.framework.annotations.In;
import org.metafacture.framework.annotations.Out;
import org.metafacture.framework.helpers.DefaultStreamPipe;
import org.metafacture.framework.helpers.DefaultXmlPipe;

import java.io.IOException;
import java.net.URL;
Expand Down Expand Up @@ -53,9 +54,6 @@ public final class SimpleXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Str

public static final String ATTRIBUTE_MARKER = "~";

public static final String DEFAULT_ROOT_TAG = "records";
public static final String DEFAULT_RECORD_TAG = "record";

private static final String NEW_LINE = "\n";
private static final String INDENT = "\t";

Expand All @@ -72,8 +70,10 @@ public final class SimpleXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Str

private final StringBuilder builder = new StringBuilder();

private String rootTag = DEFAULT_ROOT_TAG;
private String recordTag = DEFAULT_RECORD_TAG;
private String attributeMarker = ATTRIBUTE_MARKER;
private String rootTag = DefaultXmlPipe.DEFAULT_ROOT_TAG;
private String recordTag = DefaultXmlPipe.DEFAULT_RECORD_TAG;
private String valueTag = DefaultXmlPipe.DEFAULT_VALUE_TAG;
private Map<String, String> namespaces = new HashMap<String, String>();
private boolean writeRootTag = true;
private boolean writeXmlHeader = true;
Expand All @@ -96,6 +96,14 @@ public void setRecordTag(final String tag) {
recordTag = tag;
}

public void setValueTag(final String valueTag) {
this.valueTag = valueTag;
}

public String getValueTag() {
return valueTag;
}

public void setNamespaceFile(final String file) {
final Properties properties;
try {
Expand Down Expand Up @@ -146,6 +154,14 @@ public void setNamespaces(final Map<String, String> namespaces) {
this.namespaces = namespaces;
}

public void setAttributeMarker(final String attributeMarker) {
this.attributeMarker = attributeMarker;
}

public String getAttributeMarker() {
return attributeMarker;
}

@Override
public void startRecord(final String identifier) {
if (separateRoots) {
Expand Down Expand Up @@ -192,11 +208,11 @@ public void endEntity() {

@Override
public void literal(final String name, final String value) {
if (name.isEmpty()) {
if (name.equals(valueTag)) {
element.setText(value);
}
else if (name.startsWith(ATTRIBUTE_MARKER)) {
element.addAttribute(name.substring(1), value);
else if (name.startsWith(attributeMarker)) {
element.addAttribute(name.substring(attributeMarker.length()), value);
}
else {
element.createChild(name).setText(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import org.metafacture.framework.StreamReceiver;
import org.mockito.InOrder;
import org.mockito.Mock;
import org.mockito.Mockito;
import org.mockito.MockitoAnnotations;
import org.xml.sax.helpers.AttributesImpl;

Expand Down Expand Up @@ -132,6 +133,28 @@ public void shouldEmitPCDataAsALiteralNamedValue() {
ordered.verify(receiver).literal("value", "char-data");
}

@Test
public void shouldEmitPCDataAsALiteralWithConfiguredValueTagName() {
final String name = "data";
genericXmlHandler.setValueTagName(name);

final char[] charData = "char-data".toCharArray();
genericXmlHandler.startElement("", "record", "record", attributes);
genericXmlHandler.startElement("", "entity", "entity", attributes);
genericXmlHandler.characters(charData, 0, charData.length);
genericXmlHandler.endElement("", "entity", "entity");
genericXmlHandler.endElement("", "record", "record");

final InOrder ordered = inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).startEntity("entity");
ordered.verify(receiver).literal(name, "char-data");
ordered.verify(receiver).endEntity();
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
Mockito.verifyNoMoreInteractions(receiver);
}

@Test
public void shouldEmitNamespaceOnEntityElementAndAttribute() {
genericXmlHandler.setEmitNamespace(true);
Expand All @@ -141,6 +164,47 @@ public void shouldEmitNamespaceOnEntityElementAndAttribute() {

final InOrder ordered = inOrder(receiver);
ordered.verify(receiver).startEntity("ns:entity");
ordered.verify(receiver).literal("ns:attr","attr-value");
ordered.verify(receiver).literal("ns:attr", "attr-value");
}

@Test
public void shouldNotEncodeAttributesAsMarkedLiterals() {
attributes.addAttribute("", "attr", "attr", "CDATA", "attr-value");
genericXmlHandler.startElement("", "record", "record", attributes);
genericXmlHandler.endElement("", "record", "record");

final InOrder ordered = inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).literal("attr", "attr-value");
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
Mockito.verifyNoMoreInteractions(receiver);
}

@Test
public void issue379_shouldEncodeAttributesAsLiteralsWithConfiguredMarker() {
final String marker = "~";
genericXmlHandler.setAttributeMarker(marker);

genericXmlHandler.startElement("", "record", "record", attributes);
attributes.addAttribute("", "authority", "authority", "CDATA", "marcrelator");
attributes.addAttribute("", "type", "type", "CDATA", "text");
genericXmlHandler.startElement("", "roleTerm", "roleTerm", attributes);
final char[] charData = "Author".toCharArray();
genericXmlHandler.characters(charData, 0, charData.length);
genericXmlHandler.endElement("", "roleTerm", "roleTerm");
genericXmlHandler.endElement("", "record", "record");

final InOrder ordered = inOrder(receiver);
ordered.verify(receiver).startRecord("");
ordered.verify(receiver).startEntity("roleTerm");
ordered.verify(receiver).literal(marker + "authority", "marcrelator");
ordered.verify(receiver).literal(marker + "type", "text");
ordered.verify(receiver).literal("value", "Author");
ordered.verify(receiver).endEntity();
ordered.verify(receiver).endRecord();
ordered.verifyNoMoreInteractions();
Mockito.verifyNoMoreInteractions(receiver);
}

}
Loading

0 comments on commit a3fd2cd

Please sign in to comment.