Skip to content

Commit

Permalink
Optionally encode marked literals as MARC record attributes.
Browse files Browse the repository at this point in the history
Fixes #402.
  • Loading branch information
blackwinter committed Oct 4, 2021
1 parent f048c33 commit cd6aa08
Show file tree
Hide file tree
Showing 2 changed files with 136 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,10 @@
@Out(String.class)
@FluxCommand("encode-marcxml")
public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<String>> {

/** Default attribute marker; {@code null} means no literal is treated as a record attribute. */
public static final String DEFAULT_ATTRIBUTE_MARKER = null;
/** Format for a single XML attribute: a leading space, the attribute name, then the double-quoted value. */
private static final String ATTRIBUTE_TEMPLATE = " %s=\"%s\"";

private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
private static final String ROOT_CLOSE = "</marc:collection>";

Expand Down Expand Up @@ -83,6 +87,9 @@ public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Strin
private int indentationLevel;
private boolean formatted;

// Name prefix that flags a top-level literal as a record attribute; null disables the check in literal().
private String attributeMarker = DEFAULT_ATTRIBUTE_MARKER;
// Index into the builder where record attributes are inserted. Set in startRecord() to
// builder.length() - 1 right after the record open tag is written (presumably the position of
// its closing '>' - confirm against RECORD_OPEN) and reset to 0 when the builder is cleared.
private int recordAttributeOffset;

public MarcXmlEncoder() {
this.builder = new StringBuilder();
this.atStreamStart = true;
Expand Down Expand Up @@ -119,6 +126,14 @@ public void setFormatted(final boolean formatted) {
this.formatted = formatted;
}

/**
 * Sets the prefix that marks a top-level literal as a record attribute.
 * <p>
 * When the marker is non-null, {@code literal()} inserts every top-level literal whose name
 * starts with it into the record's opening tag (name without the marker) instead of writing
 * a control field. A {@code null} marker turns this off.
 * <p>
 * NOTE(review): an empty string is accepted as-is; since every name starts with "", it would
 * match all top-level literals - confirm whether that is intended.
 *
 * @param attributeMarker the name prefix to look for, or {@code null} to disable
 */
public void setAttributeMarker(final String attributeMarker) {
this.attributeMarker = attributeMarker;
}

/**
 * Returns the currently configured attribute marker.
 *
 * @return the marker prefix, or {@code null} if none is set (the initial value is
 *         {@link #DEFAULT_ATTRIBUTE_MARKER})
 */
public String getAttributeMarker() {
return attributeMarker;
}

@Override
public void startRecord(final String identifier) {
if (atStreamStart) {
Expand All @@ -134,6 +149,7 @@ public void startRecord(final String identifier) {

prettyPrintIndentation();
writeRaw(RECORD_OPEN);
recordAttributeOffset = builder.length() - 1;
prettyPrintNewLine();

incrementIndentationLevel();
Expand Down Expand Up @@ -182,7 +198,10 @@ public void endEntity() {
@Override
public void literal(final String name, final String value) {
if ("".equals(currentEntity)) {
if (!writeLeader(name, value)) {
if (attributeMarker != null && name.startsWith(attributeMarker)) {
builder.insert(recordAttributeOffset, String.format(ATTRIBUTE_TEMPLATE, name.substring(attributeMarker.length()), value));
}
else if (!writeLeader(name, value)) {
prettyPrintIndentation();
writeRaw(String.format(CONTROLFIELD_OPEN_TEMPLATE, name));
if (value != null) {
Expand All @@ -199,7 +218,6 @@ else if (!writeLeader(currentEntity, value)) {
writeRaw(SUBFIELD_CLOSE);
prettyPrintNewLine();
}

}

@Override
Expand Down Expand Up @@ -276,5 +294,7 @@ private void prettyPrintNewLine() {
/**
 * Passes everything buffered so far to the downstream receiver, then empties the buffer
 * and rewinds the record attribute insertion point to the start.
 */
private void sendAndClearData() {
    getReceiver().process(builder.toString());

    // Truncating to length zero drops the buffered characters; the offset into the
    // old content is no longer meaningful afterwards.
    builder.setLength(0);
    recordAttributeOffset = 0;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,10 @@
*/

public class MarcXmlEncoderTest {
private static StringBuilder resultCollector;
private static MarcXmlEncoder encoder;

private static final String TAG = "tag";
private static final String VALUE = "value";

private static final String XML_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
private static final String XML_1_DECLARATION = "<?xml version=\"1.1\" encoding=\"UTF-8\"?>";
private static final String XML_16_DECLARATION = "<?xml version=\"1.0\" encoding=\"UTF-16\"?>";
Expand All @@ -48,6 +50,9 @@ public class MarcXmlEncoderTest {
private static final String XML_MARC_COLLECTION_END_TAG = "</marc:collection>";
private static final String RECORD_ID = "92005291";

private static MarcXmlEncoder encoder;
private static StringBuilder resultCollector;

@Before
public void setUp() {
encoder = new MarcXmlEncoder();
Expand Down Expand Up @@ -198,6 +203,112 @@ public void issue336_createRecordWithTopLevelLeader() {
assertEquals(expected, actual);
}

/**
 * This test does not configure an attribute marker, so a literal named "~attr" is expected
 * to be written as an ordinary control field.
 */
@Test
public void shouldNotEncodeMarkedLiteralsAsAttributes() {
    encoder.startRecord("");
    encoder.literal(TAG, VALUE);
    encoder.literal("~attr", VALUE);
    encoder.endRecord();
    encoder.closeStream();

    final String expected = XML_DECLARATION
            + XML_ROOT_OPEN
            + "<marc:record>"
            + "<marc:controlfield tag=\"tag\">value</marc:controlfield>"
            + "<marc:controlfield tag=\"~attr\">value</marc:controlfield>"
            + "</marc:record>"
            + XML_MARC_COLLECTION_END_TAG;
    final String actual = resultCollector.toString();
    assertEquals(expected, actual);
}

/**
 * With "~" configured as marker, the top-level literal "~attr" is expected to end up as
 * attribute {@code attr} on the record element, while the unmarked literal stays a control field.
 */
@Test
public void issue402_shouldEncodeMarkedLiteralsWithConfiguredMarkerAsAttributes() {
    final String attributeMarker = "~";
    encoder.setAttributeMarker(attributeMarker);

    encoder.startRecord("");
    encoder.literal(TAG, VALUE);
    encoder.literal(attributeMarker + "attr", VALUE);
    encoder.endRecord();
    encoder.closeStream();

    final String expected = XML_DECLARATION
            + XML_ROOT_OPEN
            + "<marc:record attr=\"value\">"
            + "<marc:controlfield tag=\"tag\">value</marc:controlfield>"
            + "</marc:record>"
            + XML_MARC_COLLECTION_END_TAG;
    final String actual = resultCollector.toString();
    assertEquals(expected, actual);
}

/**
 * A marked literal inside an entity is expected to be written as a regular subfield,
 * leaving the record element without attributes.
 */
@Test
public void shouldNotEncodeNestedMarkedLiteralsAsAttributes() {
    final String attributeMarker = "~";
    encoder.setAttributeMarker(attributeMarker);

    encoder.startRecord("");
    encoder.startEntity("tag12");
    encoder.literal(TAG, VALUE);
    encoder.literal(attributeMarker + "attr", VALUE);
    encoder.endEntity();
    encoder.endRecord();
    encoder.closeStream();

    final String expected = XML_DECLARATION
            + XML_ROOT_OPEN
            + "<marc:record>"
            + "<marc:datafield tag=\"tag\" ind1=\"1\" ind2=\"2\">"
            + "<marc:subfield code=\"tag\">value</marc:subfield>"
            + "<marc:subfield code=\"~attr\">value</marc:subfield>"
            + "</marc:datafield>"
            + "</marc:record>"
            + XML_MARC_COLLECTION_END_TAG;
    final String actual = resultCollector.toString();
    assertEquals(expected, actual);
}

/**
 * An entity whose name starts with the marker is expected to be encoded as a normal
 * data field; the marker only applies to literals.
 */
@Test
public void shouldNotEncodeMarkedEntitiesWithConfiguredMarkerAsAttributes() {
    final String attributeMarker = "~";
    encoder.setAttributeMarker(attributeMarker);

    encoder.startRecord("");
    encoder.startEntity(attributeMarker + "data");
    encoder.literal(TAG, VALUE);
    encoder.endEntity();
    encoder.endRecord();
    encoder.closeStream();

    final String expected = XML_DECLARATION
            + XML_ROOT_OPEN
            + "<marc:record>"
            + "<marc:datafield tag=\"~da\" ind1=\"t\" ind2=\"a\">"
            + "<marc:subfield code=\"tag\">value</marc:subfield>"
            + "</marc:datafield>"
            + "</marc:record>"
            + XML_MARC_COLLECTION_END_TAG;
    final String actual = resultCollector.toString();
    assertEquals(expected, actual);
}

/**
 * With "*" configured as marker, a literal prefixed with "~" does not match and is
 * expected to be written as an ordinary control field.
 */
@Test
public void shouldNotEncodeLiteralsWithDifferentMarkerAsAttributes() {
    encoder.setAttributeMarker("*");

    encoder.startRecord("");
    encoder.literal(TAG, VALUE);
    encoder.literal("~attr", VALUE);
    encoder.endRecord();
    encoder.closeStream();

    final String expected = XML_DECLARATION
            + XML_ROOT_OPEN
            + "<marc:record>"
            + "<marc:controlfield tag=\"tag\">value</marc:controlfield>"
            + "<marc:controlfield tag=\"~attr\">value</marc:controlfield>"
            + "</marc:record>"
            + XML_MARC_COLLECTION_END_TAG;
    final String actual = resultCollector.toString();
    assertEquals(expected, actual);
}

@Test
public void sendDataAndClearWhenRecordStartedAndStreamResets() {
encoder.startRecord("1");
Expand Down Expand Up @@ -229,4 +340,5 @@ public void shouldIgnoreNullValueOfLiteral() {
String actual = resultCollector.toString();
assertEquals(expected, actual);
}

}

0 comments on commit cd6aa08

Please sign in to comment.