Skip to content

Commit

Permalink
Add filter to omit empty records
Browse files Browse the repository at this point in the history
Not all input records are of interest; those records are passed on empty.
With this filter, empty records are ignored instead of being passed on.

See #1058.
  • Loading branch information
dr0i committed Apr 22, 2020
1 parent 3713c9c commit d12b7a3
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 12 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@
import org.metafacture.io.ObjectWriter;
import org.metafacture.json.JsonEncoder;
import org.metafacture.mangling.LiteralToObject;
import org.metafacture.metamorph.Filter;
import org.metafacture.metamorph.Metamorph;
import org.metafacture.strings.StringReader;
import org.metafacture.xml.XmlDecoder;
import org.metafacture.xml.XmlElementSplitter;


/**
* Filter resources with hbz holdings from culturegraph marcxml, transform it
* into JSON and write this as an elasticsearch bulk json file.
Expand Down Expand Up @@ -47,6 +47,8 @@ public static void main(String... args) {
private static StringReader receiverThread() {
final StringReader sr = new StringReader();
sr.setReceiver(new XmlDecoder()).setReceiver(new MarcXmlHandler())
.setReceiver(new Filter(
new Metamorph("src/main/resources/morph-cg-to-es.xml")))
.setReceiver(
new Metamorph("src/main/resources/morph-cg-to-es.xml"))
.setReceiver(new JsonEncoder())
Expand Down
10 changes: 7 additions & 3 deletions src/main/resources/morph-cg-to-es.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@
<!-- ####################### -->
<!-- ####### Get subject uri of each rda record -->
<!-- ####################### -->
<entity name="hbzId[]" flushWith="record" >
<combine name="@hbzId" value="${id}">
<data source="035??.a" name="id">
<regexp match="^\(DE-605\)(.*)" format="${1}"/>
</data>
</entity>
<data source="084??.a" name="rvk"/>
</combine>
<combine name="rvk" value="${rvk}" >
<data source="084??.a" name="rvk"/>
<data source="@hbzId"/>
</combine>
<data source="@hbzId" name="id" />
</rules>
</metamorph>
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import org.metafacture.io.ObjectWriter;
import org.metafacture.json.JsonEncoder;
import org.metafacture.mangling.LiteralToObject;
import org.metafacture.metamorph.Filter;
import org.metafacture.metamorph.Metamorph;
import org.metafacture.strings.StringReader;
import org.metafacture.xml.XmlDecoder;
Expand Down Expand Up @@ -76,8 +77,9 @@ static void et() {
private static StringReader receiverThread() {
final StringReader sr = new StringReader();
sr.setReceiver(new XmlDecoder()).setReceiver(new MarcXmlHandler())
.setReceiver(
new Metamorph("src/main/resources/morph-cg-to-es.xml"))
.setReceiver(new Filter(
new Metamorph("src/main/resources/morph-cg-to-es.xml")))
.setReceiver(new Metamorph("src/main/resources/morph-cg-to-es.xml"))
.setReceiver(new JsonEncoder())
.setReceiver(new JsonToElasticsearchBulk("rvk", "cg"))
.setReceiver(new ObjectWriter<>(JSON_TEST_FILE));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
<marc:subfield code="8">8\p</marc:subfield>
</marc:datafield>
<marc:datafield ind2=" " ind1=" " tag="035">
<marc:subfield code="a">(DE-605)HT013317056</marc:subfield>
<marc:subfield code="a">(DE-608)HT013317056</marc:subfield>
<marc:subfield code="8">1\p</marc:subfield>
</marc:datafield>
<marc:datafield ind2=" " ind1=" " tag="035">
Expand Down Expand Up @@ -550,7 +550,7 @@
<marc:subfield code="8">9\p</marc:subfield>
</marc:datafield>
<marc:datafield ind2=" " ind1="0" tag="689">
<marc:subfield code="5">DE-605</marc:subfield>
<marc:subfield code="5">DE-608</marc:subfield>
<marc:subfield code="8">1\p</marc:subfield>
</marc:datafield>
<marc:datafield ind2="0" ind1="0" tag="689">
Expand Down Expand Up @@ -1444,7 +1444,7 @@
<marc:subfield code="8">13\p</marc:subfield>
</marc:datafield>
<marc:datafield ind2=" " ind1="0" tag="689">
<marc:subfield code="5">DE-605</marc:subfield>
<marc:subfield code="5">DE-608</marc:subfield>
<marc:subfield code="8">1\p</marc:subfield>
<marc:subfield code="8">7\p</marc:subfield>
<marc:subfield code="8">9\p</marc:subfield>
Expand Down
4 changes: 1 addition & 3 deletions src/test/resources/jsonld-cg/bulk.ndjson
Original file line number Diff line number Diff line change
@@ -1,4 +1,2 @@
{"index":{"_index":"cg","_type":"rvk","_id":null}}
{"rvk":["LG 8100","56","13","6,15","48","LS 30100","FB 4019","LQ 88600","LR 11121","6,12","6,11","LS 16200","13.1c","930","3.1","900","15.07","60","15.17","16.3","63","NH 6880","24.50","9,2"],"hbzId":["HT013317056"]}
{"index":{"_index":"cg","_type":"rvk","_id":null}}
{"rvk":["4.7p","CI 5837","CI 1125","CI 5604","CI 5603","100","IH 34381","CI 1100","10.02","CI 5310","08.31","5,1","EC 2430","10"],"hbzId":["HT013166356","HT018625006","TT000577460"]}
{"rvk":["4.7p","CI 5837","CI 1125","CI 5604","CI 5603","100","IH 34381","CI 1100","10.02","CI 5310","08.31","5,1","EC 2430","10"],"id":["TT000577460","HT018625006","HT013166356"]}

0 comments on commit d12b7a3

Please sign in to comment.