Skip to content

Commit

Permalink
Use fix instead of morph for culturegraph #1058
Browse files Browse the repository at this point in the history
- We do not need a separate filter step since the fix can already do this.
  • Loading branch information
TobiasNx committed Oct 12, 2023
1 parent 23152dc commit 2a661a1
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 37 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,16 @@
import org.metafacture.json.JsonEncoder;
import org.metafacture.mangling.LiteralToObject;
import org.metafacture.metamorph.Filter;
import org.metafacture.metamorph.Metamorph;
import org.metafacture.strings.StringReader;
import org.metafacture.xml.XmlDecoder;
import org.metafacture.xml.XmlElementSplitter;
import org.metafacture.metafix.Metafix;

/**
* Filter resources with hbz holdings from culturegraph marcxml, transform it
* Filter resources with hbz holdings from culturegraph marcxml (using reject()) while transforming it
* into JSON and write this as an elasticsearch bulk json file.
*
* @author Pascal Christoph(dr0i)
*
* @author Pascal Christoph(dr0i) & Tobias Bülte(TobiasNx)
**/
@SuppressWarnings("javadoc")
public final class CulturegraphXmlFilterHbzToJson {
Expand All @@ -32,7 +32,7 @@ public final class CulturegraphXmlFilterHbzToJson {

public static void main(String... args) {
String XML_INPUT_FILE =new File(args[0]).getAbsolutePath();

if (args.length >1) JSON_FILE=args[1];

final FileOpener opener = new FileOpener();
Expand All @@ -55,10 +55,8 @@ public static void main(String... args) {
private static StringReader receiverThread() {
final StringReader sr = new StringReader();
sr.setReceiver(new XmlDecoder()).setReceiver(new MarcXmlHandler())
.setReceiver(new Filter( // prevents empty records
new Metamorph("src/main/resources/morph-cg-to-es.xml")))
.setReceiver(
new Metamorph("src/main/resources/morph-cg-to-es.xml"))
new Metafix("src/main/resources/fix-cg-to-es.fix"))
.setReceiver(new JsonEncoder())
.setReceiver(new JsonToElasticsearchBulk("rvk",
ELASTICSEARCH_INDEX_NAME))
Expand Down
29 changes: 29 additions & 0 deletions src/main/resources/fix-cg-to-es.fix
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Reduces each incoming record to two fields, "rvk[]" and "id", and rejects
# every record that lacks either of them.

# Start with an empty "rvk[]" array to collect values into.
set_array("rvk[]")

# For every field matching "084??": when its subfield 2 matches "rvk",
# append its subfield a to "rvk[]".
do list(path: "084??", "var": "$i")
if any_match("$i.2", "rvk")
copy_field("$i.a","rvk[].$append")
end
end

# Same collection scheme for "id": for every field matching "035??", append
# subfield a when it matches the "(DE-605)" prefix pattern.
# NOTE(review): "(DE-605)" is presumably the hbz marker (the final reject
# calls this field "hbz ids") - confirm.
set_array("id")
do list(path: "035??", "var": "$i")
if any_match("$i.a", "^\\(DE-605\\)(.*)")
copy_field("$i.a","id.$append")
end
end
# Strip the "(DE-605)" prefix from each collected value, keeping only the
# captured remainder ($1).
replace_all("id.*","^\\(DE-605\\)(.*)","$1")
# Collapse the collected ids into one string, separated by ", ".
join_field("id",", ")

# Keep only the two fields built above.
retain("rvk[]","id")
# NOTE(review): presumably removes the fields that stayed empty, so that the
# exists() checks below only see fields that actually got a value - confirm
# against the Metafix documentation.
vacuum()

# Reject (filter out) records that have no "rvk[]" field at this point.
unless exists("rvk[]")
reject()
end

# Reject (filter out) records that have no "id" field at this point, i.e.
# records without hbz ids.
unless exists("id")
reject()
end
29 changes: 0 additions & 29 deletions src/main/resources/morph-cg-to-es.xml

This file was deleted.

0 comments on commit 2a661a1

Please sign in to comment.