Skip to content

Commit

Permalink
Generate also CSV from CultureGraph as hbz-RVK concordance (#1085)
Browse files Browse the repository at this point in the history
  • Loading branch information
dr0i committed May 28, 2024
1 parent 848c948 commit f6c150f
Show file tree
Hide file tree
Showing 9 changed files with 2,023 additions and 116 deletions.
5 changes: 5 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,11 @@
<artifactId>metafacture-elasticsearch</artifactId>
<version>6.0.0</version>
</dependency>
<dependency>
<groupId>org.metafacture</groupId>
<artifactId>metafacture-csv</artifactId>
<version>6.0.0</version>
</dependency>
<dependency>
<groupId>org.metafacture</groupId>
<artifactId>metafacture-flowcontrol</artifactId>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/* Copyright 2020 hbz, Pascal Christoph. Licensed under the EPL 2.0*/

package org.lobid.resources.run;

import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;

import org.metafacture.biblio.marc21.MarcXmlHandler;
import org.metafacture.csv.CsvEncoder;
import org.metafacture.io.FileOpener;
import org.metafacture.io.ObjectWriter;
import org.metafacture.metafix.Metafix;
import org.metafacture.xml.XmlDecoder;

/**
 * Transforms Culturegraph MARCXML into a CSV file by running it through a
 * Metafix transformation. According to the original description, the fix
 * filters for resources with hbz holdings by calling reject() on all other
 * records (NOTE(review): the fix file is not visible here - confirm).
 *
 * @author Pascal Christoph (dr0i)
 * @author Tobias Bülte (TobiasNx)
 **/
public final class CulturegraphXmlFilterHbzRvkToCsv {
  /** Output path used when no second command line argument is given. */
  private static final String DEFAULT_OUTPUT_FILE = "cg-concordance.csv";
  /** The Metafix definition applied to every decoded MARCXML record. */
  private static final String FIX_FILE = "src/main/resources/fix-cg-to-es.fix";

  /**
   * Runs the pipeline: open file, decode XML, handle MARCXML, apply the fix,
   * encode as CSV, write to the output file.
   *
   * @param args {@code args[0]}: path of the MARCXML input file (required);
   *          {@code args[1]}: path of the CSV output file (optional, defaults
   *          to {@code cg-concordance.csv})
   * @throws IllegalArgumentException if no input file is given
   * @throws UncheckedIOException if the pipeline cannot be set up, e.g.
   *           because the fix file cannot be read
   */
  public static void main(String... args) {
    if (args == null || args.length < 1) {
      throw new IllegalArgumentException(
          "Usage: CulturegraphXmlFilterHbzRvkToCsv <marcxml-input-file> [<csv-output-file>]");
    }
    final String xmlInputFile = new File(args[0]).getAbsolutePath();
    // Resolved per invocation: a previous call must not leak its output
    // path into a later call that relies on the default.
    final String outputFile = args.length > 1 ? args[1] : DEFAULT_OUTPUT_FILE;

    final FileOpener opener = new FileOpener();
    try {
      opener.setReceiver(new XmlDecoder())
          .setReceiver(new MarcXmlHandler())
          .setReceiver(new Metafix(FIX_FILE))
          .setReceiver(new CsvEncoder())
          .setReceiver(new ObjectWriter<>(outputFile));
    } catch (final IOException e) {
      // Fail fast: processing with a partially wired pipeline would only
      // produce a misleading follow-up error.
      throw new UncheckedIOException(
          "Could not set up transformation with fix '" + FIX_FILE + "'", e);
    }
    opener.process(xmlInputFile);
    try {
      opener.closeStream();
    } catch (final NullPointerException ignored) {
      // ignore, see https://github.com/hbz/lobid-resources/issues/1030
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/* Copyright 2020 hbz, Pascal Christoph. Licensed under the EPL 2.0*/

package org.lobid.resources;

import static org.junit.Assert.assertEquals;

import java.io.File;
import java.io.IOException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.junit.Test;
import org.lobid.resources.run.CulturegraphXmlFilterHbzRvkToCsv;

/**
 * Test of filtering resources with hbz holdings from culturegraph MARCXML,
 * transforming them into a CSV file.
 *
 * @author Pascal Christoph(dr0i)
 **/
public final class CulturegraphXmlFilterHbzRvkToCsvTest {

  private static final Logger LOG =
      LoggerFactory.getLogger(CulturegraphXmlFilterHbzRvkToCsvTest.class);

  private static final String PATH_TO_TEST = "src/test/resources/";
  /** Path of the CSV file the transformation under test writes to. */
  public static final String OUTPUT_FILE =
      PATH_TO_TEST + "cg/output.csv";

  /** Example MARCXML input, relative to {@link #PATH_TO_TEST}. */
  private static final String XML_INPUT_FILE = "cg/aggregate_20240507_example.marcxml";

  /**
   * Runs the transformation on the example input and checks that it produced
   * the output file.
   */
  @SuppressWarnings("static-method")
  @Test
  public void testExtractLookupTableFromCgAsHbzRvk() {
    final File output = new File(OUTPUT_FILE);
    // Remove output of earlier runs so that the check below proves that this
    // run wrote the file.
    if (output.exists()) {
      assertEquals("Could not remove stale " + OUTPUT_FILE, true,
          output.delete());
    }
    CulturegraphXmlFilterHbzRvkToCsv.main(PATH_TO_TEST + XML_INPUT_FILE,
        OUTPUT_FILE);
    assertEquals("Transformation did not create " + OUTPUT_FILE, true,
        output.isFile());
    LOG.info("CSV written to {}", output.getAbsolutePath());
  }

}
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,9 @@ public final class CulturegraphXmlFilterHbzToJsonTest {

private static final String PATH_TO_TEST = "src/test/resources/";
public static final String JSON_OUTPUT_FILE =
PATH_TO_TEST + "jsonld-cg/bulk.ndjson";
PATH_TO_TEST + "cg/output-es-bulk.ndjson";

private static final String XML_INPUT_FILE = "aggregate_20240507_example.marcxml";
private static final String XML_INPUT_FILE = "cg/aggregate_20240507_example.marcxml";
private static PluginConfigurableNode node;
private static Client client;
private static final int ELASTICSEARCH_HTTP_PORT = 19200;
Expand Down
Loading

0 comments on commit f6c150f

Please sign in to comment.