-
Notifications
You must be signed in to change notification settings - Fork 7
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Generate also CSV from CultureGraph as hbz-RVK concordance (#1085)
- Loading branch information
Showing
9 changed files
with
2,023 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
47 changes: 47 additions & 0 deletions
47
src/main/java/org/lobid/resources/run/CulturegraphXmlFilterHbzRvkToCsv.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
/* Copyright 2020 hbz, Pascal Christoph. Licensed under the EPL 2.0*/ | ||
|
||
package org.lobid.resources.run; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
|
||
import org.metafacture.biblio.marc21.MarcXmlHandler; | ||
import org.metafacture.csv.CsvEncoder; | ||
import org.metafacture.io.FileOpener; | ||
import org.metafacture.io.ObjectWriter; | ||
import org.metafacture.xml.XmlDecoder; | ||
import org.metafacture.metafix.Metafix; | ||
|
||
/** | ||
* Filter resources with hbz holdings from culturegraph's MARCXML while tranform it with reject() | ||
* into a CSV file. | ||
* | ||
* @author Pascal Christoph (dr0i) | ||
* @author Tobias Bülte (TobiasNx) | ||
**/ | ||
public final class CulturegraphXmlFilterHbzRvkToCsv { | ||
private static String OUTPUT_FILE="cg-concordance.csv"; | ||
|
||
public static void main(String... args) { | ||
String XML_INPUT_FILE = new File(args[0]).getAbsolutePath(); | ||
|
||
if (args.length > 1) OUTPUT_FILE = args[1]; | ||
|
||
final FileOpener opener = new FileOpener(); | ||
try { | ||
opener.setReceiver(new XmlDecoder()).setReceiver(new MarcXmlHandler()) | ||
.setReceiver(new Metafix("src/main/resources/fix-cg-to-es.fix")) | ||
.setReceiver(new CsvEncoder()) | ||
.setReceiver(new ObjectWriter<>(OUTPUT_FILE)); | ||
} catch (IOException e) { | ||
e.printStackTrace(); | ||
} | ||
opener.process( | ||
new File(XML_INPUT_FILE).getAbsolutePath()); | ||
try { | ||
opener.closeStream(); | ||
} catch (final NullPointerException e) { | ||
// ignore, see https://github.com/hbz/lobid-resources/issues/1030 | ||
} | ||
} | ||
} |
45 changes: 45 additions & 0 deletions
45
src/test/java/org/lobid/resources/CulturegraphXmlFilterHbzRvkToCsvTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
/* Copyright 2020 hbz, Pascal Christoph. Licensed under the EPL 2.0*/ | ||
|
||
package org.lobid.resources; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
|
||
import java.io.File; | ||
import java.io.IOException; | ||
|
||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import org.junit.Test; | ||
import org.lobid.resources.run.CulturegraphXmlFilterHbzRvkToCsv; | ||
|
||
/** | ||
* Test of filtering resources with hbz holdings from culturegraph MARCXML, | ||
* tranforming into a CSV file. | ||
* | ||
* @author Pascal Christoph(dr0i) | ||
**/ | ||
public final class CulturegraphXmlFilterHbzRvkToCsvTest { | ||
|
||
private static final Logger LOG = | ||
LoggerFactory.getLogger(CulturegraphXmlFilterHbzRvkToCsvTest.class); | ||
|
||
private static final String PATH_TO_TEST = "src/test/resources/"; | ||
public static final String OUTPUT_FILE = | ||
PATH_TO_TEST + "cg/output.csv"; | ||
|
||
private static final String XML_INPUT_FILE = "cg/aggregate_20240507_example.marcxml"; | ||
|
||
@SuppressWarnings("static-method") | ||
@Test | ||
public void testExtractLookupTableFromCgAsHbzRvk() { | ||
CulturegraphXmlFilterHbzRvkToCsv.main(PATH_TO_TEST + XML_INPUT_FILE, | ||
OUTPUT_FILE); | ||
} | ||
|
||
/**private static void ingest() throws IOException { | ||
File jsonFile = new File(OUTPUT_FILE); | ||
}*/ | ||
|
||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.