From e981f30178ebfdac38322e3219ad08dda1c35c4b Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Wed, 9 Oct 2024 11:48:33 +0200 Subject: [PATCH 1/4] Rely on autoboxing for entity count stack. --- metafix/src/main/java/org/metafacture/metafix/Metafix.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/metafix/src/main/java/org/metafacture/metafix/Metafix.java b/metafix/src/main/java/org/metafacture/metafix/Metafix.java index cbcfedcb..7d4bd12e 100644 --- a/metafix/src/main/java/org/metafacture/metafix/Metafix.java +++ b/metafix/src/main/java/org/metafacture/metafix/Metafix.java @@ -239,7 +239,7 @@ public void startRecord(final String identifier) { flattener.startRecord(identifier); entityCountStack.clear(); entityCount = 0; - entityCountStack.add(Integer.valueOf(entityCount)); + entityCountStack.add(entityCount); recordIdentifier = identifier; entities = new ArrayList<>(); } @@ -317,13 +317,13 @@ public void startEntity(final String name) { addValue(name, value); entities.add(value); - entityCountStack.push(Integer.valueOf(++entityCount)); + entityCountStack.push(++entityCount); flattener.startEntity(name); } @Override public void endEntity() { - entityCountStack.pop().intValue(); + entityCountStack.pop(); flattener.endEntity(); } From 1fed102d0f1a9786c5536c9eb5beea0689989da1 Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Wed, 9 Oct 2024 14:43:55 +0200 Subject: [PATCH 2/4] Optionally specify limit for number of entities in a record. This is a brute-force approach to dealing with OOM situations when Alma records have an excessive number of items (e.g. 99374518570506441: >12000 entities = ~10 GB heap for the Record instance). Use Metafix instance setter `setMaxEntityCount(int)` or set system property `org.metafacture.metafix.maxEntityCount=<int>`. Alternative options: - Increase maximum heap size for JVM. - Significantly reduce memory requirement for Record instances. 
--- metafix-runner/build.gradle | 10 +++++++ .../java/org/metafacture/metafix/Metafix.java | 29 ++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/metafix-runner/build.gradle b/metafix-runner/build.gradle index fe2f8d9b..9ebdd3e7 100644 --- a/metafix-runner/build.gradle +++ b/metafix-runner/build.gradle @@ -49,3 +49,13 @@ application { ] } } + +tasks.withType(JavaExec) { + doFirst { + def prefix = project.group + '.' + + System.properties.each { k, v -> + if (k.startsWith(prefix)) systemProperties[k] = v + } + } +} diff --git a/metafix/src/main/java/org/metafacture/metafix/Metafix.java b/metafix/src/main/java/org/metafacture/metafix/Metafix.java index 7d4bd12e..7b306041 100644 --- a/metafix/src/main/java/org/metafacture/metafix/Metafix.java +++ b/metafix/src/main/java/org/metafacture/metafix/Metafix.java @@ -101,6 +101,7 @@ public class Metafix implements StreamPipe, Maps { private boolean repeatedFieldsToEntities; private boolean strictnessHandlesProcessExceptions; private int entityCount; + private int maxEntityCount = Integer.getInteger("org.metafacture.metafix.maxEntityCount", -1); public Metafix() { this(NO_VARS); @@ -313,22 +314,36 @@ public void startEntity(final String name) { throw new IllegalArgumentException("Entity name must not be null."); } + ++entityCount; + if (maxEntityCountExceeded()) { + LOG.debug("Maximum number of entities exceeded: {}/{}", entityCount, maxEntityCount); + return; + } + final Value value = isArrayName(name) ? 
Value.newArray() : Value.newHash(); addValue(name, value); entities.add(value); - entityCountStack.push(++entityCount); + entityCountStack.push(entityCount); flattener.startEntity(name); } @Override public void endEntity() { + if (maxEntityCountExceeded()) { + return; + } + entityCountStack.pop(); flattener.endEntity(); } @Override public void literal(final String name, final String value) { + if (entityCountStack.size() > 1 && maxEntityCountExceeded()) { + return; + } + LOG.debug("Putting '{}': '{}'", name, value); flattener.literal(name, value); } @@ -438,6 +453,18 @@ public String getEntityMemberName() { return entityMemberName; } + public void setMaxEntityCount(final int maxEntityCount) { + this.maxEntityCount = maxEntityCount; + } + + public int getMaxEntityCount() { + return maxEntityCount; + } + + private boolean maxEntityCountExceeded() { + return maxEntityCount >= 0 && entityCount > maxEntityCount; + } + public enum Strictness { /** From dc4f1af86546010736f7a3be1709e8ad63c5a7dc Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Fri, 18 Oct 2024 12:05:24 +0200 Subject: [PATCH 3/4] Implement optimizations for limiting number of entities. 
(#373) --- .../java/org/metafacture/metafix/Metafix.java | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/metafix/src/main/java/org/metafacture/metafix/Metafix.java b/metafix/src/main/java/org/metafacture/metafix/Metafix.java index 7b306041..478d6268 100644 --- a/metafix/src/main/java/org/metafacture/metafix/Metafix.java +++ b/metafix/src/main/java/org/metafacture/metafix/Metafix.java @@ -76,6 +76,8 @@ public class Metafix implements StreamPipe, Maps { public static final Map NO_VARS = Collections.emptyMap(); + public static final int MAX_ENTITY_COUNT = Integer.getInteger("org.metafacture.metafix.maxEntityCount", -1); + private static final Logger LOG = LoggerFactory.getLogger(Metafix.class); private static final String ENTITIES_NOT_BALANCED = "Entity starts and ends are not balanced"; @@ -101,7 +103,6 @@ public class Metafix implements StreamPipe, Maps { private boolean repeatedFieldsToEntities; private boolean strictnessHandlesProcessExceptions; private int entityCount; - private int maxEntityCount = Integer.getInteger("org.metafacture.metafix.maxEntityCount", -1); public Metafix() { this(NO_VARS); @@ -316,7 +317,7 @@ public void startEntity(final String name) { ++entityCount; if (maxEntityCountExceeded()) { - LOG.debug("Maximum number of entities exceeded: {}/{}", entityCount, maxEntityCount); + LOG.debug("Maximum number of entities exceeded: {}/{}", entityCount, MAX_ENTITY_COUNT); return; } @@ -340,7 +341,7 @@ public void endEntity() { @Override public void literal(final String name, final String value) { - if (entityCountStack.size() > 1 && maxEntityCountExceeded()) { + if (maxEntityCountExceeded()) { return; } @@ -453,16 +454,8 @@ public String getEntityMemberName() { return entityMemberName; } - public void setMaxEntityCount(final int maxEntityCount) { - this.maxEntityCount = maxEntityCount; - } - - public int getMaxEntityCount() { - return maxEntityCount; - } - private boolean maxEntityCountExceeded() { - return 
maxEntityCount >= 0 && entityCount > maxEntityCount; + return MAX_ENTITY_COUNT >= 0 && entityCount > MAX_ENTITY_COUNT; } public enum Strictness { From a10b3d8e506731a02c7d9c69e2b27e517d1d4891 Mon Sep 17 00:00:00 2001 From: Jens Wille Date: Tue, 3 Dec 2024 15:34:57 +0100 Subject: [PATCH 4/4] Add integration test for limiting number of entities. (#373) --- metafix/integrationTest.sh | 6 ++++-- .../script/fromJson/toJson/maxEntityCount/expected.json | 3 +++ .../script/fromJson/toJson/maxEntityCount/input.json | 3 +++ .../script/fromJson/toJson/maxEntityCount/metafix.args | 1 + .../script/fromJson/toJson/maxEntityCount/test.fix | 1 + .../script/fromJson/toJson/maxEntityCount/test.flux | 8 ++++++++ 6 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/expected.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/input.json create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/metafix.args create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/test.fix create mode 100644 metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/test.flux diff --git a/metafix/integrationTest.sh b/metafix/integrationTest.sh index 82e5371b..7533c33c 100755 --- a/metafix/integrationTest.sh +++ b/metafix/integrationTest.sh @@ -74,7 +74,8 @@ function rm_temp() { } function run_metafix() { - $gradle_command --console=plain -p "$root_directory" :metafix-runner:run --args="$1" -P${noprofile}profile="${1%.*}" + local file=$1; shift + $gradle_command --console=plain -p "$root_directory" :metafix-runner:run --args="$file" -P${noprofile}profile="${file%.*}" $@ } function run_catmandu() { @@ -224,10 +225,11 @@ function run_tests() { 
metafix_command_output="$test_directory/metafix.out" metafix_command_error="$test_directory/metafix.err" + metafix_command_args="$test_directory/metafix.args" metafix_start_time=$(current_time) - run_metafix "$test_directory/$metafix_file" >"$metafix_command_output" 2>"$metafix_command_error" + run_metafix "$test_directory/$metafix_file" $(cat "$metafix_command_args" 2>/dev/null || true) >"$metafix_command_output" 2>"$metafix_command_error" metafix_exit_status=$? metafix_elapsed_time=$(elapsed_time "$metafix_start_time") diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/expected.json b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/expected.json new file mode 100644 index 00000000..3a2407ce --- /dev/null +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/expected.json @@ -0,0 +1,3 @@ +{"key1":"value1","key2":"value2","key3":"value3","key4":"value4"} +{"key1":"value1","key2":["v1","v2"]} +{"key1":"value1","key2":["v1","v2"],"key3":"value3","key4":"value4"} diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/input.json b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/input.json new file mode 100644 index 00000000..42c84b5a --- /dev/null +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/input.json @@ -0,0 +1,3 @@ +{"key1":"value1","key2":"value2","key3":"value3","key4":"value4"} +{"key1":"value1","key2":["v1","v2"],"key3":["v3"],"key4":"value4"} +{"key1":"value1","key2":["v1","v2"],"key3":"value3","key4":"value4"} diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/metafix.args 
b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/metafix.args new file mode 100644 index 00000000..d59d5387 --- /dev/null +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/metafix.args @@ -0,0 +1 @@ +-Dorg.metafacture.metafix.maxEntityCount=1 diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/test.fix b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/test.fix new file mode 100644 index 00000000..174b9a02 --- /dev/null +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/test.fix @@ -0,0 +1 @@ +nothing() diff --git a/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/test.flux b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/test.flux new file mode 100644 index 00000000..ec507521 --- /dev/null +++ b/metafix/src/test/resources/org/metafacture/metafix/integration/script/fromJson/toJson/maxEntityCount/test.flux @@ -0,0 +1,8 @@ +FLUX_DIR + "input.json" +|open-file +|as-lines +|decode-json +|fix(FLUX_DIR + "test.fix") +|encode-json +|write(FLUX_DIR + "output-metafix.json") +;