Skip to content

Commit

Permalink
Refactoring validateCorpus methods
Browse files — browse the repository at this point in the history
  • Loading branch information
MaximPlusov committed Apr 30, 2024
1 parent eb71ab6 commit 42ec23a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 53 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -198,56 +198,27 @@ public String toString() {
* @param validator
* @return
*/
public static ResultSet validateCorpus(final TestCorpus corpus, final PDFAValidator validator) {
public static ResultSet validateCorpus(final TestCorpus corpus, final PDFAValidator validator, final PDFAFlavour flavour) {
Set<Result> results = new HashSet<>();
Set<Incomplete> exceptions = new HashSet<>();
Components.Timer batchTimer = Components.Timer.start();
long maxMemUse = 0;
for (String itemName : corpus.getItemNames()) {
System.out.println(itemName);
CorpusItemId id = null;
Components.Timer jobTimer = Components.Timer.start();
try {
id = CorpusItemIdImpl.fromFileName(validator.getProfile().getPDFAFlavour().getPart(), itemName, "");
} catch (IllegalArgumentException excep) {
LOG.log(Level.FINE, "Problem generating ID for corpus item:" + itemName, excep);
}
if (id != null) {
try (PDFAParser loader = Foundries.defaultInstance().createParser(corpus.getItemStream(itemName),
validator.getProfile().getPDFAFlavour())) {
ValidationResult result = validator.validate(loader);
long memUsed = (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed() / MEGABYTE);
maxMemUse = (memUsed > maxMemUse) ? memUsed : maxMemUse;
results.add(new Result(id, result, jobTimer.stop(), memUsed));
} catch (Throwable e) {
LOG.log(Level.SEVERE, String.format("Caught throwable testing %s from corpus %s", itemName,
corpus.getDetails().getName()));
LOG.log(Level.SEVERE, e.getClass().getName());
LOG.log(Level.SEVERE, e.getMessage());
exceptions.add(new Incomplete(id, e));
}
}
}
return new ResultSetImpl(corpus.getDetails(), corpus.getType().getId(), validator.getProfile(), results, exceptions, batchTimer.stop(),
maxMemUse);
}

public static ResultSet validateCorpus(final TestCorpus corpus) {
Set<Result> results = new HashSet<>();
Set<Incomplete> exceptions = new HashSet<>();
Components.Timer batchTimer = Components.Timer.start();
long maxMemUse = 0;
for (String itemName : corpus.getItemNames()) {
CorpusItemId id = null;
Components.Timer jobTimer = Components.Timer.start();
try (PDFAParser loader = Foundries.defaultInstance().createParser(corpus.getItemStream(itemName))) {
PDFAFlavour flavour = loader.getFlavour();
try (PDFAValidator validator = Foundries.defaultInstance().createValidator(flavour, false)) {
id = CorpusItemIdImpl.fromFileName(validator.getProfile().getPDFAFlavour().getPart(), itemName, "");
ValidationResult result = validator.validate(loader);
long memUsed = (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed() / MEGABYTE);
maxMemUse = (memUsed > maxMemUse) ? memUsed : maxMemUse;
results.add(new Result(id, result, jobTimer.stop(), memUsed));
try (PDFAParser loader = Foundries.defaultInstance().createParser(corpus.getItemStream(itemName), flavour);
PDFAValidator newValidator = flavour != PDFAFlavour.NO_FLAVOUR ? null : Foundries.defaultInstance().createValidator(loader.getFlavour(), false)) {
PDFAValidator currentValidator = flavour != PDFAFlavour.NO_FLAVOUR ? validator : newValidator;
try {
id = CorpusItemIdImpl.fromFileName(currentValidator.getProfile().getPDFAFlavour().getPart(), itemName, "");
} catch (IllegalArgumentException excep) {
LOG.log(Level.FINE, "Problem generating ID for corpus item:" + itemName, excep);
}
ValidationResult result = currentValidator.validate(loader);
long memUsed = (ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed() / MEGABYTE);
maxMemUse = (memUsed > maxMemUse) ? memUsed : maxMemUse;
results.add(new Result(id, result, jobTimer.stop(), memUsed));
} catch (Throwable e) {
LOG.log(Level.SEVERE, String.format("Caught throwable testing %s from corpus %s", itemName,
corpus.getDetails().getName()));
Expand All @@ -256,7 +227,8 @@ public static ResultSet validateCorpus(final TestCorpus corpus) {
exceptions.add(new Incomplete(id, e));
}
}
return new ResultSetImpl(corpus.getDetails(), corpus.getType().getId(), Profiles.defaultProfile(), results, exceptions, batchTimer.stop(),
return new ResultSetImpl(corpus.getDetails(), corpus.getType().getId(), flavour != PDFAFlavour.NO_FLAVOUR ?
validator.getProfile() : Profiles.defaultProfile(), results, exceptions, batchTimer.stop(),
maxMemUse);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,7 @@
import java.io.InputStream;
import java.io.PrintWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.*;

import org.junit.AfterClass;
import org.junit.BeforeClass;
Expand Down Expand Up @@ -127,14 +120,14 @@ private static void testCorpora(final List<ResultSet> resultSets) {
for (TestCorpus corpus : CorpusManager.corporaForFlavour(flavour)) {
if (flavour != PDFAFlavour.NO_FLAVOUR) {
try (PDFAValidator validator = Foundries.defaultInstance().createValidator(flavour, false)) {
ResultSet results = ResultSetImpl.validateCorpus(corpus, validator);
ResultSet results = ResultSetImpl.validateCorpus(corpus, validator, flavour);
resultSets.add(results);
} catch (IOException excep) {
// Just exception closing validator
excep.printStackTrace();
}
} else {
ResultSet results = ResultSetImpl.validateCorpus(corpus);
ResultSet results = ResultSetImpl.validateCorpus(corpus, null, flavour);
resultSets.add(results);
}
}
Expand Down

0 comments on commit 42ec23a

Please sign in to comment.