Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed fetch issue #103

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,134 +20,134 @@
import gov.loc.repository.bagit.util.PathUtils;

/**
* Part of the BagIt conformance suite.
* This checker checks for various problems related to the manifests in a bag.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell, this file only contains formatting changes. Is that correct?

* Part of the BagIt conformance suite. This checker checks for various problems
* related to the manifests in a bag.
*/
@SuppressWarnings({"PMD.UseLocaleWithCaseConversions"})
public final class ManifestChecker {
public final class ManifestChecker{

// Logger used for every warning emitted by the checks in this class.
private static final Logger logger = LoggerFactory.getLogger(ManifestChecker.class);
// Message templates for warnings and errors, looked up by key in the "MessageBundle" resource bundle.
private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

// The file-name patterns below spell each letter as an [Xx] character class,
// so they match regardless of letter case without needing a regex flag.
// Matches "Thumbs.db".
private static final String THUMBS_DB_FILE = "[Tt][Hh][Uu][Mm][Bb][Ss]\\.[Dd][Bb]";
// Matches ".DS_Store".
private static final String DS_STORE_FILE = "\\.[Dd][Ss]_[Ss][Tt][Oo][Rr][Ee]";
// Matches ".Spotlight-V100".
private static final String SPOTLIGHT_FILE = "\\.[Ss][Pp][Oo][Tt][Ll][Ii][Gg][Hh][Tt]-[Vv]100";
// Matches ".Trashes", optionally with "_" plus any single character after the leading dot (e.g. "._.Trashes").
private static final String TRASHES_FILE = "\\.(_.)?[Tt][Rr][Aa][Ss][Hh][Ee][Ss]";
// Matches ".fseventsd".
private static final String FS_EVENTS_FILE = "\\.[Ff][Ss][Ee][Vv][Ee][Nn][Tt][Ss][Dd]";
// Any text ending in "data/" immediately followed by one of the names above (when applied as a full-string match).
private static final String OS_FILES_REGEX = ".*data/(" + THUMBS_DB_FILE + "|" + DS_STORE_FILE + "|" + SPOTLIGHT_FILE + "|" + TRASHES_FILE + "|" + FS_EVENTS_FILE + ")";

private ManifestChecker(){
  // no-op: private so the class cannot be instantiated from outside
}

/*
* Check for all the manifest specific potential problems
*/
public static void checkManifests(final Path bagitDir, final Charset encoding, final Set<BagitWarning> warnings,
final Collection<BagitWarning> warningsToIgnore) throws IOException, InvalidBagitFileFormatException{
public static void checkManifests(final Path bagitDir, final Charset encoding, final Set<BagitWarning> warnings,
final Collection<BagitWarning> warningsToIgnore) throws IOException, InvalidBagitFileFormatException{

boolean missingTagManifest = true;
try(final DirectoryStream<Path> files = Files.newDirectoryStream(bagitDir)){
for(final Path file : files){
final String filename = PathUtils.getFilename(file);
if(filename.contains("manifest-")){
if(filename.startsWith("manifest-")){
checkData(file, encoding, warnings, warningsToIgnore, true);
}
else{
} else{
checkData(file, encoding, warnings, warningsToIgnore, false);
missingTagManifest = false;
}

final String algorithm = filename.split("[-\\.]")[1];
checkAlgorthm(algorithm, warnings, warningsToIgnore);
}
}
}

if(!warningsToIgnore.contains(BagitWarning.MISSING_TAG_MANIFEST) && missingTagManifest){
logger.warn(messages.getString("bag_missing_tag_manifest_warning"), bagitDir);
warnings.add(BagitWarning.MISSING_TAG_MANIFEST);
}
}

/**
 * Reads a manifest line by line and runs every per-entry check on it: md5sum style entries,
 * paths that differ only by letter case, changed unicode normalization (UTF encodings only),
 * a "bag within a bag", relative paths and OS specific files.
 *
 * @param manifestFile the manifest to read
 * @param encoding the charset used to read the manifest
 * @param warnings the set that collects every warning found
 * @param warningsToIgnore warnings that must neither be logged nor collected
 * @param isPayloadManifest true when the manifest is a payload manifest, false for a tag manifest
 * @throws IOException if the manifest cannot be read
 * @throws InvalidBagitFileFormatException if a line cannot be split into checksum and path
 */
private static void checkData(final Path manifestFile, final Charset encoding, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore, final boolean isPayloadManifest) throws IOException, InvalidBagitFileFormatException{
  try(final BufferedReader reader = Files.newBufferedReader(manifestFile, encoding)){
    // the encoding never changes while reading, so decide once whether normalization is checked
    final boolean isUnicodeEncoding = encoding.name().startsWith("UTF");
    final Set<String> lowerCasePaths = new HashSet<>();

    String line = reader.readLine();
    while(line != null){
      final String path = checkForManifestCreatedWithMD5SumTools(parsePath(line), warnings, warningsToIgnore);

      // Locale.ROOT keeps the comparison independent of the JVM default locale
      // (e.g. a Turkish locale lower-cases 'I' to a dotless 'ı' and would miss the collision).
      final String lowerCasePath = path.toLowerCase(java.util.Locale.ROOT);
      if(!warningsToIgnore.contains(BagitWarning.DIFFERENT_CASE) && lowerCasePaths.contains(lowerCasePath)){
        logger.warn(messages.getString("different_case_warning"), manifestFile, path);
        warnings.add(BagitWarning.DIFFERENT_CASE);
      }
      lowerCasePaths.add(lowerCasePath);

      if(isUnicodeEncoding){
        checkNormalization(path, manifestFile.getParent(), warnings, warningsToIgnore);
      }

      // the remaining checks work on the raw manifest line, not on the parsed path
      checkForBagWithinBag(line, warnings, warningsToIgnore, isPayloadManifest);
      checkForRelativePaths(line, warnings, warningsToIgnore, manifestFile);
      checkForOSSpecificFiles(line, warnings, warningsToIgnore, manifestFile);

      line = reader.readLine();
    }
  }
}

/*
 * Split a manifest line at its first run of whitespace and return everything after it (the path).
 * A line without any whitespace separator violates the manifest format and is rejected.
 */
static String parsePath(final String line) throws InvalidBagitFileFormatException{
  final String[] checksumAndPath = line.split("\\s+", 2);
  if(checksumAndPath.length >= 2){
    return checksumAndPath[1];
  }

  final String template = messages.getString("manifest_line_violated_spec_error");
  final String errorMessage = MessageFormatter.format(template, line).getMessage();
  throw new InvalidBagitFileFormatException(errorMessage);
}

/*
 * A path that starts with '*' is treated as written by an md5sum style tool: the star is removed
 * from the returned path and, unless ignored, MD5SUM_TOOL_GENERATED_MANIFEST is logged and collected.
 * Any other path (including an empty one) is returned unchanged.
 */
private static String checkForManifestCreatedWithMD5SumTools(final String path, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore){
  // startsWith() is safe for an empty path, whereas charAt(0) would throw StringIndexOutOfBoundsException
  if(!path.startsWith("*")){
    return path;
  }

  if(!warningsToIgnore.contains(BagitWarning.MD5SUM_TOOL_GENERATED_MANIFEST)){
    logger.warn(messages.getString("md5sum_generated_line_warning"), path);
    warnings.add(BagitWarning.MD5SUM_TOOL_GENERATED_MANIFEST);
  }

  return path.substring(1);
}

/*
* Check that the file specified has not changed its normalization (i.e. have the bytes changed but it still looks the same?)
*/
private static void checkNormalization(final String path, final Path rootDir, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore) throws IOException{
if(!warningsToIgnore.contains(BagitWarning.DIFFERENT_NORMALIZATION)){

final Path fileToCheck = rootDir.resolve(path).normalize();
final Path dirToCheck = fileToCheck.getParent();
if(dirToCheck == null){
if(dirToCheck == null){
final String formattedMessage = messages.getString("cannot_access_parent_path_error");
throw new IOException(MessageFormatter.format(formattedMessage, fileToCheck).getMessage()); //to satisfy findbugs
}
final String normalizedFileToCheck = normalizePathToNFD(fileToCheck);

try(final DirectoryStream<Path> files = Files.newDirectoryStream(dirToCheck)){
for(final Path file : files){
final String normalizedFile = normalizePathToNFD(file);

if(!file.equals(fileToCheck) && normalizedFileToCheck.equals(normalizedFile)){
logger.warn(messages.getString("different_normalization_in_manifest_warning"), fileToCheck);
warnings.add(BagitWarning.DIFFERENT_NORMALIZATION);
Expand All @@ -156,14 +156,14 @@ private static void checkNormalization(final String path, final Path rootDir, fi
}
}
}

/*
 * Return the textual form of the path in unicode canonical decomposition (NFD).
 */
static String normalizePathToNFD(final Path path){
  final String pathAsText = path.toString();
  return Normalizer.normalize(pathAsText, Normalizer.Form.NFD);
}

/*
* check for a bag within a bag
*/
Expand All @@ -173,7 +173,7 @@ private static void checkForBagWithinBag(final String line, final Set<BagitWarni
warnings.add(BagitWarning.BAG_WITHIN_A_BAG);
}
}

/*
* Check for relative paths (i.e. ./) in the manifest
*/
Expand All @@ -183,7 +183,7 @@ private static void checkForRelativePaths(final String line, final Set<BagitWarn
warnings.add(BagitWarning.LEADING_DOT_SLASH);
}
}

/*
* like .DS_Store or Thumbs.db
*/
Expand All @@ -193,26 +193,24 @@ private static void checkForOSSpecificFiles(final String line, final Set<BagitWa
warnings.add(BagitWarning.OS_SPECIFIC_FILES);
}
}

/*
* Check for anything weaker than SHA-512
*/
static void checkAlgorthm(final String algorithm, final Set<BagitWarning> warnings, final Collection<BagitWarning> warningsToIgnore){
final String upperCaseAlg = algorithm.toUpperCase();
if(!warningsToIgnore.contains(BagitWarning.WEAK_CHECKSUM_ALGORITHM) &&
(upperCaseAlg.startsWith("MD") || upperCaseAlg.matches("SHA(1|224|256|384)?"))){
if(!warningsToIgnore.contains(BagitWarning.WEAK_CHECKSUM_ALGORITHM)
&& (upperCaseAlg.startsWith("MD") || upperCaseAlg.matches("SHA(1|224|256|384)?"))){
logger.warn(messages.getString("weak_algorithm_warning"), algorithm);
warnings.add(BagitWarning.WEAK_CHECKSUM_ALGORITHM);
}

else if(!warningsToIgnore.contains(BagitWarning.NON_STANDARD_ALGORITHM) && !"SHA-512".equals(upperCaseAlg)){
} else if(!warningsToIgnore.contains(BagitWarning.NON_STANDARD_ALGORITHM) && !"SHA-512".equals(upperCaseAlg)){
logger.warn(messages.getString("non_standard_algorithm_warning"), algorithm);
warnings.add(BagitWarning.NON_STANDARD_ALGORITHM);
}
}

//for unit test only
static String getOsFilesRegex() {
static String getOsFilesRegex(){
return OS_FILES_REGEX;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,38 +16,39 @@
import gov.loc.repository.bagit.reader.MetadataReader;

/**
* Part of the BagIt conformance suite.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As far as I can tell, this file only contains formatting changes, is that correct?

* This checker checks the bag metadata (bag-info.txt) for various problems.
* Part of the BagIt conformance suite. This checker checks the bag metadata
* (bag-info.txt) for various problems.
*/
public final class MetadataChecker {
private static final Logger logger = LoggerFactory.getLogger(MetadataChecker.class);
private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");
private MetadataChecker() {
  // no-op: private so the class cannot be instantiated from outside
}
public static void checkBagMetadata(final Path bagitDir, final Charset encoding, final Set<BagitWarning> warnings,
final Collection<BagitWarning> warningsToIgnore) throws IOException, InvalidBagMetadataException{

public static void checkBagMetadata(final Path bagitDir, final Charset encoding, final Set<BagitWarning> warnings,
final Collection<BagitWarning> warningsToIgnore) throws IOException, InvalidBagMetadataException {
checkForPayloadOxumMetadata(bagitDir, encoding, warnings, warningsToIgnore);
}

/*
* Check that the metadata contains the Payload-Oxum key-value pair
*/
private static void checkForPayloadOxumMetadata(final Path bagitDir, final Charset encoding, final Set<BagitWarning> warnings,
final Collection<BagitWarning> warningsToIgnore) throws IOException, InvalidBagMetadataException{
if(!warningsToIgnore.contains(BagitWarning.PAYLOAD_OXUM_MISSING)){
private static void checkForPayloadOxumMetadata(final Path bagitDir, final Charset encoding,
final Set<BagitWarning> warnings,
final Collection<BagitWarning> warningsToIgnore) throws IOException, InvalidBagMetadataException {
if (!warningsToIgnore.contains(BagitWarning.PAYLOAD_OXUM_MISSING)) {
final List<SimpleImmutableEntry<String, String>> metadata = MetadataReader.readBagMetadata(bagitDir, encoding);
boolean containsPayloadOxum = false;
for(final SimpleImmutableEntry<String, String> pair : metadata){
if("Payload-Oxum".equals(pair.getKey())){

for (final SimpleImmutableEntry<String, String> pair : metadata) {
if ("Payload-Oxum".equals(pair.getKey())) {
containsPayloadOxum = true;
}
}
if(!containsPayloadOxum){

if (!containsPayloadOxum) {
logger.warn(messages.getString("missing_payload_oxum_warning"));
warnings.add(BagitWarning.PAYLOAD_OXUM_MISSING);
}
Expand Down
Loading