Skip to content

Commit

Permalink
refs #93 - added method for inputting your own metadata when bagging …
Browse files Browse the repository at this point in the history
…in place, and defaults to include date bagged and payload-oxum
  • Loading branch information
johnscancella committed Jun 29, 2017
1 parent b23b6d5 commit f2fe534
Show file tree
Hide file tree
Showing 4 changed files with 87 additions and 30 deletions.
75 changes: 65 additions & 10 deletions src/main/java/gov/loc/repository/bagit/creator/BagCreator.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@
import java.nio.file.Path;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.text.SimpleDateFormat;
import java.util.Collection;
import java.util.Date;
import java.util.Locale;
import java.util.Map;
import java.util.ResourceBundle;

Expand All @@ -16,40 +19,78 @@
import gov.loc.repository.bagit.annotation.Incubating;
import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Manifest;
import gov.loc.repository.bagit.domain.Metadata;
import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.hash.Hasher;
import gov.loc.repository.bagit.hash.SupportedAlgorithm;
import gov.loc.repository.bagit.util.PathUtils;
import gov.loc.repository.bagit.writer.BagitFileWriter;
import gov.loc.repository.bagit.writer.ManifestWriter;
import gov.loc.repository.bagit.writer.MetadataWriter;

/**
* Responsible for creating a bag in place.
*/
//TODO look at cleaning up this class so we don't have to ignore CPD
public final class BagCreator {
private static final Logger logger = LoggerFactory.getLogger(BagCreator.class);
private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");
private static final String DATE_FORMAT = "yyyy-MM-dd";

private BagCreator(){}

@SuppressWarnings("CPD-START")
/**
* Creates a basic(only required elements) bag in place for version 0.97.
* Creates a bag in place for version 0.97.
* This method moves and creates files, thus if an error is thrown during operation it may leave the filesystem
* in an unknown state of transition. Thus this is <b>not thread safe</b>
*
* @param root the directory that will become the base of the bag and where to start searching for content
* @param algorithms a collection of {@link SupportedAlgorithm} implementations
* @param includeHidden to include hidden files when generating the bagit files, like the manifests
* @param metadata the metadata to include when creating the bag. Payload-Oxum and Bagging-Date will be overwritten
*
* @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
* @throws IOException if there is a problem writing or moving file(s)
*
* @return a {@link Bag} object representing the newly created bagit bag
*/
public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
@SuppressWarnings("CPD-START")
public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden, final Metadata metadata) throws NoSuchAlgorithmException, IOException{
  // Bags created in place are always version 0.97 and rooted at the supplied directory.
  final Version bagVersion = new Version(0, 97);
  final Bag createdBag = new Bag(bagVersion);
  createdBag.setRootDir(root);
  logger.info(messages.getString("creating_bag"), createdBag.getVersion(), root);

  // Step 1: create the "data" directory under root and relocate the existing content into it.
  final Path payloadDir = root.resolve("data");
  moveFilesToDataDirectory(root, payloadDir, includeHidden);

  // Step 2: write the bagit declaration file into the bag root.
  BagitFileWriter.writeBagitFile(createdBag.getVersion(), createdBag.getFileEncoding(), root);

  // Step 3: hash the payload and write the payload and tag manifests.
  createManifests(root, payloadDir, createdBag, algorithms, includeHidden);

  // Step 4: attach the metadata (Payload-Oxum and Bagging-Date are set there) and write it out.
  // NOTE(review): the tag manifests are written in step 3, before the metadata file exists -
  // confirm whether the metadata file is expected to be covered by the tag manifests.
  createMetadataFile(root, payloadDir, createdBag, metadata);

  return createdBag;
}

/**
 * Creates a bag in place for version 0.97 without caller supplied metadata.
 * A newly constructed {@link Metadata} is handed to
 * {@link #bagInPlace(Path, Collection, boolean, Metadata)}, which does the actual work.
 * Because files are moved and created, an error thrown part way through may leave the filesystem
 * in an unknown state of transition. Thus this is <b>not thread safe</b>
 *
 * @param root the directory that will become the base of the bag and where to start searching for content
 * @param algorithms a collection of {@link SupportedAlgorithm} implementations
 * @param includeHidden to include hidden files when generating the bagit files, like the manifests
 *
 * @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
 * @throws IOException if there is a problem writing or moving file(s)
 *
 * @return a {@link Bag} object representing the newly created bagit bag
 */
public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
  final Metadata defaultMetadata = new Metadata();
  return bagInPlace(root, algorithms, includeHidden, defaultMetadata);
}

private static void moveFilesToDataDirectory(final Path root, final Path dataDir, final boolean includeHidden) throws IOException{
Files.createDirectory(dataDir);
try(final DirectoryStream<Path> directoryStream = Files.newDirectoryStream(root)){
for(final Path path : directoryStream){
Expand All @@ -58,14 +99,15 @@ public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorith
}
}
}

}

private static void createManifests(final Path root, final Path dataDir, final Bag bag, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws IOException, NoSuchAlgorithmException{
logger.info(messages.getString("creating_payload_manifests"));
final Map<Manifest, MessageDigest> payloadFilesMap = Hasher.createManifestToMessageDigestMap(algorithms);
final CreatePayloadManifestsVistor payloadVisitor = new CreatePayloadManifestsVistor(payloadFilesMap, includeHidden);
Files.walkFileTree(dataDir, payloadVisitor);

bag.getPayLoadManifests().addAll(payloadFilesMap.keySet());
BagitFileWriter.writeBagitFile(bag.getVersion(), bag.getFileEncoding(), root);
ManifestWriter.writePayloadManifests(bag.getPayLoadManifests(), root, root, bag.getFileEncoding());

logger.info(messages.getString("creating_tag_manifests"));
Expand All @@ -75,11 +117,22 @@ public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorith

bag.getTagManifests().addAll(tagFilesMap.keySet());
ManifestWriter.writeTagManifests(bag.getTagManifests(), root, root, bag.getFileEncoding());
}

/**
 * Attaches the supplied metadata to the bag, refreshes the Payload-Oxum and Bagging-Date entries
 * and writes the metadata file into the bag root.
 * Any existing Bagging-Date entry is removed and replaced with today's date formatted as
 * {@code yyyy-MM-dd}; the Payload-Oxum is upserted with a freshly calculated value.
 *
 * @param root the base directory of the bag, where the metadata file is written
 * @param dataDir the payload directory; only used for the debug log message
 * @param bag the bag being created, its metadata is replaced by {@code metadata}
 * @param metadata the metadata to store on the bag and write to disk
 *
 * @throws IOException if there is a problem calculating the payload-oxum or writing the metadata file
 */
private static void createMetadataFile(final Path root, final Path dataDir, final Bag bag, final Metadata metadata) throws IOException{
  bag.setMetadata(metadata);

  logger.debug(messages.getString("calculating_payload_oxum"), dataDir);
  // NOTE(review): the oxum is computed on the version dependent data directory resolved from root,
  // while the log message reports dataDir - presumably the same path for 0.97; confirm.
  final String payloadOxum = PathUtils.generatePayloadOxum(PathUtils.getDataDir(bag.getVersion(), root));
  bag.getMetadata().upsertPayloadOxum(payloadOxum);

  // Drop any caller supplied Bagging-Date so that only the current date is recorded.
  // The formatter is created per call, so no formatter instance is shared between threads.
  bag.getMetadata().remove("Bagging-Date");
  bag.getMetadata().add("Bagging-Date", new SimpleDateFormat(DATE_FORMAT, Locale.ENGLISH).format(new Date()));

  logger.info(messages.getString("creating_metadata_file"));
  MetadataWriter.writeBagMetadata(bag.getMetadata(), bag.getVersion(), root, bag.getFileEncoding());
}

@SuppressWarnings("CPD-END")
/**
* Creates a basic (only required elements) .bagit bag in place.
* This creates files and directories, thus if an error is thrown during operation it may leave the filesystem
Expand All @@ -88,11 +141,14 @@ public static Bag bagInPlace(final Path root, final Collection<SupportedAlgorith
* @param root the directory that will become the base of the bag and where to start searching for content
* @param algorithms a collection of {@link SupportedAlgorithm} implementations
* @param includeHidden to include hidden files when generating the bagit files, like the manifests
* @return a {@link Bag} object representing the newly created bagit bag
*
* @throws NoSuchAlgorithmException if {@link MessageDigest} can't find the algorithm
* @throws IOException if there is a problem writing files or .bagit directory
*
* @return a {@link Bag} object representing the newly created bagit bag
*/
@Incubating
@SuppressWarnings("CPD-END")
public static Bag createDotBagit(final Path root, final Collection<SupportedAlgorithm> algorithms, final boolean includeHidden) throws NoSuchAlgorithmException, IOException{
final Bag bag = new Bag(new Version(2, 0));
bag.setRootDir(root);
Expand Down Expand Up @@ -120,5 +176,4 @@ public static Bag createDotBagit(final Path root, final Collection<SupportedAlgo

return bag;
}

}
}
18 changes: 18 additions & 0 deletions src/main/java/gov/loc/repository/bagit/util/PathUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import gov.loc.repository.bagit.domain.Bag;
import gov.loc.repository.bagit.domain.Version;
import gov.loc.repository.bagit.verify.FileCountAndTotalSizeVistor;

/**
* Convenience class for dealing with various path issues
Expand Down Expand Up @@ -123,4 +124,21 @@ public static Path getBagitDir(final Version version, final Path bagRoot){

return bagRoot;
}

/**
 * Builds the payload-oxum value for a payload directory.
 * The directory tree is walked with a {@link FileCountAndTotalSizeVistor} and the result is
 * the visitor's total size, a period, and the visitor's count.
 *
 * @param dataDir the directory to calculate the payload-oxum
 *
 * @return the string representation of the payload-oxum value
 *
 * @throws IOException if there is an error reading any of the files
 */
public static String generatePayloadOxum(final Path dataDir) throws IOException{
  final FileCountAndTotalSizeVistor oxumVisitor = new FileCountAndTotalSizeVistor();
  Files.walkFileTree(dataDir, oxumVisitor);

  // Assemble "<total size>.<count>" from what the visitor accumulated during the walk.
  final StringBuilder oxum = new StringBuilder();
  oxum.append(oxumVisitor.getTotalSize());
  oxum.append(".");
  oxum.append(oxumVisitor.getCount());
  return oxum.toString();
}
}
22 changes: 2 additions & 20 deletions src/main/java/gov/loc/repository/bagit/writer/BagWriter.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.HashSet;
import java.util.ResourceBundle;
import java.util.Map.Entry;
import java.util.ResourceBundle;
import java.util.Set;

import org.slf4j.Logger;
Expand All @@ -17,7 +17,6 @@
import gov.loc.repository.bagit.domain.Manifest;
import gov.loc.repository.bagit.hash.Hasher;
import gov.loc.repository.bagit.util.PathUtils;
import gov.loc.repository.bagit.verify.FileCountAndTotalSizeVistor;

/**
* responsible for writing out a {@link Bag}
Expand Down Expand Up @@ -47,7 +46,7 @@ public static void write(final Bag bag, final Path outputDir) throws IOException
final Path bagitDir = PayloadWriter.writeVersionDependentPayloadFiles(bag, outputDir);

logger.debug(messages.getString("upsert_payload_oxum"));
final String payloadOxum = generatePayloadOxum(PathUtils.getDataDir(bag.getVersion(), outputDir));
final String payloadOxum = PathUtils.generatePayloadOxum(PathUtils.getDataDir(bag.getVersion(), outputDir));
bag.getMetadata().upsertPayloadOxum(payloadOxum);

logger.debug(messages.getString("writing_bagit_file"));
Expand All @@ -73,23 +72,6 @@ public static void write(final Bag bag, final Path outputDir) throws IOException
}
}

/**
 * Calculate the payload-oxum of the files in the payload directory.
 * This delegates to {@link PathUtils#generatePayloadOxum(Path)} so that the calculation
 * lives in a single place instead of being duplicated in this class.
 *
 * @param dataDir the directory to calculate the payload-oxum
 *
 * @return the string representation of the payload-oxum value
 *
 * @throws IOException if there is an error reading any of the files
 */
private static String generatePayloadOxum(final Path dataDir) throws IOException{
  return PathUtils.generatePayloadOxum(dataDir);
}

/*
* Update the tag manifest because the checksums of the other tag files will have changed since we just wrote them out to disk
*/
Expand Down
2 changes: 2 additions & 0 deletions src/main/resources/MessageBundle.properties
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,8 @@ skipping_ignored_directory=Skipping [{}] since we are ignoring hidden directorie
creating_bag=Creating a bag with version: [{}] in directory: [{}].
creating_payload_manifests=Creating payload manifest(s).
creating_tag_manifests=Creating tag manifest(s).
calculating_payload_oxum=Calculating the payload oxum of the data directory [{}].
creating_metadata_file=Creating the bag metadata file (bag-info.txt or package-info.txt).

#for Hasher.java
adding_checksum=Adding [{}] to manifest with hash [{}].
Expand Down

0 comments on commit f2fe534

Please sign in to comment.