-
Notifications
You must be signed in to change notification settings - Fork 84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add support for --wrap option #129
base: master
Are you sure you want to change the base?
Changes from 3 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14,6 +14,7 @@ | |
import signal | ||
import sys | ||
import tempfile | ||
import textwrap | ||
import unicodedata | ||
import warnings | ||
from collections import defaultdict | ||
|
@@ -137,7 +138,8 @@ def find_locale_dir(): | |
|
||
|
||
def make_bag( | ||
bag_dir, bag_info=None, processes=1, checksums=None, checksum=None, encoding="utf-8" | ||
bag_dir, bag_info=None, processes=1, checksums=None, checksum=None, | ||
encoding="utf-8", line_length=0 | ||
): | ||
""" | ||
Convert a given directory into a bag. You can pass in arbitrary | ||
|
@@ -256,7 +258,7 @@ def make_bag( | |
) | ||
|
||
bag_info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files) | ||
_make_tag_file("bag-info.txt", bag_info) | ||
_make_tag_file("bag-info.txt", bag_info, line_length) | ||
|
||
for c in checksums: | ||
_make_tagmanifest_file(c, bag_dir, encoding="utf-8") | ||
|
@@ -450,7 +452,7 @@ def payload_entries(self): | |
if key.startswith("data" + os.sep) | ||
) | ||
|
||
def save(self, processes=1, manifests=False): | ||
def save(self, processes=1, manifests=False, line_length=0): | ||
""" | ||
save will persist any changes that have been made to the bag | ||
metadata (self.info). | ||
|
@@ -463,6 +465,11 @@ def save(self, processes=1, manifests=False): | |
|
||
If you want to control the number of processes that are used when | ||
recalculating checksums use the processes parameter. | ||
|
||
If you want long tag values to be wrapped by breaking long strings at | ||
whitespace characters, set line_length to a value greater than 0. An | ||
integer value greater than 0 causes line-wrapping to be performed on | ||
a best-effort basis to limit line lengths to the given value. | ||
""" | ||
# Error checking | ||
if not self.path: | ||
|
@@ -514,7 +521,7 @@ def save(self, processes=1, manifests=False): | |
LOGGER.info(_("Updating Payload-Oxum in %s"), self.tag_file_name) | ||
self.info["Payload-Oxum"] = "%s.%s" % (total_bytes, total_files) | ||
|
||
_make_tag_file(self.tag_file_name, self.info) | ||
_make_tag_file(self.tag_file_name, self.info, line_length) | ||
|
||
# Update tag-manifest for changes to manifest & bag-info files | ||
for alg in self.algorithms: | ||
|
@@ -1219,16 +1226,37 @@ def _parse_tags(tag_file): | |
yield (tag_name, tag_value.strip()) | ||
|
||
|
||
def _make_tag_file(bag_info_path, bag_info): | ||
def _make_tag_file(bag_info_path, bag_info, line_length): | ||
headers = sorted(bag_info.keys()) | ||
with open_text_file(bag_info_path, "w") as f: | ||
for h in headers: | ||
values = bag_info[h] | ||
if not isinstance(values, list): | ||
values = [values] | ||
for txt in values: | ||
# strip CR, LF and CRLF so they don't mess up the tag file | ||
txt = re.sub(r"\n|\r|(\r\n)", "", force_unicode(txt)) | ||
txt = force_unicode(txt) | ||
if line_length > 1: | ||
# Account for colon & space written after the property name. | ||
prop_width = len(h) + 2 | ||
first_break = prop_width + len(txt.split(None, 1)[0]) | ||
if line_length <= first_break: | ||
# Start value on the next line. | ||
txt = '\n'.join(textwrap.wrap(txt, width=line_length, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. These two calls are almost identical but it takes time to confirm that. Do you think it would be worth assigning There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Regarding the duplication: although Regarding splitting it out to a separate function: agreed. However, I might recommend naming the function differently, if you don't object, because in this case it wouldn't necessarily wrap the tag value. So perhaps something like There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I went ahead and implemented the changes discussed above in the latest commit. |
||
break_long_words=False, | ||
break_on_hyphens=False, | ||
initial_indent='\n ', | ||
subsequent_indent=' ')) | ||
else: | ||
# Account for tag name by temporarily adding a leading | ||
# space before calling wrap(), then removing the space. | ||
txt = '\n'.join(textwrap.wrap(txt, width=line_length, | ||
break_long_words=False, | ||
break_on_hyphens=False, | ||
initial_indent=' '*prop_width, | ||
subsequent_indent=' ')) | ||
txt = txt[prop_width:] | ||
else: | ||
txt = re.sub(r"\n|\r|(\r\n)", "", txt) | ||
f.write("%s: %s\n" % (h, txt)) | ||
|
||
|
||
|
@@ -1499,6 +1527,17 @@ def _make_parser(): | |
" without performing checksum validation to detect corruption." | ||
), | ||
) | ||
parser.add_argument( | ||
"--wrap", | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I like the idea but I was wondering whether “wrap” might be a little too generic a name for the option. What do you think about something like There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, I struggled with naming the option! The current result was the outcome of trying to satisfy two goals: (1) emulating the existing
I had hoped this was clear enough, but I have to agree that by itself, the current combination leaves ambiguous what is being wrapped. I'm fine with changing it. I lean towards There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. +1 on There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (Just mentioning it in case it wasn't obvious: all these discussed changes are in the update to the PR made a few days ago.) |
||
type=int, | ||
dest="line_length", | ||
default=0, | ||
help=_( | ||
"Limit line lengths in the tag file (bag-info.txt) by" | ||
" wrapping long values and indenting subsequent lines with a" | ||
" space character. (Default: don't.)" | ||
), | ||
) | ||
|
||
checksum_args = parser.add_argument_group( | ||
_("Checksum Algorithms"), | ||
|
@@ -1596,6 +1635,7 @@ def main(): | |
bag_info=args.bag_info, | ||
processes=args.processes, | ||
checksums=args.checksums, | ||
line_length=args.line_length, | ||
) | ||
except Exception as exc: | ||
LOGGER.error( | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm wondering whether
line_length
should be something like max_tag_line_length
so it's a little more obvious what it applies to.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I agree
line_length
is a bit too general. In thinking about what would make sense with an option named --wrap-tags
(cf. conversation above), I worry max_tag_line_length
might be too much:
What about
tag_wrap_column
? That leads to: