diff --git a/eprints2bags/__main__.py b/eprints2bags/__main__.py index e784477..ec449a7 100755 --- a/eprints2bags/__main__.py +++ b/eprints2bags/__main__.py @@ -29,6 +29,7 @@ import bagit from collections import defaultdict +from commonpy.data_utils import flattened, parsed_datetime, pluralized import getpass from humanize import intcomma import keyring @@ -51,7 +52,6 @@ import eprints2bags from eprints2bags import print_version from .constants import ON_WINDOWS, KEYRING_PREFIX -from .data_helpers import flatten, expand_range, parse_datetime, plural from .eprints import * from .exit_codes import ExitCode from .files import create_archive, verify_archive, archive_extension @@ -387,7 +387,7 @@ def main(api_url = 'A', bag_action = 'B', processes = 'C', diff_with = 'D', lastmod = None else: try: - lastmod = parse_datetime(lastmod) + lastmod = parsed_datetime(lastmod) lastmod_str = lastmod.strftime(_LASTMOD_PRINT_FORMAT) if __debug__: log(f'parsed lastmod as {lastmod_str}') except Exception as ex: @@ -458,7 +458,7 @@ def main(api_url = 'A', bag_action = 'B', processes = 'C', diff_with = 'D', inform(f'Fetching full records list from {api_url}') wanted = eprints_records_list(raw_list) - inform(f'Will process {intcomma(len(wanted))} EPrints {plural("record", wanted)}.') + inform(f'Will process {pluralized("EPrints record", wanted, True)}.') if lastmod: inform(f'Will only keep records modified after {lastmod_str}.') if status: @@ -508,7 +508,7 @@ def main(api_url = 'A', bag_action = 'B', processes = 'C', diff_with = 'D', inform('─'*os.get_terminal_size(0)[0]) count = len(wanted) - len(missing) - len(skipped) - inform(f'Wrote {intcomma(count)} EPrints {plural("record", count)} to {output_dir}') + inform(f'Wrote {pluralized("EPrints record", count, True)} to {output_dir}') if len(skipped) > 0: inform('The following records were skipped: '+ ', '.join(skipped) + '.') if len(missing) > 0: @@ -561,7 +561,7 @@ def parsed_id_list(id_list): if ',' not in id_list and '-' not in id_list: alert_fatal('Unable to understand list of record identifiers') exit(int(ExitCode.bad_arg)) - return flatten(expand_range(x) for x in id_list.split(',')) + return flattened(expand_range(x) for x in id_list.split(',')) def credentials(api_url, user, pswd, use_keyring, reset = False): diff --git a/eprints2bags/data_helpers.py b/eprints2bags/data_helpers.py deleted file mode 100644 index 455c141..0000000 --- a/eprints2bags/data_helpers.py +++ /dev/null @@ -1,66 +0,0 @@ -''' -data_helpers: data manipulation utilities -''' - -import dateparser -import datetime - -# Based on http://stackoverflow.com/a/10824484/743730 -def flatten(iterable): - '''Flatten a list produced by an iterable. Non-recursive.''' - iterator, sentinel, stack = iter(iterable), object(), [] - while True: - value = next(iterator, sentinel) - if value is sentinel: - if not stack: - break - iterator = stack.pop() - elif isinstance(value, str): - yield value - else: - try: - new_iterator = iter(value) - except TypeError: - yield value - else: - stack.append(iterator) - iterator = new_iterator - - -def ordinal(n): - '''Print a number followed by "st" or "nd" or "rd", as appropriate.''' - # Spectacular algorithm by user "Gareth" at this posting: - # http://codegolf.stackexchange.com/a/4712 - return '{}{}'.format(n, 'tsnrhtdd'[(n/10%10!=1)*(n%10<4)*n%10::4]) - - -def expand_range(text): - '''Return individual numbers for a range expressed as X-Y.''' - # This makes the range 1-100 be 1, 2, ..., 100 instead of 1, 2, ..., 99 - if '-' in text: - range_list = text.split('-') - range_list.sort(key = int) - return [*map(str, range(int(range_list[0]), int(range_list[1]) + 1))] - else: - return text - - -def parse_datetime(string): - '''Parse a human-written time/date string using dateparser's parse() -function with predefined settings.''' - return dateparser.parse(string, settings = {'RETURN_AS_TIMEZONE_AWARE': True}) - - -def plural(word, count): - '''Simple pluralization; adds "s" to the end of "word" if count > 1.''' - if word.endswith('y'): - pluralized = word[:-1] + 'ies' - else: - pluralized = word + 's' - if isinstance(count, int): - return pluralized if count > 1 else word - elif isinstance(count, (list, set, dict)) or type(count) is {}.values().__class__: - return pluralized if len(count) > 1 else word - else: - # If we don't recognize the kind of thing it is, return it unchanged. - return word diff --git a/eprints2bags/eprints.py b/eprints2bags/eprints.py index d851ca3..cd17053 100644 --- a/eprints2bags/eprints.py +++ b/eprints2bags/eprints.py @@ -16,6 +16,7 @@ import codecs from collections import defaultdict +from commonpy.data_utils import parsed_datetime from lxml import etree import os from os import path @@ -23,7 +24,6 @@ from sidetrack import log import eprints2bags -from .data_helpers import parse_datetime from .exceptions import * from .network import net from .ui import inform, warn, alert, alert_fatal @@ -121,7 +121,7 @@ def eprints_xml(number, base_url, user, password, missing_ok): def eprints_lastmod(xml): lastmod_elem = xml.find('.//{' + _EPRINTS_XMLNS + '}lastmod') - return parse_datetime(lastmod_elem.text) + return parsed_datetime(lastmod_elem.text) def eprints_status(xml):