diff --git a/AUTHORS.rst b/AUTHORS.rst index 976cc91b..592ecae5 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -19,3 +19,4 @@ Patches and Suggestions ----------------------- - VM Brasseur +- Russ Magee diff --git a/internetarchive/cli/ia_download.py b/internetarchive/cli/ia_download.py index 335bb89b..f749811f 100644 --- a/internetarchive/cli/ia_download.py +++ b/internetarchive/cli/ia_download.py @@ -33,6 +33,7 @@ -R, --retries= Set number of retries to [default: 5]. -I, --itemlist= Download items from a specified file. Itemlists should be a plain text file with one identifier per line. + -n, --start-idx= Start immediately at item -S, --search= Download items returned from a specified search query. -P, --search-parameters=... Download items returned from a specified search query. -g, --glob= Only download files whose filename matches the @@ -110,6 +111,7 @@ def main(argv, session: ArchiveSession) -> None: '--download-history': Use(bool), '--parameters': Use(lambda x: get_args_dict(x, query_string=True)), '--source': list, + '--start-idx': Use(lambda x: x[0]), '--exclude-source': list, '--timeout': Or([], And(Use(lambda t: ast.literal_eval(t[0])), Or(int, float), error=timeout_msg)) @@ -128,6 +130,8 @@ def main(argv, session: ArchiveSession) -> None: print(f'{exc}\n{printable_usage(__doc__)}', file=sys.stderr) sys.exit(1) + start_idx = int(args['--start-idx'])-1 + retries = int(args['--retries']) ids: list[File | str] | Search | TextIO @@ -176,44 +180,48 @@ def main(argv, session: ArchiveSession) -> None: else: item_index = None - try: - item = session.get_item(identifier) - except Exception as exc: - print(f'{identifier}: failed to retrieve item metadata - errors', file=sys.stderr) - raise - if 'You are attempting to make an HTTPS' in str(exc): - print(f'\n{exc}', file=sys.stderr) - sys.exit(1) - else: - continue - - # Otherwise, download the entire item. - ignore_history_dir = True if not args['--download-history'] else False - _errors = item.download( - files=files, - formats=args['--format'], - glob_pattern=args['--glob'], - exclude_pattern=args['--exclude'], - dry_run=args['--dry-run'], - verbose=not args['--quiet'], - ignore_existing=args['--ignore-existing'], - checksum=args['--checksum'], - destdir=args['--destdir'], - no_directory=args['--no-directories'], - retries=retries, - item_index=item_index, - ignore_errors=True, - on_the_fly=args['--on-the-fly'], - no_change_timestamp=args['--no-change-timestamp'], - params=args['--parameters'], - ignore_history_dir=ignore_history_dir, - source=args['--source'], - exclude_source=args['--exclude-source'], - stdout=args['--stdout'], - timeout=args['--timeout'], - ) - if _errors: - errors.append(_errors) + if start_idx != None and i < start_idx: + pass + else: + try: + item = session.get_item(identifier) + except Exception as exc: + print(f'{identifier}: failed to retrieve item metadata - errors', file=sys.stderr) + raise + if 'You are attempting to make an HTTPS' in str(exc): + print(f'\n{exc}', file=sys.stderr) + sys.exit(1) + else: + continue + + # Otherwise, download the entire item. + ignore_history_dir = True if not args['--download-history'] else False + _errors = item.download( + files=files, + formats=args['--format'], + glob_pattern=args['--glob'], + exclude_pattern=args['--exclude'], + dry_run=args['--dry-run'], + verbose=not args['--quiet'], + ignore_existing=args['--ignore-existing'], + checksum=args['--checksum'], + destdir=args['--destdir'], + no_directory=args['--no-directories'], + retries=retries, + item_index=item_index, + ignore_errors=True, + on_the_fly=args['--on-the-fly'], + no_change_timestamp=args['--no-change-timestamp'], + params=args['--parameters'], + ignore_history_dir=ignore_history_dir, + source=args['--source'], + exclude_source=args['--exclude-source'], + stdout=args['--stdout'], + timeout=args['--timeout'], + ) + if _errors: + errors.append(_errors) + ##endif (start_idx) if errors: # TODO: add option for a summary/report. sys.exit(1)