Skip to content

Commit

Permalink
Don't download history dir by default
Browse files Browse the repository at this point in the history
  • Loading branch information
jjjake committed Mar 15, 2020
1 parent 0f36215 commit 35f07ba
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 2 deletions.
2 changes: 2 additions & 0 deletions HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ Release History
- Added ``ArchiveSession.get_my_catalog()`` method for retrieving running/queued tasks.
- Removed backports.csv requirement for newer Python releases.
- Authorization header is now used for metadata reads, to support privileged access to /metadata.
- ``ia download`` no longer downloads history dir by default.
- Added ``ignore_history_dir`` to ``Item.download()``. The default is False.

**Bugfixes**

Expand Down
8 changes: 6 additions & 2 deletions internetarchive/cli/ia_download.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
-d, --dry-run Print URLs to stdout and exit.
-i, --ignore-existing Clobber files already downloaded.
-C, --checksum Skip files based on checksum [default: False].
-R, --retries=<retries> Set number of retries to <retries> [default: 5]
-R, --retries=<retries> Set number of retries to <retries> [default: 5].
-I, --itemlist=<file> Download items from a specified file. Itemlists should
be a plain text file with one identifier per line.
-S, --search=<query> Download items returned from a specified search query.
Expand All @@ -58,6 +58,7 @@
--no-change-timestamp Don't change the timestamp of downloaded files to reflect
the source material.
-p, --parameters=<key:value>... Parameters to send with your query (e.g. `cnt=0`).
-a, --download-history Also download files from the history dir.
"""
from __future__ import print_function, absolute_import
import os
Expand Down Expand Up @@ -98,6 +99,7 @@ def main(argv, session):
'--search-parameters': Use(lambda x: get_args_dict(x, query_string=True)),
'--on-the-fly': Use(bool),
'--no-change-timestamp': Use(bool),
'--download-history': Use(bool),
'--parameters': Use(lambda x: get_args_dict(x, query_string=True)),
})

Expand Down Expand Up @@ -193,6 +195,7 @@ def main(argv, session):
continue

# Otherwise, download the entire item.
ignore_history_dir = True if not args['--download-history'] else False
_errors = item.download(
files=files,
formats=args['--format'],
Expand All @@ -209,7 +212,8 @@ def main(argv, session):
ignore_errors=True,
on_the_fly=args['--on-the-fly'],
no_change_timestamp=args['--no-change-timestamp'],
params=args['--parameters']
params=args['--parameters'],
ignore_history_dir=ignore_history_dir,
)
if _errors:
errors.append(_errors)
Expand Down
9 changes: 9 additions & 0 deletions internetarchive/item.py
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,7 @@ def download(self,
on_the_fly=None,
return_responses=None,
no_change_timestamp=None,
ignore_history_dir=None,
params=None):
"""Download files from an item.
Expand Down Expand Up @@ -495,6 +496,10 @@ def download(self,
:param params: (optional) URL parameters to send with
download request (e.g. `cnt=0`).
:type ignore_history_dir: bool
:param ignore_history_dir: (optional) Do not download any files from the history
dir. This param defaults to ``False``.
:rtype: bool
:returns: True if all files have been downloaded successfully.
"""
Expand All @@ -507,6 +512,7 @@ def download(self,
no_directory = False if no_directory is None else no_directory
return_responses = False if not return_responses else True
no_change_timestamp = False if not no_change_timestamp else no_change_timestamp
ignore_history_dir = False if ignore_history_dir is None else ignore_history_dir
params = None if not params else params

if not dry_run:
Expand Down Expand Up @@ -559,6 +565,9 @@ def download(self,
responses = list()

for f in files:
if ignore_history_dir is True:
if f.name.startswith('history/'):
continue
if no_directory:
path = f.name
else:
Expand Down

0 comments on commit 35f07ba

Please sign in to comment.