diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..be006de9
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,13 @@
+# Keep GitHub Actions up to date with GitHub's Dependabot...
+# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot
+# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem
+version: 2
+updates:
+ - package-ecosystem: github-actions
+ directory: /
+ groups:
+ github-actions:
+ patterns:
+ - "*" # Group all Actions updates into a single larger pull request
+ schedule:
+ interval: weekly
diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml
index 3b8173e0..10877bce 100644
--- a/.github/workflows/lint_python.yml
+++ b/.github/workflows/lint_python.yml
@@ -4,8 +4,8 @@ jobs:
lint_python:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
with:
cache: pip
python-version: 3.x
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
index 7c40d73c..7669221d 100644
--- a/.github/workflows/pre-commit.yml
+++ b/.github/workflows/pre-commit.yml
@@ -9,8 +9,8 @@ jobs:
pre-commit:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
with:
python-version: 3.x
cache: pip
diff --git a/.github/workflows/test_install.yml b/.github/workflows/test_install.yml
index 866d4782..eb365cef 100644
--- a/.github/workflows/test_install.yml
+++ b/.github/workflows/test_install.yml
@@ -11,6 +11,6 @@ jobs:
matrix:
setuptools-version: ["45.2.0", "58.1.0", "62.4.0"]
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
- run: pip install setuptools=="${{ matrix.setuptools-version }}"
- run: pip install .
diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml
index 8cf26cad..4dfd9072 100644
--- a/.github/workflows/tox.yml
+++ b/.github/workflows/tox.yml
@@ -7,10 +7,10 @@ jobs:
fail-fast: false
max-parallel: 1 # Avoid timeout errors
matrix:
- python: ['3.7', '3.8', '3.9', '3.10', '3.11', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9']
+ python: ['3.8', '3.9', '3.10', '3.11', '3.12', 'pypy-3.9', 'pypy-3.10']
steps:
- - uses: actions/checkout@v3
- - uses: actions/setup-python@v4
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python }}
cache: pip
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b1f255f4..cfe8f628 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -21,7 +21,7 @@ repos:
- id: trailing-whitespace
- repo: https://github.com/charliermarsh/ruff-pre-commit
- rev: v0.0.261
+ rev: v0.0.269
hooks:
- id: ruff
@@ -42,7 +42,7 @@ repos:
- id: codespell # See setup.cfg for args
- repo: https://github.com/pre-commit/mirrors-mypy
- rev: v1.1.1
+ rev: v1.3.0
hooks:
- id: mypy
additional_dependencies:
diff --git a/HISTORY.rst b/HISTORY.rst
index 4b92cb97..84a7eed0 100644
--- a/HISTORY.rst
+++ b/HISTORY.rst
@@ -3,13 +3,31 @@
Release History
---------------
-3.6.0 (?)
-+++++++++
+3.7.0 (2024-03-19)
+++++++++++++++++++
+
+**Features and Improvements**
+
+- Added support for JSON Patch test operations, via the ``expect`` parameter.
+- Added support for moving values via --append-list
+ (Now, rather than ignoring any requests where the value is already present,
+ --append-list will move the value to the end of the list).
+- Switched to importlib-metadata to drop deprecated pkg_resources.
+
+**Bugfixes**
+
+- Fixed automatic size hint on uploads.
+- Fixed bug where auth wasn't being sent for searches with user_aggs params.
+
+3.6.0 (2023-12-27)
+++++++++++++++++++
**Features and Improvements**
- Added ``set_scanner`` and ``--no-scanner`` options to upload to stop ia's default behavior
of setting the scanner field in meta.xml on initial upload.
+- ``0`` is now returned instead of an exception when search fails to retrieve the total number
+ of hits for a query.
3.5.0 (2023-05-09)
++++++++++++++++++
diff --git a/README.rst b/README.rst
index 55919367..b6bbdcdb 100644
--- a/README.rst
+++ b/README.rst
@@ -30,7 +30,7 @@ You can install this module via pip:
.. code:: bash
- $ pip install internetarchive
+ $ python3 -m pip install internetarchive
Binaries of the command-line tool are also available:
diff --git a/docs/source/installation.rst b/docs/source/installation.rst
index cadbe425..320cff93 100644
--- a/docs/source/installation.rst
+++ b/docs/source/installation.rst
@@ -5,37 +5,46 @@ Installation
System-Wide Installation
-------------------------
+-------------------------
Installing the ``internetarchive`` library globally on your system can be done with `pip `_.
-This is the recommended method for installing ``internetarchive`` (`see below `_ for details on installing pip)::
+This is the recommended method for installing ``internetarchive`` (`see below `_ for details on installing pip).
+If you are on Mac OS X, refer to the `Mac OS X section `_ below before proceeding.
+Once you're ready to install, run the following command::
- $ sudo pip install internetarchive
+ $ sudo python3 -m pip install internetarchive
-or, with `easy_install `_::
+Updating Your $PATH
+~~~~~~~~~~~~~~~~~~~
- $ sudo easy_install internetarchive
+Once you have successfully installed ``internetarchive``, you may need to update your ``$PATH`` (e.g. if running ``ia`` in your terminal returns an error).
+If you receive a command not found error, run the following command to update your ``$PATH``::
-Either of these commands will install the ``internetarchive`` Python library and ``ia`` command-line tool on your system.
+ $ echo "$(python3 -m site --user-base)/bin" | sudo tee -a /etc/paths
-**Note**: Some versions of Mac OS X come with Python libraries that are required by ``internetarchive`` (e.g. the Python package ``six``).
-This can cause installation issues. If your installation is failing with a message that looks something like::
+Updating ia
+~~~~~~~~~~~
- OSError: [Errno 1] Operation not permitted: '/var/folders/bk/3wx7qs8d0x79tqbmcdmsk1040000gp/T/pip-TGyjVo-uninstall/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/six-1.4.1-py2.7.egg-info'
+To update, you can run the following command::
-You can use the ``--ignore-installed`` parameter in ``pip`` to ignore the libraries that are already installed, and continue with the rest of the installation::
+ $ sudo python3 -m pip install --upgrade internetarchive
- $ sudo pip install --ignore-installed internetarchive
+Mac OS X
+~~~~~~~~
-More details on this issue can be found here: https://github.com/pypa/pip/issues/3165
+While newer versions of Mac OS X ship with Python 3 installed, it is recommended to install an updated version of Python 3.
+You can do so with `Homebrew `_::
+
+ $ brew install python3
Installing Pip
~~~~~~~~~~~~~~
-Pip can be `installed with the get-pip.py script `_::
+If you are running Python 3.4+, you should already have ``pip`` installed.
+If it is not already installed, it can be `installed with the get-pip.py script `_::
$ curl -LOs https://bootstrap.pypa.io/get-pip.py
- $ python get-pip.py
+ $ python3 get-pip.py
virtualenv
@@ -45,11 +54,7 @@ If you don't want to, or can't, install the package system-wide you can use ``vi
First, make sure ``virtualenv`` is installed on your system. If it's not, you can do so with pip::
- $ sudo pip install virtualenv
-
-With ``easy_install``::
-
- $ sudo easy_install virtualenv
+ $ sudo python3 -m pip install virtualenv
Or your systems package manager, ``apt-get`` for example::
@@ -92,7 +97,7 @@ If you are on an older operating system that only has Python 2 installed, it's h
You can install and use version v2.3.0 with pip::
- $ pip install internetarchive==2.3.0
+ $ sudo python2 -m pip install internetarchive==2.3.0
You can also download a binary of v2.3.0::
@@ -100,15 +105,6 @@ You can also download a binary of v2.3.0::
$ chmod +x ia-py2
-Snap
-----
-
-You can install the latest ``ia`` `snap `_, and help testing the most recent changes of the master branch in `all the supported Linux distros `_ with::
-
- $ sudo snap install ia --edge
-
-Every time a new version of ``ia`` is pushed to the store, you will get it updated automatically.
-
Get the Code
------------
diff --git a/internetarchive/__version__.py b/internetarchive/__version__.py
index 0736c156..8c3336cc 100644
--- a/internetarchive/__version__.py
+++ b/internetarchive/__version__.py
@@ -1 +1 @@
-__version__ = '3.6.0.dev2'
+__version__ = '3.7.0'
diff --git a/internetarchive/cli/ia.py b/internetarchive/cli/ia.py
index 8e044c36..e00e5b6b 100755
--- a/internetarchive/cli/ia.py
+++ b/internetarchive/cli/ia.py
@@ -64,7 +64,11 @@
import sys
from docopt import docopt, printable_usage
-from pkg_resources import DistributionNotFound, iter_entry_points
+
+if sys.version_info < (3, 10):
+ from importlib_metadata import entry_points # type: ignore[import]
+else:
+ from importlib.metadata import entry_points
from schema import Or, Schema, SchemaError # type: ignore[import]
from internetarchive import __version__
@@ -97,11 +101,11 @@ def load_ia_module(cmd: str):
return __import__(_module, fromlist=['internetarchive.cli'])
else:
_module = f'ia_{cmd}'
- for ep in iter_entry_points('internetarchive.cli.plugins'):
+ for ep in entry_points(group='internetarchive.cli.plugins'):
if ep.name == _module:
return ep.load()
raise ImportError
- except (ImportError, DistributionNotFound):
+    except ImportError:
print(f"error: '{cmd}' is not an ia command! See 'ia help'",
file=sys.stderr)
matches = '\t'.join(difflib.get_close_matches(cmd, cmd_aliases.values()))
diff --git a/internetarchive/cli/ia_metadata.py b/internetarchive/cli/ia_metadata.py
index c8a8d9aa..b4833695 100644
--- a/internetarchive/cli/ia_metadata.py
+++ b/internetarchive/cli/ia_metadata.py
@@ -22,17 +22,20 @@
ia metadata ... [--exists | --formats] [--header=...]
ia metadata ... --modify=... [--target=]
[--priority=] [--header=...]
- [--timeout=]
+ [--timeout=] [--expect=...]
ia metadata ... --remove=... [--priority=]
[--header=...] [--timeout=]
+ [--expect=...]
ia metadata ... [--append=... | --append-list=...]
[--priority=] [--target=]
[--header=...] [--timeout=]
+ [--expect=...]
ia metadata ... --insert=... [--priority=]
[--target=] [--header=...]
- [--timeout=]
+ [--timeout=] [--expect=...]
ia metadata --spreadsheet= [--priority=]
[--modify=...] [--header=...] [--timeout=]
+ [--expect=...]
ia metadata --help
options:
@@ -42,8 +45,10 @@
-t, --target= The metadata target to modify.
-a, --append=... Append a string to a metadata element.
-A, --append-list=... Append a field to a metadata element.
- -i, --insert=... Insert a value into a multi-value field given
+ -i, --insert=... Insert a value into a multi-value field given
an index (e.g. `--insert=collection[0]:foo`).
+ -E, --expect=... Test an expectation server-side before applying
+ patch to item metadata.
-s, --spreadsheet= Modify metadata in bulk using a spreadsheet as
input.
-e, --exists Check if an item exists
@@ -79,13 +84,14 @@
def modify_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Response:
append = bool(args['--append'])
+ expect = get_args_dict(args['--expect'])
append_list = bool(args['--append-list'])
insert = bool(args['--insert'])
try:
r = item.modify_metadata(metadata, target=args['--target'], append=append,
- priority=args['--priority'], append_list=append_list,
- headers=args['--header'], insert=insert,
- timeout=args['--timeout'])
+ expect=expect, priority=args['--priority'],
+ append_list=append_list, headers=args['--header'],
+ insert=insert, timeout=args['--timeout'])
assert isinstance(r, Response) # mypy: modify_metadata() -> Request | Response
except ItemLocateError as exc:
print(f'{item.identifier} - error: {exc}', file=sys.stderr)
@@ -178,6 +184,7 @@ def main(argv: dict, session: session.ArchiveSession) -> None:
str: bool,
'': list,
'--modify': list,
+ '--expect': list,
'--header': Or(None, And(Use(get_args_header_dict), dict),
error='--header must be formatted as --header="key:value"'),
'--append': list,
diff --git a/internetarchive/cli/ia_reviews.py b/internetarchive/cli/ia_reviews.py
index d06de76d..759ee536 100644
--- a/internetarchive/cli/ia_reviews.py
+++ b/internetarchive/cli/ia_reviews.py
@@ -68,7 +68,7 @@ def main(argv, session: ArchiveSession) -> None:
print(r.text)
sys.exit(0)
except HTTPError as exc:
- if exc.response.status_code == 404:
+ if exc.response.status_code == 404: # type: ignore
sys.exit(0)
else:
raise exc
diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py
index b1ee3f94..475785c9 100644
--- a/internetarchive/iarequest.py
+++ b/internetarchive/iarequest.py
@@ -173,6 +173,7 @@ def __init__(self,
access_key=None,
secret_key=None,
append=None,
+ expect=None,
append_list=None,
insert=None,
**kwargs):
@@ -188,6 +189,7 @@ def __init__(self,
self.target = target
self.priority = priority
self.append = append
+ self.expect = expect
self.append_list = append_list
self.insert = insert
@@ -210,6 +212,7 @@ def prepare(self):
source_metadata=self.source_metadata,
target=self.target,
append=self.append,
+ expect=self.expect,
append_list=self.append_list,
insert=self.insert,
)
@@ -220,13 +223,14 @@ class MetadataPreparedRequest(requests.models.PreparedRequest):
def prepare(self, method=None, url=None, headers=None, files=None, data=None,
params=None, auth=None, cookies=None, hooks=None, metadata={}, # noqa: B006
source_metadata=None, target=None, priority=None, append=None,
- append_list=None, insert=None):
+ expect=None, append_list=None, insert=None):
self.prepare_method(method)
self.prepare_url(url, params)
+ self.identifier = self.url.split("?")[0].split("/")[-1]
self.prepare_headers(headers)
self.prepare_cookies(cookies)
self.prepare_body(metadata, source_metadata, target, priority, append,
- append_list, insert)
+ append_list, insert, expect)
self.prepare_auth(auth, url)
# Note that prepare_auth must be last to enable authentication schemes
# such as OAuth to work on a fully prepared request.
@@ -235,7 +239,7 @@ def prepare(self, method=None, url=None, headers=None, files=None, data=None,
self.prepare_hooks(hooks)
def prepare_body(self, metadata, source_metadata, target, priority, append,
- append_list, insert):
+ append_list, insert, expect):
priority = priority or -5
if not source_metadata:
@@ -260,21 +264,25 @@ def prepare_body(self, metadata, source_metadata, target, priority, append,
patch = prepare_patch(metadata[key],
source_metadata['metadata'],
append,
+ expect,
append_list,
insert)
except KeyError:
- raise ItemLocateError
+ raise ItemLocateError(f"{self.identifier} cannot be located "
+ "because it is dark or does not exist.")
elif key.startswith('files'):
patch = prepare_files_patch(metadata[key],
source_metadata['files'],
append,
key,
append_list,
- insert)
+ insert,
+ expect)
else:
key = key.split('/')[0]
patch = prepare_target_patch(metadata, source_metadata, append,
- target, append_list, key, insert)
+ target, append_list, key, insert,
+ expect)
changes.append({'target': key, 'patch': patch})
self.data = {
'-changes': json.dumps(changes),
@@ -287,16 +295,18 @@ def prepare_body(self, metadata, source_metadata, target, priority, append,
target = 'metadata'
try:
patch = prepare_patch(metadata, source_metadata['metadata'], append,
- append_list, insert)
+ expect, append_list, insert)
except KeyError:
- raise ItemLocateError
+ raise ItemLocateError(f"{self.identifier} cannot be located "
+ "because it is dark or does not exist.")
elif 'files' in target:
patch = prepare_files_patch(metadata, source_metadata['files'], append,
- target, append_list, insert)
+ target, append_list, insert, expect)
else:
metadata = {target: metadata}
patch = prepare_target_patch(metadata, source_metadata, append,
- target, append_list, target, insert)
+ target, append_list, target, insert,
+ expect)
self.data = {
'-patch': json.dumps(patch),
'-target': target,
@@ -306,7 +316,8 @@ def prepare_body(self, metadata, source_metadata, target, priority, append,
super().prepare_body(self.data, None)
-def prepare_patch(metadata, source_metadata, append, append_list=None, insert=None):
+def prepare_patch(metadata, source_metadata, append,
+ expect=None, append_list=None, insert=None):
destination_metadata = source_metadata.copy()
if isinstance(metadata, list):
prepared_metadata = metadata
@@ -330,11 +341,28 @@ def prepare_patch(metadata, source_metadata, append, append_list=None, insert=No
# Delete metadata items where value is REMOVE_TAG.
destination_metadata = delete_items_from_dict(destination_metadata, 'REMOVE_TAG')
patch = make_patch(source_metadata, destination_metadata).patch
- return patch
+
+ # Add test operations to patch.
+ patch_tests = []
+    for expect_key in (expect or {}):
+ idx = None
+ if '[' in expect_key:
+ idx = int(expect_key.split('[')[1].strip(']'))
+ key = expect_key.split('[')[0]
+ path = f'/{key}/{idx}'
+ p_test = {'op': 'test', 'path': path, 'value': expect[expect_key]}
+ else:
+ path = f'/{expect_key}'
+ p_test = {'op': 'test', 'path': path, 'value': expect[expect_key]}
+
+ patch_tests.append(p_test)
+ final_patch = patch_tests + patch
+
+ return final_patch
def prepare_target_patch(metadata, source_metadata, append, target, append_list, key,
- insert):
+ insert, expect):
def dictify(lst, key=None, value=None):
if not lst:
@@ -351,18 +379,18 @@ def dictify(lst, key=None, value=None):
source_metadata = source_metadata.get(_k, {})
else:
source_metadata[_k] = source_metadata.get(_k, {}).get(_k, {})
- patch = prepare_patch(metadata, source_metadata, append, append_list, insert)
+ patch = prepare_patch(metadata, source_metadata, append, expect, append_list, insert)
return patch
def prepare_files_patch(metadata, source_metadata, append, target, append_list,
- insert):
+ insert, expect):
filename = '/'.join(target.split('/')[1:])
for f in source_metadata:
if f.get('name') == filename:
source_metadata = f
break
- patch = prepare_patch(metadata, source_metadata, append, append_list, insert)
+ patch = prepare_patch(metadata, source_metadata, append, expect, append_list, insert)
return patch
@@ -442,7 +470,7 @@ def rm_index(key):
continue
else:
if v in source_metadata[key]:
- continue
+ source_metadata[key] = [x for x in source_metadata[key] if x != v]
if not isinstance(source_metadata[key], list):
prepared_metadata[key] = [source_metadata[key]]
else:
diff --git a/internetarchive/item.py b/internetarchive/item.py
index 18a2d072..7202976b 100644
--- a/internetarchive/item.py
+++ b/internetarchive/item.py
@@ -57,7 +57,7 @@
iter_directory,
json,
norm_filepath,
- recursive_file_count,
+ recursive_file_count_and_size,
validate_s3_identifier,
)
@@ -770,6 +770,7 @@ def modify_metadata(self,
metadata: Mapping,
target: str | None = None,
append: bool = False,
+ expect: Mapping | None = None,
append_list: bool = False,
insert: bool = False,
priority: int = 0,
@@ -794,6 +795,9 @@ def modify_metadata(self,
:param append: Append value to an existing multi-value
metadata field.
+ :param expect: Provide a dict of expectations to be tested
+ server-side before applying patch to item metadata.
+
:param append_list: Append values to an existing multi-value
metadata field. No duplicate values will be added.
@@ -811,6 +815,7 @@ def modify_metadata(self,
secret_key = secret_key or self.session.secret_key
debug = bool(debug)
headers = headers or {}
+ expect = expect or {}
request_kwargs = request_kwargs or {}
if timeout:
request_kwargs["timeout"] = float(timeout) # type: ignore
@@ -835,6 +840,7 @@ def modify_metadata(self,
access_key=access_key,
secret_key=secret_key,
append=append,
+ expect=expect,
append_list=append_list,
insert=insert)
# Must use Session.prepare_request to make sure session settings
@@ -1106,9 +1112,9 @@ def _build_request():
return response
except HTTPError as exc:
try:
- msg = get_s3_xml_text(exc.response.content)
+ msg = get_s3_xml_text(exc.response.content) # type: ignore
except ExpatError: # probably HTTP 500 error and response is invalid XML
- msg = ('IA S3 returned invalid XML '
+ msg = ('IA S3 returned invalid XML ' # type: ignore
f'(HTTP status code {exc.response.status_code}). '
'This is a server side error which is either temporary, '
'or requires the intervention of IA admins.')
@@ -1194,11 +1200,13 @@ def upload(self, files,
responses = []
file_index = 0
- if queue_derive and total_files is None:
- if checksum:
- total_files = recursive_file_count(files, item=self, checksum=True)
- else:
- total_files = recursive_file_count(files, item=self, checksum=False)
+ headers = headers or {}
+        if (queue_derive or not headers.get('x-archive-size-hint')) and not total_files:
+ total_files, total_size = recursive_file_count_and_size(files,
+ item=self,
+ checksum=checksum)
+ if not headers.get('x-archive-size-hint'):
+ headers['x-archive-size-hint'] = str(total_size)
file_metadata = None
for f in files:
if isinstance(f, dict):
diff --git a/internetarchive/search.py b/internetarchive/search.py
index 34791467..975a261d 100644
--- a/internetarchive/search.py
+++ b/internetarchive/search.py
@@ -130,7 +130,7 @@ def _advanced_search(self):
auth=self.auth,
**self.request_kwargs)
j = r.json()
- num_found = int(j['response']['numFound'])
+ num_found = int(j.get('response', {}).get('numFound', 0))
if not self._num_found:
self._num_found = num_found
if j.get('error'):
@@ -153,7 +153,7 @@ def _scrape(self):
if j.get('error'):
yield j
if not num_found:
- num_found = int(j['total'])
+ num_found = int(j.get('total') or '0')
if not self._num_found:
self._num_found = num_found
self._handle_scrape_error(j)
@@ -214,7 +214,10 @@ def _user_aggs(self):
self.params['page'] = '1'
self.params['rows'] = '1'
self.params['output'] = 'json'
- r = self.session.get(self.search_url, params=self.params, **self.request_kwargs)
+ r = self.session.get(self.search_url,
+ params=self.params,
+ auth=self.auth,
+ **self.request_kwargs)
j = r.json()
if j.get('error'):
yield j
diff --git a/internetarchive/utils.py b/internetarchive/utils.py
index 38b09546..e9d17206 100644
--- a/internetarchive/utils.py
+++ b/internetarchive/utils.py
@@ -216,15 +216,19 @@ def _get_tag_text(tag_name, xml_obj):
def get_file_size(file_obj) -> int | None:
- try:
- file_obj.seek(0, os.SEEK_END)
- size = file_obj.tell()
- # Avoid OverflowError.
- if size > sys.maxsize:
+ if is_filelike_obj(file_obj):
+ try:
+ file_obj.seek(0, os.SEEK_END)
+ size = file_obj.tell()
+ # Avoid OverflowError.
+ if size > sys.maxsize:
+ size = None
+ file_obj.seek(0, os.SEEK_SET)
+ except OSError:
size = None
- file_obj.seek(0, os.SEEK_SET)
- except OSError:
- size = None
+ else:
+ st = os.stat(file_obj)
+ size = st.st_size
return size
@@ -237,11 +241,14 @@ def iter_directory(directory: str):
yield (filepath, key)
-def recursive_file_count(files, item=None, checksum=False):
- """Given a filepath or list of filepaths, return the total number of files."""
+def recursive_file_count_and_size(files, item=None, checksum=False):
+ """Given a filepath or list of filepaths, return the total number and size of files.
+ If `checksum` is `True`, skip over files whose MD5 hash matches any file in the `item`.
+ """
if not isinstance(files, (list, set)):
files = [files]
total_files = 0
+ total_size = 0
if checksum is True:
md5s = [f.get('md5') for f in item.files]
else:
@@ -264,24 +271,27 @@ def recursive_file_count(files, item=None, checksum=False):
except (AttributeError, TypeError):
is_dir = False
if is_dir:
- for x, _ in iter_directory(f):
- if checksum is True:
- with open(x, 'rb') as fh:
- lmd5 = get_md5(fh)
- if lmd5 in md5s:
- continue
- total_files += 1
+ it = iter_directory(f)
else:
+ it = [(f, None)]
+ for x, _ in it:
if checksum is True:
try:
- with open(f, 'rb') as fh:
+ with open(x, 'rb') as fh:
lmd5 = get_md5(fh)
except TypeError:
# Support file-like objects.
- lmd5 = get_md5(f)
+ lmd5 = get_md5(x)
if lmd5 in md5s:
continue
+            total_size += get_file_size(x) or 0
total_files += 1
+ return total_files, total_size
+
+
+def recursive_file_count(*args, **kwargs):
+ """Like `recursive_file_count_and_size`, but returns only the file count."""
+ total_files, _ = recursive_file_count_and_size(*args, **kwargs)
return total_files
@@ -294,6 +304,16 @@ def is_dir(obj) -> bool:
return False
+def is_filelike_obj(obj) -> bool:
+ """Distinguish file-like from path-like objects"""
+ try:
+ os.fspath(obj)
+ except TypeError:
+ return True
+ else:
+ return False
+
+
def reraise_modify(
caught_exc: Exception,
append_msg: str,
diff --git a/pyproject.toml b/pyproject.toml
index ef31cd55..2ed3e1d8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -80,11 +80,7 @@ max-statements = 124
[tool.ruff.per-file-ignores]
"__init__.py" = ["E402"]
-"tests/*" = [
- "PT017",
- "S101",
-]
-"tests/conftest.py" = ["B018", "F811"]
+"tests/*" = ["PT017", "S101"]
"tests/cli/test_ia_list.py" = ["E741"]
"tests/test_api.py" = ["E712"]
"tests/test_config.py" = ["PT011"]
diff --git a/setup.cfg b/setup.cfg
index 1b6af950..28c63a4d 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -31,6 +31,7 @@ install_requires =
schema>=0.4.0
tqdm>=4.0.0
urllib3>=1.26.0
+    importlib-metadata>=3.6.0 ;python_version < "3.10"
python_requires = >=3.7
include_package_data = True
zip_safe = False
@@ -59,7 +60,7 @@ docs =
test =
pytest==7.1.2
responses==0.20.0
- ruff==0.0.261
+ ruff==0.0.269
types =
tqdm-stubs>=0.2.0
types-colorama
@@ -80,7 +81,6 @@ ignore-words-list = alers
[mypy]
exclude = ^\.git/|^__pycache__/|^docs/source/conf.py$|^old/|^build/|^dist/|\.tox
python_version = 3.9
-install_types = True
pretty = True
scripts_are_modules = True
show_error_codes = True
diff --git a/tests/cli/test_ia_upload.py b/tests/cli/test_ia_upload.py
index 3255d981..3d876085 100644
--- a/tests/cli/test_ia_upload.py
+++ b/tests/cli/test_ia_upload.py
@@ -124,6 +124,27 @@ def test_ia_upload_size_hint(capsys, tmpdir_ch, nasa_mocker):
assert 'Accept-Encoding:gzip, deflate' in err
+def test_ia_upload_automatic_size_hint_files(capsys, tmpdir_ch, nasa_mocker):
+ with open('foo', 'w') as fh:
+ fh.write('foo')
+ with open('bar', 'w') as fh:
+ fh.write('bar')
+
+ ia_call(['ia', 'upload', '--debug', 'nasa', 'foo', 'bar'])
+ out, err = capsys.readouterr()
+ assert 'x-archive-size-hint:6' in err
+
+def test_ia_upload_automatic_size_hint_dir(capsys, tmpdir_ch, nasa_mocker):
+ with open('foo', 'w') as fh:
+ fh.write('foo')
+ with open('bar', 'w') as fh:
+ fh.write('bar')
+
+ ia_call(['ia', 'upload', '--debug', 'nasa', '.'])
+ out, err = capsys.readouterr()
+ assert 'x-archive-size-hint:6' in err
+
+
def test_ia_upload_unicode(tmpdir_ch, caplog):
with open('தமிழ் - baz ∆.txt', 'w') as fh:
fh.write('unicode foo')
diff --git a/tests/conftest.py b/tests/conftest.py
index 0b1392a7..1f0ef92c 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -12,17 +12,6 @@
from internetarchive.cli import ia
from internetarchive.utils import json
-try:
- FileNotFoundError
-except NameError:
- FileNotFoundError = IOError
-
-try:
- WindowsError # type: ignore[used-before-def]
-except NameError:
- class WindowsError(Exception):
- pass
-
PROTOCOL = 'https:'
BASE_URL = 'https://archive.org/'
METADATA_URL = f'{BASE_URL}metadata/'
@@ -74,7 +63,7 @@ def load_test_data_file(filename):
def call_cmd(cmd, expected_exit_code=0):
- proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE)
+ proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) # noqa: S602
stdout, stderr = proc.communicate()
stdout = stdout.decode('utf-8').strip()
stderr = stderr.decode('utf-8').strip()
@@ -134,5 +123,5 @@ def nasa_metadata():
# TODO: Why is this function defined twice in this file? See issue #505
@pytest.fixture() # type: ignore
-def nasa_item(nasa_mocker):
+def nasa_item(nasa_mocker): # noqa: F811
return get_item('nasa')
diff --git a/tests/requirements.txt b/tests/requirements.txt
index 7b3df803..0cf7ee05 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,3 +1,3 @@
pytest==7.2.2
responses==0.23.1
-ruff==0.0.261
+ruff==0.0.269
diff --git a/tests/test_item.py b/tests/test_item.py
index 9e6c6c50..ee4f2b80 100644
--- a/tests/test_item.py
+++ b/tests/test_item.py
@@ -582,6 +582,40 @@ def test_upload_checksum(tmpdir, nasa_item):
assert r.status_code is None
+def test_upload_automatic_size_hint(tmpdir, nasa_item):
+ with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
+ _expected_headers = deepcopy(EXPECTED_S3_HEADERS)
+ del _expected_headers['x-archive-size-hint']
+ _expected_headers['x-archive-size-hint'] = '15'
+ rsps.add(responses.PUT, S3_URL_RE,
+ adding_headers=_expected_headers)
+
+ files = []
+ with open(os.path.join(tmpdir, 'file'), 'w') as fh:
+ fh.write('a')
+ files.append(os.path.join(tmpdir, 'file'))
+
+ os.mkdir(os.path.join(tmpdir, 'dir'))
+ with open(os.path.join(tmpdir, 'dir', 'file0'), 'w') as fh:
+ fh.write('bb')
+ with open(os.path.join(tmpdir, 'dir', 'file1'), 'w') as fh:
+ fh.write('cccc')
+ files.append(os.path.join(tmpdir, 'dir'))
+
+ with open(os.path.join(tmpdir, 'obj'), 'wb') as fh:
+ fh.write(b'dddddddd')
+ fh.seek(0, os.SEEK_SET)
+ files.append(fh)
+
+ _responses = nasa_item.upload(files,
+ access_key='a',
+ secret_key='b')
+ for r in _responses:
+ headers = {k.lower(): str(v) for k, v in r.headers.items()}
+ del headers['content-type']
+ assert headers == _expected_headers
+
+
def test_modify_metadata(nasa_item, nasa_metadata):
with IaRequestsMock(assert_all_requests_are_fired=False) as rsps:
rsps.add(responses.POST, f'{PROTOCOL}//archive.org/metadata/nasa')
diff --git a/tox.ini b/tox.ini
index 062086a0..82a5cd95 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py37,py38,py39,py310,py311,pypy37,pypy38,pypy39
+envlist = py38,py39,py310,py311,py312,pypy39,pypy310
[testenv]
deps = -r tests/requirements.txt
@@ -7,9 +7,6 @@ deps = -r tests/requirements.txt
commands = ruff .
pytest {posargs}
-[testenv:py37]
-basepython=python3.7
-
[testenv:py38]
basepython=python3.8
@@ -21,3 +18,6 @@ basepython=python3.10
[testenv:py311]
basepython=python3.11
+
+[testenv:py312]
+basepython=python3.12