From 0e883ff1794eb0cd6629cf4a3e12cc3d4d44f04b Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Fri, 19 May 2023 09:35:02 +0200 Subject: [PATCH 01/30] pre-commit autoupdate 2023_05_19 --- .pre-commit-config.yaml | 4 ++-- pyproject.toml | 6 +----- tests/conftest.py | 15 ++------------- 3 files changed, 5 insertions(+), 20 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index b1f255f4..cfe8f628 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -21,7 +21,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/charliermarsh/ruff-pre-commit - rev: v0.0.261 + rev: v0.0.269 hooks: - id: ruff @@ -42,7 +42,7 @@ repos: - id: codespell # See setup.cfg for args - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.1.1 + rev: v1.3.0 hooks: - id: mypy additional_dependencies: diff --git a/pyproject.toml b/pyproject.toml index ef31cd55..2ed3e1d8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,11 +80,7 @@ max-statements = 124 [tool.ruff.per-file-ignores] "__init__.py" = ["E402"] -"tests/*" = [ - "PT017", - "S101", -] -"tests/conftest.py" = ["B018", "F811"] +"tests/*" = ["PT017", "S101"] "tests/cli/test_ia_list.py" = ["E741"] "tests/test_api.py" = ["E712"] "tests/test_config.py" = ["PT011"] diff --git a/tests/conftest.py b/tests/conftest.py index 0b1392a7..1f0ef92c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -12,17 +12,6 @@ from internetarchive.cli import ia from internetarchive.utils import json -try: - FileNotFoundError -except NameError: - FileNotFoundError = IOError - -try: - WindowsError # type: ignore[used-before-def] -except NameError: - class WindowsError(Exception): - pass - PROTOCOL = 'https:' BASE_URL = 'https://archive.org/' METADATA_URL = f'{BASE_URL}metadata/' @@ -74,7 +63,7 @@ def load_test_data_file(filename): def call_cmd(cmd, expected_exit_code=0): - proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) + proc = Popen(cmd, shell=True, stdout=PIPE, stderr=PIPE) # noqa: S602 stdout, stderr = proc.communicate() stdout = stdout.decode('utf-8').strip() stderr = stderr.decode('utf-8').strip() @@ -134,5 +123,5 @@ def nasa_metadata(): # TODO: Why is this function defined twice in this file? See issue #505 @pytest.fixture() # type: ignore -def nasa_item(nasa_mocker): +def nasa_item(nasa_mocker): # noqa: F811 return get_item('nasa') From 5f876f1b37e689be5422883c05ed2cf95a34eaed Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Fri, 19 May 2023 09:41:19 +0200 Subject: [PATCH 02/30] Upgrade ruff in tests/requirements.txt and setup.cfg --- setup.cfg | 2 +- tests/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 1b6af950..bec6295d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -59,7 +59,7 @@ docs = test = pytest==7.1.2 responses==0.20.0 - ruff==0.0.261 + ruff==0.0.269 types = tqdm-stubs>=0.2.0 types-colorama diff --git a/tests/requirements.txt b/tests/requirements.txt index 7b3df803..0cf7ee05 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,3 +1,3 @@ pytest==7.2.2 responses==0.23.1 -ruff==0.0.261 +ruff==0.0.269 From 7eee5a96b794562277ab9c1c2ce91e37469aed0b Mon Sep 17 00:00:00 2001 From: DuncanDHall Date: Wed, 23 Aug 2023 12:53:27 -0400 Subject: [PATCH 03/30] Patch search error Currently key error is thrown when elastic search returns no 'response' key in its json when we try to grab the num_found. --- internetarchive/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internetarchive/search.py b/internetarchive/search.py index 34791467..0205d691 100644 --- a/internetarchive/search.py +++ b/internetarchive/search.py @@ -130,7 +130,7 @@ def _advanced_search(self): auth=self.auth, **self.request_kwargs) j = r.json() - num_found = int(j['response']['numFound']) + num_found = int(j.get('response', {}).get('numFound', 0)) if not self._num_found: self._num_found = num_found if j.get('error'): From 911b5b8cc0cee196fcabe08f9174cc2b8aae2b9d Mon Sep 17 00:00:00 2001 From: DuncanDHall Date: Tue, 28 Nov 2023 16:37:08 -0500 Subject: [PATCH 04/30] protect against nonetype return --- internetarchive/search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internetarchive/search.py b/internetarchive/search.py index 0205d691..07e510c3 100644 --- a/internetarchive/search.py +++ b/internetarchive/search.py @@ -153,7 +153,7 @@ def _scrape(self): if j.get('error'): yield j if not num_found: - num_found = int(j['total']) + num_found = int(j.get('total') or '0') if not self._num_found: self._num_found = num_found self._handle_scrape_error(j) From cf6efee4b41ebdab99af7304da5fdc2ed76fc26b Mon Sep 17 00:00:00 2001 From: jake Date: Wed, 29 Nov 2023 10:25:54 -0800 Subject: [PATCH 05/30] Updated installation instructions --- docs/source/installation.rst | 43 ++++++++---------------------------- 1 file changed, 9 insertions(+), 34 deletions(-) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index cadbe425..2a0c78bc 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -5,37 +5,25 @@ Installation System-Wide Installation ------------------------- +------------------------- Installing the ``internetarchive`` library globally on your system can be done with `pip `_. This is the recommended method for installing ``internetarchive`` (`see below `_ for details on installing pip):: - $ sudo pip install internetarchive + $ sudo python3 -m pip install internetarchive -or, with `easy_install `_:: +To update, you can run the following command:: - $ sudo easy_install internetarchive - -Either of these commands will install the ``internetarchive`` Python library and ``ia`` command-line tool on your system. - -**Note**: Some versions of Mac OS X come with Python libraries that are required by ``internetarchive`` (e.g. the Python package ``six``). -This can cause installation issues. If your installation is failing with a message that looks something like:: - - OSError: [Errno 1] Operation not permitted: '/var/folders/bk/3wx7qs8d0x79tqbmcdmsk1040000gp/T/pip-TGyjVo-uninstall/System/Library/Frameworks/Python.framework/Versions/2.7/Extras/lib/python/six-1.4.1-py2.7.egg-info' - -You can use the ``--ignore-installed`` parameter in ``pip`` to ignore the libraries that are already installed, and continue with the rest of the installation:: - - $ sudo pip install --ignore-installed internetarchive - -More details on this issue can be found here: https://github.com/pypa/pip/issues/3165 + $ sudo python3 -m pip install --upgrade internetarchive Installing Pip ~~~~~~~~~~~~~~ -Pip can be `installed with the get-pip.py script `_:: +If you are running Python 3.4+, you should already have ``pip`` installed. +If it is not already installed, it can be `installed with the get-pip.py script `_:: $ curl -LOs https://bootstrap.pypa.io/get-pip.py - $ python get-pip.py + $ python3 get-pip.py virtualenv @@ -45,11 +33,7 @@ If you don't want to, or can't, install the package system-wide you can use ``vi First, make sure ``virtualenv`` is installed on your system. If it's not, you can do so with pip:: - $ sudo pip install virtualenv - -With ``easy_install``:: - - $ sudo easy_install virtualenv + $ sudo python3 -m pip install virtualenv Or your systems package manager, ``apt-get`` for example:: @@ -92,7 +76,7 @@ If you are on an older operating system that only has Python 2 installed, it's h You can install and use version v2.3.0 with pip:: - $ pip install internetarchive==2.3.0 + $ sudo python2 -m pip install internetarchive==2.3.0 You can also download a binary of v2.3.0:: @@ -100,15 +84,6 @@ You can also download a binary of v2.3.0:: $ chmod +x ia-py2 -Snap ----- - -You can install the latest ``ia`` `snap `_, and help testing the most recent changes of the master branch in `all the supported Linux distros `_ with:: - - $ sudo snap install ia --edge - -Every time a new version of ``ia`` is pushed to the store, you will get it updated automatically. - Get the Code ------------ From d59083b4573f715bae98d10d9fe8b584b096805c Mon Sep 17 00:00:00 2001 From: jake Date: Wed, 27 Dec 2023 10:37:05 -0800 Subject: [PATCH 06/30] mypy......... --- internetarchive/iarequest.py | 7 +++++-- internetarchive/item.py | 4 ++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index b1ee3f94..60505dd6 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -223,6 +223,7 @@ def prepare(self, method=None, url=None, headers=None, files=None, data=None, append_list=None, insert=None): self.prepare_method(method) self.prepare_url(url, params) + self.identifier = self.url.split("?")[0].split("/")[-1] self.prepare_headers(headers) self.prepare_cookies(cookies) self.prepare_body(metadata, source_metadata, target, priority, append, @@ -263,7 +264,8 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, append_list, insert) except KeyError: - raise ItemLocateError + raise ItemLocateError(f"{self.identifier} cannot be located " + "because it is dark or does not exist.") elif key.startswith('files'): patch = prepare_files_patch(metadata[key], source_metadata['files'], @@ -289,7 +291,8 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, patch = prepare_patch(metadata, source_metadata['metadata'], append, append_list, insert) except KeyError: - raise ItemLocateError + raise ItemLocateError(f"{self.identifier} cannot be located " + "because it is dark or does not exist.") elif 'files' in target: patch = prepare_files_patch(metadata, source_metadata['files'], append, target, append_list, insert) diff --git a/internetarchive/item.py b/internetarchive/item.py index 18a2d072..1cd3cfe0 100644 --- a/internetarchive/item.py +++ b/internetarchive/item.py @@ -1106,9 +1106,9 @@ def _build_request(): return response except HTTPError as exc: try: - msg = get_s3_xml_text(exc.response.content) + msg = get_s3_xml_text(exc.response.content) # type: ignore except ExpatError: # probably HTTP 500 error and response is invalid XML - msg = ('IA S3 returned invalid XML ' + msg = ('IA S3 returned invalid XML ' # type: ignore f'(HTTP status code {exc.response.status_code}). ' 'This is a server side error which is either temporary, ' 'or requires the intervention of IA admins.') From 3f75dfa566785fcc38dce790e8c338ed00097da7 Mon Sep 17 00:00:00 2001 From: jake Date: Wed, 27 Dec 2023 10:38:07 -0800 Subject: [PATCH 07/30] installation doc updates --- README.rst | 2 +- docs/source/installation.rst | 23 ++++++++++++++++++++++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 55919367..b6bbdcdb 100644 --- a/README.rst +++ b/README.rst @@ -30,7 +30,7 @@ You can install this module via pip: .. code:: bash - $ pip install internetarchive + $ python3 -m pip install internetarchive Binaries of the command-line tool are also available: diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 2a0c78bc..320cff93 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -8,14 +8,35 @@ System-Wide Installation ------------------------- Installing the ``internetarchive`` library globally on your system can be done with `pip `_. -This is the recommended method for installing ``internetarchive`` (`see below `_ for details on installing pip):: +This is the recommended method for installing ``internetarchive`` (`see below `_ for details on installing pip). +If you are on Mac OS X, refer to the `Mac OS X section `_ below before proceeding. +Once you're ready to install, run the following command:: $ sudo python3 -m pip install internetarchive +Updating Your $PATH +~~~~~~~~~~~~~~~~~~~ + +Once you have successfully installed ``internetarchive``, you may need to update your ``$PATH`` (e.g. if running ``ia`` in your terminal returns an error). +If you receive a command not found error, run the following command to update your ``$PATH``:: + + $ echo "$(python3 -m site --user-base)/bin" | sudo tee -a /etc/paths + +Updating ia +~~~~~~~~~~~ + To update, you can run the following command:: $ sudo python3 -m pip install --upgrade internetarchive +Mac OS X +~~~~~~~~ + +While newer versions Mac OS X ship with Python 3 installed, it is recommended to install an updated version of Python 3. +You can do so with `Homebrew `_:: + + $ brew install python3 + Installing Pip ~~~~~~~~~~~~~~ From 6e2321652ab2bd13c517dabf5a6e01f420bca380 Mon Sep 17 00:00:00 2001 From: jake Date: Wed, 27 Dec 2023 10:46:32 -0800 Subject: [PATCH 08/30] v3.6.0 --- HISTORY.rst | 6 ++++-- internetarchive/__version__.py | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/HISTORY.rst b/HISTORY.rst index 4b92cb97..84627aac 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,13 +3,15 @@ Release History --------------- -3.6.0 (?) -+++++++++ +3.6.0 (2023-12-27) +++++++++++++++++++ **Features and Improvements** - Added ``set_scanner`` and ``--no-scanner`` options to upload to stop ia's default behavior of setting the scanner field in meta.xml on initial upload. +- ``0`` is now returned instead of an exception when search fails to retrieve the total number + of hits for a query. 3.5.0 (2023-05-09) ++++++++++++++++++ diff --git a/internetarchive/__version__.py b/internetarchive/__version__.py index 863d4694..826cf62c 100644 --- a/internetarchive/__version__.py +++ b/internetarchive/__version__.py @@ -1 +1 @@ -__version__ = '3.6.0.dev1' +__version__ = '3.6.0' From 942cfd6c812f54cf21f57f01de84966676690cc3 Mon Sep 17 00:00:00 2001 From: jake Date: Fri, 2 Feb 2024 13:54:37 -0800 Subject: [PATCH 09/30] first pass at adding support for JSON Patch test operations. --- internetarchive/iarequest.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index 60505dd6..9366faac 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -333,7 +333,24 @@ def prepare_patch(metadata, source_metadata, append, append_list=None, insert=No # Delete metadata items where value is REMOVE_TAG. destination_metadata = delete_items_from_dict(destination_metadata, 'REMOVE_TAG') patch = make_patch(source_metadata, destination_metadata).patch - return patch + + # Add test operations to patch. + patch_tests = [] + for p in patch: + patch_parts = p['path'].split('/') + if not source_metadata.get(patch_parts[1]): + continue + if len(patch_parts) == 2: + src_val = source_metadata.get(patch_parts[-1]) + else: + index = int(patch_parts[-1]) - 1 + src_val = source_metadata.get(patch_parts[1], [])[index] + p_test = {'op': 'test', 'path': p['path'], 'value': src_val} + patch_tests.append(p_test) + final_patch = patch_tests + patch + print(f"final patch being submitted to archive.org: {final_patch}") + + return final_patch def prepare_target_patch(metadata, source_metadata, append, target, append_list, key, From b0e7e759bb6904e351da140a2e8bf210d9446c2f Mon Sep 17 00:00:00 2001 From: jake Date: Fri, 2 Feb 2024 14:02:13 -0800 Subject: [PATCH 10/30] removed debugging print statement --- internetarchive/iarequest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index 9366faac..fd9ba16d 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -347,8 +347,8 @@ def prepare_patch(metadata, source_metadata, append, append_list=None, insert=No src_val = source_metadata.get(patch_parts[1], [])[index] p_test = {'op': 'test', 'path': p['path'], 'value': src_val} patch_tests.append(p_test) + final_patch = patch_tests + patch - print(f"final patch being submitted to archive.org: {final_patch}") return final_patch From c27440e17164f8bf0903337248fafee0234766c4 Mon Sep 17 00:00:00 2001 From: jake Date: Fri, 2 Feb 2024 14:13:51 -0800 Subject: [PATCH 11/30] typos --- internetarchive/iarequest.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index fd9ba16d..4df3bb63 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -337,14 +337,14 @@ def prepare_patch(metadata, source_metadata, append, append_list=None, insert=No # Add test operations to patch. patch_tests = [] for p in patch: - patch_parts = p['path'].split('/') - if not source_metadata.get(patch_parts[1]): + path_parts = p['path'].split('/') + if not source_metadata.get(path_parts[1]): continue - if len(patch_parts) == 2: - src_val = source_metadata.get(patch_parts[-1]) + if len(path_parts) == 2: + src_val = source_metadata.get(path_parts[-1]) else: - index = int(patch_parts[-1]) - 1 - src_val = source_metadata.get(patch_parts[1], [])[index] + index = int(path_parts[-1]) - 1 + src_val = source_metadata.get(path_parts[1], [])[index] p_test = {'op': 'test', 'path': p['path'], 'value': src_val} patch_tests.append(p_test) From 19c9df2ce4d7d5333467e81a478aebaf9e70a922 Mon Sep 17 00:00:00 2001 From: jake Date: Tue, 6 Feb 2024 09:51:35 -0800 Subject: [PATCH 12/30] Fixed bug in append mode --- internetarchive/iarequest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index 4df3bb63..f93d3fd4 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -342,10 +342,10 @@ def prepare_patch(metadata, source_metadata, append, append_list=None, insert=No continue if len(path_parts) == 2: src_val = source_metadata.get(path_parts[-1]) + p_test = {'op': 'test', 'path': p['path'], 'value': src_val} else: - index = int(path_parts[-1]) - 1 - src_val = source_metadata.get(path_parts[1], [])[index] - p_test = {'op': 'test', 'path': p['path'], 'value': src_val} + src_val = source_metadata.get(path_parts[1], []) + p_test = {'op': 'test', 'path': '/' + path_parts[1], 'value': src_val} patch_tests.append(p_test) final_patch = patch_tests + patch From 6bae1f2a454535d95477e08ce12c954b958f6008 Mon Sep 17 00:00:00 2001 From: Letu Ren Date: Tue, 9 Jan 2024 18:28:50 +0800 Subject: [PATCH 13/30] Switch to importlib-metadata to drop deprecated pkg_resources According to https://setuptools.pypa.io/en/latest/pkg_resources.html, pkg_resources has been deprecated and importlib-metadata is recommended. `DistributionNotFound` only can be thrown from `find_plugins()` which is not used by ia. Tested with plugin https://github.com/JesseWeinstein/ia_recent. Closes: https://github.com/jjjake/internetarchive/issues/613 --- internetarchive/cli/ia.py | 9 ++++++--- setup.cfg | 1 + 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/internetarchive/cli/ia.py b/internetarchive/cli/ia.py index 8e044c36..e674dcb9 100755 --- a/internetarchive/cli/ia.py +++ b/internetarchive/cli/ia.py @@ -64,7 +64,10 @@ import sys from docopt import docopt, printable_usage -from pkg_resources import DistributionNotFound, iter_entry_points +if sys.version_info < (3, 10): + from importlib_metadata import entry_points +else: + from importlib.metadata import entry_points from schema import Or, Schema, SchemaError # type: ignore[import] from internetarchive import __version__ @@ -97,11 +100,11 @@ def load_ia_module(cmd: str): return __import__(_module, fromlist=['internetarchive.cli']) else: _module = f'ia_{cmd}' - for ep in iter_entry_points('internetarchive.cli.plugins'): + for ep in entry_points(group='internetarchive.cli.plugins'): if ep.name == _module: return ep.load() raise ImportError - except (ImportError, DistributionNotFound): + except (ImportError): print(f"error: '{cmd}' is not an ia command! See 'ia help'", file=sys.stderr) matches = '\t'.join(difflib.get_close_matches(cmd, cmd_aliases.values())) diff --git a/setup.cfg b/setup.cfg index bec6295d..a8b481e9 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,6 +26,7 @@ packages = internetarchive.cli install_requires = docopt>=0.6.0,<0.7.0 + importlib-metadata >= 3.6.0 ; python_version <= "3.10" jsonpatch>=0.4 requests>=2.25.0,<3.0.0 schema>=0.4.0 From 1c4e9c53a530ae68539db61551d5476fa1eecbde Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 13 Feb 2024 19:45:37 +0000 Subject: [PATCH 14/30] Fix automatic size hint on uploads When uploading without a size hint (via headers or `--size-hint`), a x-archive-size-hint header is added automatically. However, prior to this commit, the value was the individual file size on each file's `PUT` request. This effectively made the size hint useless because it does not, in fact, provide a size hint for the total item size to the S3 backend at item creation time. Notes on detailed changes to implement this: * Rename `internetarchive.utils.recursive_file_count` to `recursive_file_count_and_size`, adding a wrapper for backwards compatibility * Add support for paths (rather than only file-like objects) to `internetarchive.utils.get_file_size` * Add a `internetarchive.utils.is_filelike_obj` helper function * Fix a bug introduced by 62c85133090e21659f09964fedd4c18ae4a27483 where `total_files` would never be `None` and so `recursive_file_count` was never called, possibly leading to incorrect derive queueing. * Add tests for the fixed behaviour --- internetarchive/item.py | 14 +++++---- internetarchive/utils.py | 58 +++++++++++++++++++++++++------------ tests/cli/test_ia_upload.py | 21 ++++++++++++++ tests/test_item.py | 34 ++++++++++++++++++++++ 4 files changed, 102 insertions(+), 25 deletions(-) diff --git a/internetarchive/item.py b/internetarchive/item.py index 1cd3cfe0..929805db 100644 --- a/internetarchive/item.py +++ b/internetarchive/item.py @@ -57,7 +57,7 @@ iter_directory, json, norm_filepath, - recursive_file_count, + recursive_file_count_and_size, validate_s3_identifier, ) @@ -1194,11 +1194,13 @@ def upload(self, files, responses = [] file_index = 0 - if queue_derive and total_files is None: - if checksum: - total_files = recursive_file_count(files, item=self, checksum=True) - else: - total_files = recursive_file_count(files, item=self, checksum=False) + headers = headers or {} + if (queue_derive or not headers.get('x-archive-size-hint')) and total_files == 0: + total_files, total_size = recursive_file_count_and_size(files, + item=self, + checksum=checksum) + if not headers.get('x-archive-size-hint'): + headers['x-archive-size-hint'] = str(total_size) file_metadata = None for f in files: if isinstance(f, dict): diff --git a/internetarchive/utils.py b/internetarchive/utils.py index 38b09546..e9d17206 100644 --- a/internetarchive/utils.py +++ b/internetarchive/utils.py @@ -216,15 +216,19 @@ def _get_tag_text(tag_name, xml_obj): def get_file_size(file_obj) -> int | None: - try: - file_obj.seek(0, os.SEEK_END) - size = file_obj.tell() - # Avoid OverflowError. - if size > sys.maxsize: + if is_filelike_obj(file_obj): + try: + file_obj.seek(0, os.SEEK_END) + size = file_obj.tell() + # Avoid OverflowError. + if size > sys.maxsize: + size = None + file_obj.seek(0, os.SEEK_SET) + except OSError: size = None - file_obj.seek(0, os.SEEK_SET) - except OSError: - size = None + else: + st = os.stat(file_obj) + size = st.st_size return size @@ -237,11 +241,14 @@ def iter_directory(directory: str): yield (filepath, key) -def recursive_file_count(files, item=None, checksum=False): - """Given a filepath or list of filepaths, return the total number of files.""" +def recursive_file_count_and_size(files, item=None, checksum=False): + """Given a filepath or list of filepaths, return the total number and size of files. + If `checksum` is `True`, skip over files whose MD5 hash matches any file in the `item`. + """ if not isinstance(files, (list, set)): files = [files] total_files = 0 + total_size = 0 if checksum is True: md5s = [f.get('md5') for f in item.files] else: @@ -264,24 +271,27 @@ def recursive_file_count(files, item=None, checksum=False): except (AttributeError, TypeError): is_dir = False if is_dir: - for x, _ in iter_directory(f): - if checksum is True: - with open(x, 'rb') as fh: - lmd5 = get_md5(fh) - if lmd5 in md5s: - continue - total_files += 1 + it = iter_directory(f) else: + it = [(f, None)] + for x, _ in it: if checksum is True: try: - with open(f, 'rb') as fh: + with open(x, 'rb') as fh: lmd5 = get_md5(fh) except TypeError: # Support file-like objects. - lmd5 = get_md5(f) + lmd5 = get_md5(x) if lmd5 in md5s: continue + total_size += get_file_size(x) total_files += 1 + return total_files, total_size + + +def recursive_file_count(*args, **kwargs): + """Like `recursive_file_count_and_size`, but returns only the file count.""" + total_files, _ = recursive_file_count_and_size(*args, **kwargs) return total_files @@ -294,6 +304,16 @@ def is_dir(obj) -> bool: return False +def is_filelike_obj(obj) -> bool: + """Distinguish file-like from path-like objects""" + try: + os.fspath(obj) + except TypeError: + return True + else: + return False + + def reraise_modify( caught_exc: Exception, append_msg: str, diff --git a/tests/cli/test_ia_upload.py b/tests/cli/test_ia_upload.py index 3255d981..3d876085 100644 --- a/tests/cli/test_ia_upload.py +++ b/tests/cli/test_ia_upload.py @@ -124,6 +124,27 @@ def test_ia_upload_size_hint(capsys, tmpdir_ch, nasa_mocker): assert 'Accept-Encoding:gzip, deflate' in err +def test_ia_upload_automatic_size_hint_files(capsys, tmpdir_ch, nasa_mocker): + with open('foo', 'w') as fh: + fh.write('foo') + with open('bar', 'w') as fh: + fh.write('bar') + + ia_call(['ia', 'upload', '--debug', 'nasa', 'foo', 'bar']) + out, err = capsys.readouterr() + assert 'x-archive-size-hint:6' in err + +def test_ia_upload_automatic_size_hint_dir(capsys, tmpdir_ch, nasa_mocker): + with open('foo', 'w') as fh: + fh.write('foo') + with open('bar', 'w') as fh: + fh.write('bar') + + ia_call(['ia', 'upload', '--debug', 'nasa', '.']) + out, err = capsys.readouterr() + assert 'x-archive-size-hint:6' in err + + def test_ia_upload_unicode(tmpdir_ch, caplog): with open('தமிழ் - baz ∆.txt', 'w') as fh: fh.write('unicode foo') diff --git a/tests/test_item.py b/tests/test_item.py index 9ae7d7c7..adaf0bcd 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -580,6 +580,40 @@ def test_upload_checksum(tmpdir, nasa_item): assert r.status_code is None +def test_upload_automatic_size_hint(tmpdir, nasa_item): + with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: + _expected_headers = deepcopy(EXPECTED_S3_HEADERS) + del _expected_headers['x-archive-size-hint'] + _expected_headers['x-archive-size-hint'] = '15' + rsps.add(responses.PUT, S3_URL_RE, + adding_headers=_expected_headers) + + files = [] + with open(os.path.join(tmpdir, 'file'), 'w') as fh: + fh.write('a') + files.append(os.path.join(tmpdir, 'file')) + + os.mkdir(os.path.join(tmpdir, 'dir')) + with open(os.path.join(tmpdir, 'dir', 'file0'), 'w') as fh: + fh.write('bb') + with open(os.path.join(tmpdir, 'dir', 'file1'), 'w') as fh: + fh.write('cccc') + files.append(os.path.join(tmpdir, 'dir')) + + with open(os.path.join(tmpdir, 'obj'), 'wb') as fh: + fh.write(b'dddddddd') + fh.seek(0, os.SEEK_SET) + files.append(fh) + + _responses = nasa_item.upload(files, + access_key='a', + secret_key='b') + for r in _responses: + headers = {k.lower(): str(v) for k, v in r.headers.items()} + del headers['content-type'] + assert headers == _expected_headers + + def test_modify_metadata(nasa_item, nasa_metadata): with IaRequestsMock(assert_all_requests_are_fired=False) as rsps: rsps.add(responses.POST, f'{PROTOCOL}//archive.org/metadata/nasa') From 67523b49db2eb7b4ccd920286d139815797e2a59 Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Tue, 13 Feb 2024 22:10:02 +0000 Subject: [PATCH 15/30] Fix linting: disable mypy --install-types `pip install .[all]` already installs the third-party hints, and `--install-types` prompts for confirmation, breaking the CI. --- setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.cfg b/setup.cfg index bec6295d..fb2e88dd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -80,7 +80,6 @@ ignore-words-list = alers [mypy] exclude = ^\.git/|^__pycache__/|^docs/source/conf.py$|^old/|^build/|^dist/|\.tox python_version = 3.9 -install_types = True pretty = True scripts_are_modules = True show_error_codes = True From 4fc889a66d2a1780046c25ae84a70a76b6bbbb8f Mon Sep 17 00:00:00 2001 From: jake Date: Wed, 14 Feb 2024 09:48:14 -0800 Subject: [PATCH 16/30] updated accept-encoding header --- tests/test_item.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_item.py b/tests/test_item.py index 9ae7d7c7..d50f013f 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -32,7 +32,7 @@ 'x-archive-auto-make-bucket': '1', 'authorization': 'LOW a:b', 'accept': '*/*', - 'accept-encoding': 'gzip, deflate', + 'accept-encoding': 'gzip, deflate, br', 'connection': 'close', } From 15172ed4a44a65c9a3dcd60abcec68ab6f330780 Mon Sep 17 00:00:00 2001 From: jake Date: Wed, 14 Feb 2024 09:52:57 -0800 Subject: [PATCH 17/30] Have users define expectations. --- internetarchive/cli/ia_metadata.py | 19 ++++++---- internetarchive/iarequest.py | 57 +++++++++++++++++------------- internetarchive/item.py | 6 ++++ 3 files changed, 51 insertions(+), 31 deletions(-) diff --git a/internetarchive/cli/ia_metadata.py b/internetarchive/cli/ia_metadata.py index c8a8d9aa..b4833695 100644 --- a/internetarchive/cli/ia_metadata.py +++ b/internetarchive/cli/ia_metadata.py @@ -22,17 +22,20 @@ ia metadata ... [--exists | --formats] [--header=...] ia metadata ... --modify=... [--target=] [--priority=] [--header=...] - [--timeout=] + [--timeout=] [--expect=...] ia metadata ... --remove=... [--priority=] [--header=...] [--timeout=] + [--expect=...] ia metadata ... [--append=... | --append-list=...] [--priority=] [--target=] [--header=...] [--timeout=] + [--expect=...] ia metadata ... --insert=... [--priority=] [--target=] [--header=...] - [--timeout=] + [--timeout=] [--expect=...] ia metadata --spreadsheet= [--priority=] [--modify=...] [--header=...] [--timeout=] + [--expect=...] ia metadata --help options: @@ -42,8 +45,10 @@ -t, --target= The metadata target to modify. -a, --append=... Append a string to a metadata element. -A, --append-list=... Append a field to a metadata element. - -i, --insert=... Insert a value into a multi-value field given + -i, --insert=... Insert a value into a multi-value field given an index (e.g. `--insert=collection[0]:foo`). + -E, --expect=... Test an expectation server-side before applying + patch to item metadata. -s, --spreadsheet= Modify metadata in bulk using a spreadsheet as input. -e, --exists Check if an item exists @@ -79,13 +84,14 @@ def modify_metadata(item: item.Item, metadata: Mapping, args: Mapping) -> Response: append = bool(args['--append']) + expect = get_args_dict(args['--expect']) append_list = bool(args['--append-list']) insert = bool(args['--insert']) try: r = item.modify_metadata(metadata, target=args['--target'], append=append, - priority=args['--priority'], append_list=append_list, - headers=args['--header'], insert=insert, - timeout=args['--timeout']) + expect=expect, priority=args['--priority'], + append_list=append_list, headers=args['--header'], + insert=insert, timeout=args['--timeout']) assert isinstance(r, Response) # mypy: modify_metadata() -> Request | Response except ItemLocateError as exc: print(f'{item.identifier} - error: {exc}', file=sys.stderr) @@ -178,6 +184,7 @@ def main(argv: dict, session: session.ArchiveSession) -> None: str: bool, '': list, '--modify': list, + '--expect': list, '--header': Or(None, And(Use(get_args_header_dict), dict), error='--header must be formatted as --header="key:value"'), '--append': list, diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index f93d3fd4..b59fa768 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -173,6 +173,7 @@ def __init__(self, access_key=None, secret_key=None, append=None, + expect=None, append_list=None, insert=None, **kwargs): @@ -188,6 +189,7 @@ def __init__(self, self.target = target self.priority = priority self.append = append + self.expect = expect self.append_list = append_list self.insert = insert @@ -210,6 +212,7 @@ def prepare(self): source_metadata=self.source_metadata, target=self.target, append=self.append, + expect=self.expect, append_list=self.append_list, insert=self.insert, ) @@ -220,14 +223,14 @@ class MetadataPreparedRequest(requests.models.PreparedRequest): def prepare(self, method=None, url=None, headers=None, files=None, data=None, params=None, auth=None, cookies=None, hooks=None, metadata={}, # noqa: B006 source_metadata=None, target=None, priority=None, append=None, - append_list=None, insert=None): + expect=None, append_list=None, insert=None): self.prepare_method(method) self.prepare_url(url, params) self.identifier = self.url.split("?")[0].split("/")[-1] self.prepare_headers(headers) self.prepare_cookies(cookies) self.prepare_body(metadata, source_metadata, target, priority, append, - append_list, insert) + append_list, insert, expect) self.prepare_auth(auth, url) # Note that prepare_auth must be last to enable authentication schemes # such as OAuth to work on a fully prepared request. @@ -236,7 +239,7 @@ def prepare(self, method=None, url=None, headers=None, files=None, data=None, self.prepare_hooks(hooks) def prepare_body(self, metadata, source_metadata, target, priority, append, - append_list, insert): + append_list, insert, expect): priority = priority or -5 if not source_metadata: @@ -261,22 +264,25 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, patch = prepare_patch(metadata[key], source_metadata['metadata'], append, + expect, append_list, insert) except KeyError: raise ItemLocateError(f"{self.identifier} cannot be located " - "because it is dark or does not exist.") + "because it is dark or does not exist.") elif key.startswith('files'): patch = prepare_files_patch(metadata[key], source_metadata['files'], append, key, append_list, - insert) + insert, + expect) else: key = key.split('/')[0] patch = prepare_target_patch(metadata, source_metadata, append, - target, append_list, key, insert) + target, append_list, key, insert, + expect) changes.append({'target': key, 'patch': patch}) self.data = { '-changes': json.dumps(changes), @@ -289,17 +295,18 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, target = 'metadata' try: patch = prepare_patch(metadata, source_metadata['metadata'], append, - append_list, insert) + expect, append_list, insert) except KeyError: raise ItemLocateError(f"{self.identifier} cannot be located " - "because it is dark or does not exist.") + "because it is dark or does not exist.") elif 'files' in target: patch = prepare_files_patch(metadata, source_metadata['files'], append, - target, append_list, insert) + target, append_list, insert, expect) else: metadata = {target: metadata} patch = prepare_target_patch(metadata, source_metadata, append, - target, append_list, target, insert) + target, append_list, target, insert, + expect) self.data = { '-patch': json.dumps(patch), '-target': target, @@ -309,7 +316,8 @@ def prepare_body(self, metadata, source_metadata, target, priority, append, super().prepare_body(self.data, None) -def prepare_patch(metadata, source_metadata, append, append_list=None, insert=None): +def prepare_patch(metadata, source_metadata, append, + expect=None, append_list=None, insert=None): destination_metadata = source_metadata.copy() if isinstance(metadata, list): prepared_metadata = metadata @@ -336,25 +344,24 @@ def prepare_patch(metadata, source_metadata, append, append_list=None, insert=No # Add test operations to patch. patch_tests = [] - for p in patch: - path_parts = p['path'].split('/') - if not source_metadata.get(path_parts[1]): - continue - if len(path_parts) == 2: - src_val = source_metadata.get(path_parts[-1]) - p_test = {'op': 'test', 'path': p['path'], 'value': src_val} + for key in expect: + idx = None + if '[' in key: + idx = int(key.split('[')[1].strip(']')) + key = key.split('[')[0] + path = f'/{key}/{idx}' else: - src_val = source_metadata.get(path_parts[1], []) - p_test = {'op': 'test', 'path': '/' + path_parts[1], 'value': src_val} - patch_tests.append(p_test) + path = f'/{key}' + p_test = {'op': 'test', 'path': path, 'value': expect[key]} + patch_tests.append(p_test) final_patch = patch_tests + patch return final_patch def prepare_target_patch(metadata, source_metadata, append, target, append_list, key, - insert): + insert, expect): def dictify(lst, key=None, value=None): if not lst: @@ -371,18 +378,18 @@ def dictify(lst, key=None, value=None): source_metadata = source_metadata.get(_k, {}) else: source_metadata[_k] = source_metadata.get(_k, {}).get(_k, {}) - patch = prepare_patch(metadata, source_metadata, append, append_list, insert) + patch = prepare_patch(metadata, source_metadata, append, expect, append_list, insert) return patch def prepare_files_patch(metadata, source_metadata, append, target, append_list, - insert): + insert, expect): filename = '/'.join(target.split('/')[1:]) for f in source_metadata: if f.get('name') == filename: source_metadata = f break - patch = prepare_patch(metadata, source_metadata, append, append_list, insert) + patch = prepare_patch(metadata, source_metadata, append, expect, append_list, insert) return patch diff --git a/internetarchive/item.py b/internetarchive/item.py index 1cd3cfe0..6b0000f2 100644 --- a/internetarchive/item.py +++ b/internetarchive/item.py @@ -770,6 +770,7 @@ def modify_metadata(self, metadata: Mapping, target: str | None = None, append: bool = False, + expect: Mapping | None = None, append_list: bool = False, insert: bool = False, priority: int = 0, @@ -794,6 +795,9 @@ def modify_metadata(self, :param append: Append value to an existing multi-value metadata field. + :param expect: Provide a dict of expectations to be tested + server-side before applying patch to item metadata. + :param append_list: Append values to an existing multi-value metadata field. No duplicate values will be added. @@ -811,6 +815,7 @@ def modify_metadata(self, secret_key = secret_key or self.session.secret_key debug = bool(debug) headers = headers or {} + expect = expect or {} request_kwargs = request_kwargs or {} if timeout: request_kwargs["timeout"] = float(timeout) # type: ignore @@ -835,6 +840,7 @@ def modify_metadata(self, access_key=access_key, secret_key=secret_key, append=append, + expect=expect, append_list=append_list, insert=insert) # Must use Session.prepare_request to make sure session settings From b10fad5398a662c544da2714dd2f9c5858e74560 Mon Sep 17 00:00:00 2001 From: jake Date: Wed, 14 Feb 2024 10:30:54 -0800 Subject: [PATCH 18/30] Fixed bug for cases where expect keys have indexes --- internetarchive/iarequest.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index b59fa768..5939b762 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -344,16 +344,17 @@ def prepare_patch(metadata, source_metadata, append, # Add test operations to patch. patch_tests = [] - for key in expect: + for expect_key in expect: idx = None - if '[' in key: - idx = int(key.split('[')[1].strip(']')) - key = key.split('[')[0] + if '[' in expect_key: + idx = int(expect_key.split('[')[1].strip(']')) + key = expect_key.split('[')[0] path = f'/{key}/{idx}' + p_test = {'op': 'test', 'path': path, 'value': expect[expect_key]} else: path = f'/{key}' + p_test = {'op': 'test', 'path': path, 'value': expect[expect_key]} - p_test = {'op': 'test', 'path': path, 'value': expect[key]} patch_tests.append(p_test) final_patch = patch_tests + patch From af396716ceb86f374565081c89c225125d02c8c6 Mon Sep 17 00:00:00 2001 From: Christian Clauss Date: Sat, 17 Feb 2024 18:39:43 +0100 Subject: [PATCH 19/30] Keep GitHub Actions up to date with GitHub's Dependabot Fixes ##630 Auto-generates GitHub Action upgrade pull requests like * #630 --- .github/dependabot.yml | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 .github/dependabot.yml diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..be006de9 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,13 @@ +# Keep GitHub Actions up to date with GitHub's Dependabot... +# https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot +# https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem +version: 2 +updates: + - package-ecosystem: github-actions + directory: / + groups: + github-actions: + patterns: + - "*" # Group all Actions updates into a single larger pull request + schedule: + interval: weekly From 61eb58dfc5bacbf6b188818800160f435fcfe14e Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Feb 2024 17:40:53 +0000 Subject: [PATCH 20/30] Bump the github-actions group with 2 updates Bumps the github-actions group with 2 updates: [actions/checkout](https://github.com/actions/checkout) and [actions/setup-python](https://github.com/actions/setup-python). Updates `actions/checkout` from 3 to 4 - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v3...v4) Updates `actions/setup-python` from 4 to 5 - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/checkout dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions - dependency-name: actions/setup-python dependency-type: direct:production update-type: version-update:semver-major dependency-group: github-actions ... Signed-off-by: dependabot[bot] --- .github/workflows/lint_python.yml | 4 ++-- .github/workflows/pre-commit.yml | 4 ++-- .github/workflows/test_install.yml | 2 +- .github/workflows/tox.yml | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/lint_python.yml b/.github/workflows/lint_python.yml index 3b8173e0..10877bce 100644 --- a/.github/workflows/lint_python.yml +++ b/.github/workflows/lint_python.yml @@ -4,8 +4,8 @@ jobs: lint_python: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: cache: pip python-version: 3.x diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml index 7c40d73c..7669221d 100644 --- a/.github/workflows/pre-commit.yml +++ b/.github/workflows/pre-commit.yml @@ -9,8 +9,8 @@ jobs: pre-commit: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: 3.x cache: pip diff --git a/.github/workflows/test_install.yml b/.github/workflows/test_install.yml index 866d4782..eb365cef 100644 --- a/.github/workflows/test_install.yml +++ b/.github/workflows/test_install.yml @@ -11,6 +11,6 @@ jobs: matrix: setuptools-version: ["45.2.0", "58.1.0", "62.4.0"] steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - run: pip install setuptools=="${{ matrix.setuptools-version }}" - run: pip install . diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 8cf26cad..8f3d477d 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -9,8 +9,8 @@ jobs: matrix: python: ['3.7', '3.8', '3.9', '3.10', '3.11', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9'] steps: - - uses: actions/checkout@v3 - - uses: actions/setup-python@v4 + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: python-version: ${{ matrix.python }} cache: pip From f1e9b59552b4f72e45a9f02ee176708306903d70 Mon Sep 17 00:00:00 2001 From: jake Date: Mon, 26 Feb 2024 13:14:07 -0800 Subject: [PATCH 21/30] Auth wasn't being sent for searches with user_aggs params --- internetarchive/search.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/internetarchive/search.py b/internetarchive/search.py index 07e510c3..975a261d 100644 --- a/internetarchive/search.py +++ b/internetarchive/search.py @@ -214,7 +214,10 @@ def _user_aggs(self): self.params['page'] = '1' self.params['rows'] = '1' self.params['output'] = 'json' - r = self.session.get(self.search_url, params=self.params, **self.request_kwargs) + r = self.session.get(self.search_url, + params=self.params, + auth=self.auth, + **self.request_kwargs) j = r.json() if j.get('error'): yield j From c894888029b949af5846cb14bb59ccc326d50c28 Mon Sep 17 00:00:00 2001 From: jake Date: Tue, 12 Mar 2024 10:10:43 -0700 Subject: [PATCH 22/30] Support for moving values via --append-list Previously `--append-list` would've ignored anything that was already in a fields list of values. Now it will move the value to the end of the list. --- internetarchive/iarequest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index 60505dd6..62f2ac50 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -445,7 +445,7 @@ def rm_index(key): continue else: if v in source_metadata[key]: - continue + source_metadata[key] = [x for x in source_metadata[key] if x != v] if not isinstance(source_metadata[key], list): prepared_metadata[key] = [source_metadata[key]] else: From ae8e7a43f5d7a4f6e78c04d0c0ad4911d73e9af1 Mon Sep 17 00:00:00 2001 From: jake Date: Tue, 19 Mar 2024 10:47:45 -0700 Subject: [PATCH 23/30] fixed test --- tests/test_item.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_item.py b/tests/test_item.py index 2a64edde..adaf0bcd 100644 --- a/tests/test_item.py +++ b/tests/test_item.py @@ -32,7 +32,7 @@ 'x-archive-auto-make-bucket': '1', 'authorization': 'LOW a:b', 'accept': '*/*', - 'accept-encoding': 'gzip, deflate, br', + 'accept-encoding': 'gzip, deflate', 'connection': 'close', } From e84edd1cb9beaa8b56a870d4ad0d493bf029189b Mon Sep 17 00:00:00 2001 From: jake Date: Tue, 19 Mar 2024 11:31:45 -0700 Subject: [PATCH 24/30] updated python versions to test with --- .github/workflows/tox.yml | 2 +- tox.ini | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index 8f3d477d..f177e9e1 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -7,7 +7,7 @@ jobs: fail-fast: false max-parallel: 1 # Avoid timeout errors matrix: - python: ['3.7', '3.8', '3.9', '3.10', '3.11', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9'] + python: ['3.8', '3.9', '3.10', '3.11', '3.12', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9'] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/tox.ini b/tox.ini index 062086a0..3eef780b 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py37,py38,py39,py310,py311,pypy37,pypy38,pypy39 +envlist = py38,py39,py310,py311,py312,pypy37,pypy38,pypy39 [testenv] deps = -r tests/requirements.txt @@ -7,9 +7,6 @@ deps = -r tests/requirements.txt commands = ruff . pytest {posargs} -[testenv:py37] -basepython=python3.7 - [testenv:py38] basepython=python3.8 @@ -21,3 +18,6 @@ basepython=python3.10 [testenv:py311] basepython=python3.11 + +[testenv:py312] +basepython=python3.12 From 5b958e5fe0410e9147132526cbbc36800d7a5599 Mon Sep 17 00:00:00 2001 From: jake Date: Tue, 19 Mar 2024 11:32:13 -0700 Subject: [PATCH 25/30] pep8 fix --- internetarchive/cli/ia.py | 1 + 1 file changed, 1 insertion(+) diff --git a/internetarchive/cli/ia.py b/internetarchive/cli/ia.py index e674dcb9..0ffc3435 100755 --- a/internetarchive/cli/ia.py +++ b/internetarchive/cli/ia.py @@ -64,6 +64,7 @@ import sys from docopt import docopt, printable_usage + if sys.version_info < (3, 10): from importlib_metadata import entry_points else: From 0bb8eb80d2f8c1b558501d3ee50bb39679156d65 Mon Sep 17 00:00:00 2001 From: jake Date: Tue, 19 Mar 2024 11:44:28 -0700 Subject: [PATCH 26/30] test fix --- internetarchive/cli/ia_reviews.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internetarchive/cli/ia_reviews.py b/internetarchive/cli/ia_reviews.py index d06de76d..759ee536 100644 --- a/internetarchive/cli/ia_reviews.py +++ b/internetarchive/cli/ia_reviews.py @@ -68,7 +68,7 @@ def main(argv, session: ArchiveSession) -> None: print(r.text) sys.exit(0) except HTTPError as exc: - if exc.response.status_code == 404: + if exc.response.status_code == 404: # type: ignore sys.exit(0) else: raise exc From b5d80699badd186ba67900edc4c9f76a3ab1a2e7 Mon Sep 17 00:00:00 2001 From: jake Date: Tue, 19 Mar 2024 11:52:40 -0700 Subject: [PATCH 27/30] test fix --- internetarchive/cli/ia.py | 4 ++-- setup.cfg | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/internetarchive/cli/ia.py b/internetarchive/cli/ia.py index 0ffc3435..e00e5b6b 100755 --- a/internetarchive/cli/ia.py +++ b/internetarchive/cli/ia.py @@ -66,9 +66,9 @@ from docopt import docopt, printable_usage if sys.version_info < (3, 10): - from importlib_metadata import entry_points + from importlib_metadata import entry_points # type: ignore[import] else: - from importlib.metadata import entry_points + from importlib.metadata import entry_points from schema import Or, Schema, SchemaError # type: ignore[import] from internetarchive import __version__ diff --git a/setup.cfg b/setup.cfg index da2da6e0..28c63a4d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -26,12 +26,12 @@ packages = internetarchive.cli install_requires = docopt>=0.6.0,<0.7.0 - importlib-metadata >= 3.6.0 ; python_version <= "3.10" jsonpatch>=0.4 requests>=2.25.0,<3.0.0 schema>=0.4.0 tqdm>=4.0.0 urllib3>=1.26.0 + importlib-metadata>=3.6.0 ;python_version <= "3.10" python_requires = >=3.7 include_package_data = True zip_safe = False From 00b3966cde1c2cddadb236b59620196b8e1c7b0c Mon Sep 17 00:00:00 2001 From: jake Date: Tue, 19 Mar 2024 14:07:07 -0700 Subject: [PATCH 28/30] fixed small bug in expect workflow --- internetarchive/iarequest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internetarchive/iarequest.py b/internetarchive/iarequest.py index 82c0bfd3..475785c9 100644 --- a/internetarchive/iarequest.py +++ b/internetarchive/iarequest.py @@ -352,7 +352,7 @@ def prepare_patch(metadata, source_metadata, append, path = f'/{key}/{idx}' p_test = {'op': 'test', 'path': path, 'value': expect[expect_key]} else: - path = f'/{key}' + path = f'/{expect_key}' p_test = {'op': 'test', 'path': path, 'value': expect[expect_key]} patch_tests.append(p_test) From 46e204d30e8495cc6a3ef9518d8a52178bb819dd Mon Sep 17 00:00:00 2001 From: jake Date: Tue, 19 Mar 2024 14:12:17 -0700 Subject: [PATCH 29/30] v3.7.0 --- HISTORY.rst | 16 ++++++++++++++++ internetarchive/__version__.py | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/HISTORY.rst b/HISTORY.rst index 84627aac..84a7eed0 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -3,6 +3,22 @@ Release History --------------- +3.7.0 (2024-03-19) +++++++++++++++++++ + +**Features and Improvements** + +- Added support for JSON Patch test operations, via the ``expect`` parameter. +- Added support for moving values via --append-list + (Now, rather than ignoring any requests where the value is already present, + --append-list will move the value to the end of the list). +- Switched to importlib-metadata to drop deprecated pkg_resources. + +**Bugfixes** + +- Fixed automatic size hint on uploads. +- Fixed bug where auth wasn't being sent for searches with user_aggs params. + 3.6.0 (2023-12-27) ++++++++++++++++++ diff --git a/internetarchive/__version__.py b/internetarchive/__version__.py index 826cf62c..8c3336cc 100644 --- a/internetarchive/__version__.py +++ b/internetarchive/__version__.py @@ -1 +1 @@ -__version__ = '3.6.0' +__version__ = '3.7.0' From 5affdf9a6173cb47b06deda195ad024545d3b86c Mon Sep 17 00:00:00 2001 From: JustAnotherArchivist Date: Wed, 20 Mar 2024 08:18:22 +0000 Subject: [PATCH 30/30] Bump PyPy tests to current versions PyPy 3.7 and PyPy 3.8 are EOL and unsupported (although CPython 3.8 is not yet). --- .github/workflows/tox.yml | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/tox.yml b/.github/workflows/tox.yml index f177e9e1..4dfd9072 100644 --- a/.github/workflows/tox.yml +++ b/.github/workflows/tox.yml @@ -7,7 +7,7 @@ jobs: fail-fast: false max-parallel: 1 # Avoid timeout errors matrix: - python: ['3.8', '3.9', '3.10', '3.11', '3.12', 'pypy-3.7', 'pypy-3.8', 'pypy-3.9'] + python: ['3.8', '3.9', '3.10', '3.11', '3.12', 'pypy-3.9', 'pypy-3.10'] steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 diff --git a/tox.ini b/tox.ini index 3eef780b..82a5cd95 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py38,py39,py310,py311,py312,pypy37,pypy38,pypy39 +envlist = py38,py39,py310,py311,py312,pypy39,pypy310 [testenv] deps = -r tests/requirements.txt