From 502433b7c0c315a3a80d3aa76395f3f4d0a2d72b Mon Sep 17 00:00:00 2001 From: Stefan Sullivan Date: Mon, 15 Nov 2021 09:20:38 -0800 Subject: [PATCH 1/7] #1536: get hashes for all installation candidates Get json from all index servers instead of just the first, then computes hashes on all remaining index links not already returned from servers. --- piptools/repositories/pypi.py | 104 ++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 36 deletions(-) diff --git a/piptools/repositories/pypi.py b/piptools/repositories/pypi.py index 1c59201df..409f87e71 100644 --- a/piptools/repositories/pypi.py +++ b/piptools/repositories/pypi.py @@ -11,6 +11,7 @@ ContextManager, Dict, Iterator, + Iterable, List, NamedTuple, Optional, @@ -239,6 +240,37 @@ def get_dependencies(self, ireq: InstallRequirement) -> Set[InstallRequirement]: return self._dependencies_cache[ireq] + def _get_json_from_index(self, link: Link): + url = f"{link.url}/json" + try: + response = self.session.get(url) + except RequestException as e: + log.debug(f"Fetch package info from PyPI failed: {url}: {e}") + return + + # Skip this PyPI server, because there is no package + # or JSON API might be not supported + if response.status_code == 404: + return + + try: + return response.json() + except ValueError as e: + log.debug(f"Cannot parse JSON response from PyPI: {url}: {e}") + + + def _get_all_package_links(self, ireq: InstallRequirement): + package_indexes = ( + PackageIndex(url=index_url, file_storage_domain="") + for index_url in self.finder.search_scope.index_urls + ) + package_links = ( + Link(f"{package_index.pypi_url}/{ireq.name}", comes_from=package_index.simple_url) + for package_index in package_indexes + ) + return package_links + + def _get_project(self, ireq: InstallRequirement) -> Any: """ Return a dict of a project info from PyPI JSON API for a given @@ -247,29 +279,10 @@ def _get_project(self, ireq: InstallRequirement) -> Any: API reference: https://warehouse.readthedocs.io/api-reference/json/ """ - package_indexes = ( - PackageIndex(url=index_url, file_storage_domain="") - for index_url in self.finder.search_scope.index_urls - ) - for package_index in package_indexes: - url = f"{package_index.pypi_url}/{ireq.name}/json" - try: - response = self.session.get(url) - except RequestException as e: - log.debug(f"Fetch package info from PyPI failed: {url}: {e}") - continue - - # Skip this PyPI server, because there is no package - # or JSON API might be not supported - if response.status_code == 404: - continue - - try: - data = response.json() - except ValueError as e: - log.debug(f"Cannot parse JSON response from PyPI: {url}: {e}") - continue - return data + for _,link in self._get_all_package_links(ireq): + data = self._get_json_from_index(link) + if data is not None: + return data return None def _get_download_path(self, ireq: InstallRequirement) -> str: @@ -320,26 +333,37 @@ def get_hashes(self, ireq: InstallRequirement) -> Set[str]: log.debug(ireq.name) with log.indentation(): - hashes = self._get_hashes_from_pypi(ireq) - if hashes is None: - log.debug("Couldn't get hashes from PyPI, fallback to hashing files") - return self._get_hashes_from_files(ireq) + # get pypi hashes using the json API, once per server + pypi_hashes = self._get_hashes_from_pypi(ireq) + # Compute hashes for ALL candidate installation links, using hashes from json APIs if available + hashes = self._get_hashes_from_files(ireq, pypi_hashes) return hashes - def _get_hashes_from_pypi(self, ireq: InstallRequirement) -> Optional[Set[str]]: + def _get_hashes_from_pypi(self, ireq: InstallRequirement): + # package links help us construct a json query URL for index servers. + # Note the same type but different usage from installation candidates because we only care about one per server. + all_package_links = self._get_all_package_links(ireq) + + # Get json from each index server + pypi_json = {link.comes_from: self._get_json_from_index(link) for link in all_package_links} + pypi_hashes = { + url: self._get_hash_from_json(json_resp, ireq) + for url, json_resp in pypi_json.items() if json_resp + } + + # remove duplicates and empty json responses + return {url: hashes for url, hashes in pypi_hashes.items() if hashes is not None} + + def _get_hash_from_json(self, pypi_json: object, ireq: InstallRequirement) -> Optional[Set[str]]: """ Return a set of hashes from PyPI JSON API for a given InstallRequirement. Return None if fetching data is failed or missing digests. """ - project = self._get_project(ireq) - if project is None: - return None - _, version, _ = as_tuple(ireq) try: - release_files = project["releases"][version] + release_files = pypi_json["releases"][version] except KeyError: log.debug("Missing release files on PyPI") return None @@ -356,7 +380,7 @@ def _get_hashes_from_pypi(self, ireq: InstallRequirement) -> Optional[Set[str]]: return hashes - def _get_hashes_from_files(self, ireq: InstallRequirement) -> Set[str]: + def _get_hashes_from_files(self, ireq: InstallRequirement, pypi_hashes=None) -> Set[str]: """ Return a set of hashes for all release files of a given InstallRequirement. """ @@ -371,10 +395,18 @@ def _get_hashes_from_files(self, ireq: InstallRequirement) -> Set[str]: matching_candidates = candidates_by_version[matching_versions[0]] return { - self._get_file_hash(candidate.link) for candidate in matching_candidates + self._get_file_hash(candidate.link, pypi_hashes) for candidate in matching_candidates } - def _get_file_hash(self, link: Link) -> str: + def _get_file_hash(self, link: Link, pypi_hashes=None) -> str: + if link.has_hash: + return f"{link.hash_name}:{link.hash}" + + # This conditional feels too peculiar to tolerate future maintenance. + # But figuring out how the Link is constructed in find_all_candidates smells like it isn't a guaranteed API + if pypi_hashes and link.comes_from in pypi_hashes: + return pypi_hashes[link.url] + log.debug(f"Hashing {link.show_url}") h = hashlib.new(FAVORITE_HASH) with open_local_or_remote_file(link, self.session) as f: From 407895c85ca79ca88a8ef7e934c995418eb07198 Mon Sep 17 00:00:00 2001 From: Stefan Sullivan Date: Mon, 15 Nov 2021 16:26:50 -0800 Subject: [PATCH 2/7] get hashes for all installation candidates: fix unit test failure --- piptools/repositories/pypi.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/piptools/repositories/pypi.py b/piptools/repositories/pypi.py index 409f87e71..f4d4c264b 100644 --- a/piptools/repositories/pypi.py +++ b/piptools/repositories/pypi.py @@ -279,7 +279,7 @@ def _get_project(self, ireq: InstallRequirement) -> Any: API reference: https://warehouse.readthedocs.io/api-reference/json/ """ - for _,link in self._get_all_package_links(ireq): + for link in self._get_all_package_links(ireq): data = self._get_json_from_index(link) if data is not None: return data @@ -353,7 +353,10 @@ def _get_hashes_from_pypi(self, ireq: InstallRequirement): } # remove duplicates and empty json responses - return {url: hashes for url, hashes in pypi_hashes.items() if hashes is not None} + hashes_by_index = { + url: hashes for url, hashes in pypi_hashes.items() if hashes is not None + } + return hashes_by_index or None def _get_hash_from_json(self, pypi_json: object, ireq: InstallRequirement) -> Optional[Set[str]]: """ From ad5c79476a4e81bc1cbbc4ebfb46e7eec566c061 Mon Sep 17 00:00:00 2001 From: Stefan Sullivan Date: Mon, 15 Nov 2021 16:28:32 -0800 Subject: [PATCH 3/7] whitespace --- piptools/repositories/pypi.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/piptools/repositories/pypi.py b/piptools/repositories/pypi.py index f4d4c264b..96422bc5d 100644 --- a/piptools/repositories/pypi.py +++ b/piptools/repositories/pypi.py @@ -11,7 +11,6 @@ ContextManager, Dict, Iterator, - Iterable, List, NamedTuple, Optional, @@ -258,19 +257,20 @@ def _get_json_from_index(self, link: Link): except ValueError as e: log.debug(f"Cannot parse JSON response from PyPI: {url}: {e}") - def _get_all_package_links(self, ireq: InstallRequirement): package_indexes = ( PackageIndex(url=index_url, file_storage_domain="") for index_url in self.finder.search_scope.index_urls ) package_links = ( - Link(f"{package_index.pypi_url}/{ireq.name}", comes_from=package_index.simple_url) + Link( + f"{package_index.pypi_url}/{ireq.name}", + comes_from=package_index.simple_url, + ) for package_index in package_indexes ) return package_links - def _get_project(self, ireq: InstallRequirement) -> Any: """ Return a dict of a project info from PyPI JSON API for a given @@ -346,10 +346,14 @@ def _get_hashes_from_pypi(self, ireq: InstallRequirement): all_package_links = self._get_all_package_links(ireq) # Get json from each index server - pypi_json = {link.comes_from: self._get_json_from_index(link) for link in all_package_links} + pypi_json = { + link.comes_from: self._get_json_from_index(link) + for link in all_package_links + } pypi_hashes = { url: self._get_hash_from_json(json_resp, ireq) - for url, json_resp in pypi_json.items() if json_resp + for url, json_resp in pypi_json.items() + if json_resp } # remove duplicates and empty json responses @@ -358,7 +362,9 @@ def _get_hashes_from_pypi(self, ireq: InstallRequirement): } return hashes_by_index or None - def _get_hash_from_json(self, pypi_json: object, ireq: InstallRequirement) -> Optional[Set[str]]: + def _get_hash_from_json( + self, pypi_json: object, ireq: InstallRequirement + ) -> Optional[Set[str]]: """ Return a set of hashes from PyPI JSON API for a given InstallRequirement. Return None if fetching data is failed or missing digests. @@ -383,7 +389,9 @@ def _get_hash_from_json(self, pypi_json: object, ireq: InstallRequirement) -> Op return hashes - def _get_hashes_from_files(self, ireq: InstallRequirement, pypi_hashes=None) -> Set[str]: + def _get_hashes_from_files( + self, ireq: InstallRequirement, pypi_hashes=None + ) -> Set[str]: """ Return a set of hashes for all release files of a given InstallRequirement. """ @@ -398,7 +406,8 @@ def _get_hashes_from_files(self, ireq: InstallRequirement, pypi_hashes=None) -> matching_candidates = candidates_by_version[matching_versions[0]] return { - self._get_file_hash(candidate.link, pypi_hashes) for candidate in matching_candidates + self._get_file_hash(candidate.link, pypi_hashes) + for candidate in matching_candidates } def _get_file_hash(self, link: Link, pypi_hashes=None) -> str: From 5f39343967d62829dca9b1e63065bf68743c5c20 Mon Sep 17 00:00:00 2001 From: Stefan Sullivan Date: Mon, 15 Nov 2021 16:29:02 -0800 Subject: [PATCH 4/7] Change unit tests on internal implementation methods --- tests/test_repository_pypi.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_repository_pypi.py b/tests/test_repository_pypi.py index 078a181e9..1d3957f52 100644 --- a/tests/test_repository_pypi.py +++ b/tests/test_repository_pypi.py @@ -172,7 +172,7 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {"sha256:fake-hash"}, + {'https://pypi.org/simple': {"sha256:fake-hash"}}, id="return single hash", ), pytest.param( @@ -190,7 +190,7 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, + {'https://pypi.org/simple': {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}}, id="return multiple hashes", ), pytest.param( @@ -212,7 +212,7 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, + {'https://pypi.org/simple': {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}}, id="return only bdist_wheel and sdist hashes", ), pytest.param(None, None, id="not found project data"), @@ -243,7 +243,7 @@ def test_get_hashes_from_pypi(from_line, tmpdir, project_data, expected_hashes): """ class MockPyPIRepository(PyPIRepository): - def _get_project(self, ireq): + def _get_json_from_index(self, ireq): return project_data pypi_repository = MockPyPIRepository( From 8335870c1166d19686609e7d7d8d1f70032c363a Mon Sep 17 00:00:00 2001 From: Stefan Sullivan Date: Mon, 24 Jan 2022 14:15:19 -0800 Subject: [PATCH 5/7] add explicit tests for combining json api with simple index servers --- piptools/repositories/pypi.py | 19 +++-- tests/test_repository_pypi.py | 152 ++++++++++++++++++++++++++++++++++ 2 files changed, 162 insertions(+), 9 deletions(-) diff --git a/piptools/repositories/pypi.py b/piptools/repositories/pypi.py index 96422bc5d..cbb904a4c 100644 --- a/piptools/repositories/pypi.py +++ b/piptools/repositories/pypi.py @@ -405,19 +405,20 @@ def _get_hashes_from_files( ) matching_candidates = candidates_by_version[matching_versions[0]] - return { - self._get_file_hash(candidate.link, pypi_hashes) - for candidate in matching_candidates - } - - def _get_file_hash(self, link: Link, pypi_hashes=None) -> str: - if link.has_hash: - return f"{link.hash_name}:{link.hash}" + return set(itertools.chain.from_iterable(self._get_hashes_for_link(candidate.link, pypi_hashes) for candidate in matching_candidates)) + def _get_hashes_for_link(self, link: Link, pypi_hashes: Dict[str,Set[str]]) -> Set[str]: # This conditional feels too peculiar to tolerate future maintenance. # But figuring out how the Link is constructed in find_all_candidates smells like it isn't a guaranteed API if pypi_hashes and link.comes_from in pypi_hashes: - return pypi_hashes[link.url] + yield from pypi_hashes[link.comes_from] + + else: + yield self._get_file_hash(link) + + def _get_file_hash(self, link: Link) -> str: + if link.has_hash: + return f"{link.hash_name}:{link.hash}" log.debug(f"Hashing {link.show_url}") h = hashlib.new(FAVORITE_HASH) diff --git a/tests/test_repository_pypi.py b/tests/test_repository_pypi.py index 1d3957f52..85d3bb371 100644 --- a/tests/test_repository_pypi.py +++ b/tests/test_repository_pypi.py @@ -1,13 +1,16 @@ import os +from pathlib import Path from unittest import mock import pytest +from pip._internal.models.candidate import InstallationCandidate from pip._internal.models.link import Link from pip._internal.utils.urls import path_to_url from pip._vendor.requests import HTTPError, Session from piptools.repositories import PyPIRepository from piptools.repositories.pypi import open_local_or_remote_file +from tests.constants import MINIMAL_WHEELS_PATH def test_generate_hashes_all_platforms(capsys, pip_conf, from_line, pypi_repository): @@ -255,6 +258,155 @@ def _get_json_from_index(self, ireq): assert actual_hashes == expected_hashes +@pytest.mark.parametrize( + ("pypi_json", "expected_hashes"), + ( + pytest.param( + { + "https://pypi.org/pypi/fake-package": { + "releases": { + "0.1": [ + { + "packagetype": "bdist_wheel", + "digests": {"sha256": "fake-hash"}, + } + ] + }, + "comes_from": "https://pypi.org/simple" + }, + }, + {"sha256:fake-hash"}, + id="return single hash", + ), + pytest.param( + { + "https://pypi.org/pypi/fake-package": { + "releases": { + "0.1": [ + { + "packagetype": "bdist_wheel", + "digests": {"sha256": "fake-hash-number1"}, + }, + { + "packagetype": "sdist", + "digests": {"sha256": "fake-hash-number2"}, + }, + ] + }, + "comes_from": "https://pypi.org/simple" + } + }, + {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, + id="return multiple hashes", + ), + pytest.param( + { + "https://pypi.org/simple/fake-package": { + "releases": { + "0.1": [ + { + "packagetype": "bdist_wheel", + "digests": {"sha256": "fake-hash-number1"}, + }, + ] + }, + "comes_from": "https://pypi.org/simple" + }, + "https://internal-index-server.com/pypi/fake-package": { + "releases": { + "0.1": [ + { + "packagetype": "sdist", + "digests": {"sha256": "fake-hash-number2"}, + }, + ] + }, + "comes_from": "https://internal-index-server.com/simple" + } + }, + {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, + id="return multiple indices", + ), + ), +) +def test_get_hashes_multiple_indices_json_api(from_line, tmpdir, pypi_json, expected_hashes): + """ + Test PyPIRepository.get_hashes() returns expected hashes for multiple indices. + """ + + # I don't love how specific the details of this test are. If PyPIRepository changes internal + # implementation details, this mock class will need to change too. + class MockPyPIRepository(PyPIRepository): + def _get_all_package_links(self, _): + return [Link(url, comes_from=pypi_json[url]['comes_from']) for url in pypi_json] + + def _get_json_from_index(self, link: Link): + return pypi_json.get(link.url) + + def find_all_candidates(self, req_name: str): + # I don't know why, but candidates have a different URL the the json links + return [ + InstallationCandidate( + 'fake-package', + '0.1', + Link(url, comes_from=pypi_json[url]['comes_from']) + ) + for url in pypi_json + ] + + pypi_repository = MockPyPIRepository( + ["--no-cache-dir"], cache_dir=(tmpdir / "pypi-repo-cache") + ) + ireq = from_line("fake-package==0.1") + + actual_hashes = pypi_repository.get_hashes(ireq) + assert actual_hashes == expected_hashes + + +def test_get_hashes_pypi_and_simple_index_server(from_line, tmpdir): + """ + test PyPIRepository.get_hashes() when a file hash needs to be computed + """ + expected_hashes = { + 'sha256:fake-hash-number1', + 'sha256:5e6071ee6e4c59e0d0408d366fe9b66781d2cf01be9a6e19a2433bb3c5336330', + } + + simple_server_address = "https://internal-index-server.com/pypi/small-fake-a" + hashable_path = Path(MINIMAL_WHEELS_PATH) / 'small_fake_a-0.1-py2.py3-none-any.whl' + + pypi_response = { + "releases": { + "0.1": [ + { + "packagetype": "sdist", + "digests": {"sha256": "fake-hash-number1"}, + }, + ] + }, + "comes_from": "https://pypi.org/simple" + } + installation_links = [ + Link("https://pypi.org/pypi/small-fake-a", comes_from=pypi_response['comes_from']), + Link(f"file://{hashable_path.absolute()}", comes_from=simple_server_address), + ] + installation_candidates = [InstallationCandidate('small-fake-a', '0.1', link) for link in installation_links] + + ireq = from_line("small-fake-a==0.1") + + pypi_repository = PyPIRepository( + ["--index-url", PyPIRepository.DEFAULT_INDEX_URL, + '--extra-index-url', simple_server_address], + cache_dir=(tmpdir / "pypi-repo"), + ) + + with mock.patch.object(pypi_repository, '_get_json_from_index', return_value=pypi_response): + with mock.patch.object(pypi_repository, 'find_all_candidates', return_value=installation_candidates): + actual_hashes = pypi_repository.get_hashes(ireq) + + assert actual_hashes == expected_hashes + + def test_get_project__returns_data(from_line, tmpdir, monkeypatch, pypi_repository): """ Test PyPIRepository._get_project() returns expected project data. From 6436826961c66770cea5e762207fe0ab87d34095 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 25 Jan 2022 22:44:13 +0000 Subject: [PATCH 6/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- tests/test_repository_pypi.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/tests/test_repository_pypi.py b/tests/test_repository_pypi.py index 1d3957f52..5231f60f1 100644 --- a/tests/test_repository_pypi.py +++ b/tests/test_repository_pypi.py @@ -172,7 +172,7 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {'https://pypi.org/simple': {"sha256:fake-hash"}}, + {"https://pypi.org/simple": {"sha256:fake-hash"}}, id="return single hash", ), pytest.param( @@ -190,7 +190,12 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {'https://pypi.org/simple': {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}}, + { + "https://pypi.org/simple": { + "sha256:fake-hash-number1", + "sha256:fake-hash-number2", + } + }, id="return multiple hashes", ), pytest.param( @@ -212,7 +217,12 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {'https://pypi.org/simple': {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}}, + { + "https://pypi.org/simple": { + "sha256:fake-hash-number1", + "sha256:fake-hash-number2", + } + }, id="return only bdist_wheel and sdist hashes", ), pytest.param(None, None, id="not found project data"), From f14ee7e7e4109db3520a5cfeb86fc8afe3e12e1b Mon Sep 17 00:00:00 2001 From: Stefan Sullivan Date: Tue, 25 Jan 2022 12:36:02 -0800 Subject: [PATCH 7/7] code style --- piptools/repositories/pypi.py | 60 +++++++++++++++++++------- tests/test_repository_pypi.py | 80 +++++++++++++++++++++++------------ 2 files changed, 99 insertions(+), 41 deletions(-) diff --git a/piptools/repositories/pypi.py b/piptools/repositories/pypi.py index cbb904a4c..eeb305d1c 100644 --- a/piptools/repositories/pypi.py +++ b/piptools/repositories/pypi.py @@ -10,6 +10,7 @@ BinaryIO, ContextManager, Dict, + Iterable, Iterator, List, NamedTuple, @@ -27,7 +28,7 @@ from pip._internal.models.index import PackageIndex from pip._internal.models.link import Link from pip._internal.models.wheel import Wheel -from pip._internal.network.session import PipSession +from pip._internal.network.session import PipSession, Response from pip._internal.req import InstallRequirement, RequirementSet from pip._internal.req.req_tracker import get_requirement_tracker from pip._internal.utils.hashes import FAVORITE_HASH @@ -239,25 +240,41 @@ def get_dependencies(self, ireq: InstallRequirement) -> Set[InstallRequirement]: return self._dependencies_cache[ireq] - def _get_json_from_index(self, link: Link): + def _get_json_from_index(self, link: Link) -> Optional[Dict[str, Any]]: url = f"{link.url}/json" try: response = self.session.get(url) except RequestException as e: log.debug(f"Fetch package info from PyPI failed: {url}: {e}") - return + return None # Skip this PyPI server, because there is no package # or JSON API might be not supported if response.status_code == 404: - return + return None try: - return response.json() + return self._get_json_obj(response) except ValueError as e: log.debug(f"Cannot parse JSON response from PyPI: {url}: {e}") + return None + + @staticmethod + def _get_json_obj(resp: Response) -> Optional[Dict[str, Any]]: + decoded_json = resp.json() + + if decoded_json is None: + return decoded_json + + # This is actually guaranteed by the python stdlib. Json can only contain these types + if not isinstance(decoded_json, dict): + raise ValueError( + f"Invalid json type {type(decoded_json)}. Expected 'object'" + ) - def _get_all_package_links(self, ireq: InstallRequirement): + return decoded_json + + def _get_all_package_links(self, ireq: InstallRequirement) -> Iterable[Link]: package_indexes = ( PackageIndex(url=index_url, file_storage_domain="") for index_url in self.finder.search_scope.index_urls @@ -271,7 +288,7 @@ def _get_all_package_links(self, ireq: InstallRequirement): ) return package_links - def _get_project(self, ireq: InstallRequirement) -> Any: + def _get_project(self, ireq: InstallRequirement) -> Optional[Dict[str, Any]]: """ Return a dict of a project info from PyPI JSON API for a given InstallRequirement. Return None on HTTP/JSON error or if a package @@ -335,14 +352,17 @@ def get_hashes(self, ireq: InstallRequirement) -> Set[str]: with log.indentation(): # get pypi hashes using the json API, once per server pypi_hashes = self._get_hashes_from_pypi(ireq) - # Compute hashes for ALL candidate installation links, using hashes from json APIs if available + # Compute hashes for ALL candidate installation links, using hashes from json APIs hashes = self._get_hashes_from_files(ireq, pypi_hashes) return hashes - def _get_hashes_from_pypi(self, ireq: InstallRequirement): + def _get_hashes_from_pypi( + self, ireq: InstallRequirement + ) -> Optional[Dict[str, Set[str]]]: # package links help us construct a json query URL for index servers. - # Note the same type but different usage from installation candidates because we only care about one per server. + # Note the same type but different usage from installation candidates because we only care + # about one per server. all_package_links = self._get_all_package_links(ireq) # Get json from each index server @@ -363,7 +383,7 @@ def _get_hashes_from_pypi(self, ireq: InstallRequirement): return hashes_by_index or None def _get_hash_from_json( - self, pypi_json: object, ireq: InstallRequirement + self, pypi_json: Dict[str, Any], ireq: InstallRequirement ) -> Optional[Set[str]]: """ Return a set of hashes from PyPI JSON API for a given InstallRequirement. @@ -390,7 +410,9 @@ def _get_hash_from_json( return hashes def _get_hashes_from_files( - self, ireq: InstallRequirement, pypi_hashes=None + self, + ireq: InstallRequirement, + pypi_hashes: Optional[Dict[str, Set[str]]] = None, ) -> Set[str]: """ Return a set of hashes for all release files of a given InstallRequirement. @@ -405,11 +427,19 @@ def _get_hashes_from_files( ) matching_candidates = candidates_by_version[matching_versions[0]] - return set(itertools.chain.from_iterable(self._get_hashes_for_link(candidate.link, pypi_hashes) for candidate in matching_candidates)) + return set( + itertools.chain.from_iterable( + self._get_hashes_for_link(candidate.link, pypi_hashes) + for candidate in matching_candidates + ) + ) - def _get_hashes_for_link(self, link: Link, pypi_hashes: Dict[str,Set[str]]) -> Set[str]: + def _get_hashes_for_link( + self, link: Link, pypi_hashes: Optional[Dict[str, Set[str]]] + ) -> Iterator[str]: # This conditional feels too peculiar to tolerate future maintenance. - # But figuring out how the Link is constructed in find_all_candidates smells like it isn't a guaranteed API + # But figuring out how the Link is constructed in find_all_candidates smells like + # it isn't a guaranteed API if pypi_hashes and link.comes_from in pypi_hashes: yield from pypi_hashes[link.comes_from] diff --git a/tests/test_repository_pypi.py b/tests/test_repository_pypi.py index 85d3bb371..e3605663a 100644 --- a/tests/test_repository_pypi.py +++ b/tests/test_repository_pypi.py @@ -1,5 +1,6 @@ import os from pathlib import Path +from typing import Any, Iterable, List from unittest import mock import pytest @@ -175,7 +176,7 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {'https://pypi.org/simple': {"sha256:fake-hash"}}, + {"https://pypi.org/simple": {"sha256:fake-hash"}}, id="return single hash", ), pytest.param( @@ -193,7 +194,12 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {'https://pypi.org/simple': {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}}, + { + "https://pypi.org/simple": { + "sha256:fake-hash-number1", + "sha256:fake-hash-number2", + } + }, id="return multiple hashes", ), pytest.param( @@ -215,7 +221,12 @@ def test_pip_cache_dir_is_empty(from_line, tmpdir): ] } }, - {'https://pypi.org/simple': {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}}, + { + "https://pypi.org/simple": { + "sha256:fake-hash-number1", + "sha256:fake-hash-number2", + } + }, id="return only bdist_wheel and sdist hashes", ), pytest.param(None, None, id="not found project data"), @@ -272,7 +283,7 @@ def _get_json_from_index(self, ireq): } ] }, - "comes_from": "https://pypi.org/simple" + "comes_from": "https://pypi.org/simple", }, }, {"sha256:fake-hash"}, @@ -293,7 +304,7 @@ def _get_json_from_index(self, ireq): }, ] }, - "comes_from": "https://pypi.org/simple" + "comes_from": "https://pypi.org/simple", } }, {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, @@ -310,7 +321,7 @@ def _get_json_from_index(self, ireq): }, ] }, - "comes_from": "https://pypi.org/simple" + "comes_from": "https://pypi.org/simple", }, "https://internal-index-server.com/pypi/fake-package": { "releases": { @@ -321,15 +332,17 @@ def _get_json_from_index(self, ireq): }, ] }, - "comes_from": "https://internal-index-server.com/simple" - } + "comes_from": "https://internal-index-server.com/simple", + }, }, {"sha256:fake-hash-number1", "sha256:fake-hash-number2"}, id="return multiple indices", ), ), ) -def test_get_hashes_multiple_indices_json_api(from_line, tmpdir, pypi_json, expected_hashes): +def test_get_hashes_multiple_indices_json_api( + from_line, tmpdir, pypi_json, expected_hashes +): """ Test PyPIRepository.get_hashes() returns expected hashes for multiple indices. """ @@ -337,19 +350,21 @@ def test_get_hashes_multiple_indices_json_api(from_line, tmpdir, pypi_json, expe # I don't love how specific the details of this test are. If PyPIRepository changes internal # implementation details, this mock class will need to change too. class MockPyPIRepository(PyPIRepository): - def _get_all_package_links(self, _): - return [Link(url, comes_from=pypi_json[url]['comes_from']) for url in pypi_json] + def _get_all_package_links(self, _: Any) -> Iterable[Any]: + return [ + Link(url, comes_from=pypi_json[url]["comes_from"]) for url in pypi_json + ] - def _get_json_from_index(self, link: Link): + def _get_json_from_index(self, link: Link) -> Any: return pypi_json.get(link.url) - def find_all_candidates(self, req_name: str): + def find_all_candidates(self, req_name: str) -> List[InstallationCandidate]: # I don't know why, but candidates have a different URL the the json links return [ InstallationCandidate( - 'fake-package', - '0.1', - Link(url, comes_from=pypi_json[url]['comes_from']) + "fake-package", + "0.1", + Link(url, comes_from=pypi_json[url]["comes_from"]), ) for url in pypi_json ] @@ -368,12 +383,12 @@ def test_get_hashes_pypi_and_simple_index_server(from_line, tmpdir): test PyPIRepository.get_hashes() when a file hash needs to be computed """ expected_hashes = { - 'sha256:fake-hash-number1', - 'sha256:5e6071ee6e4c59e0d0408d366fe9b66781d2cf01be9a6e19a2433bb3c5336330', + "sha256:fake-hash-number1", + "sha256:5e6071ee6e4c59e0d0408d366fe9b66781d2cf01be9a6e19a2433bb3c5336330", } simple_server_address = "https://internal-index-server.com/pypi/small-fake-a" - hashable_path = Path(MINIMAL_WHEELS_PATH) / 'small_fake_a-0.1-py2.py3-none-any.whl' + hashable_path = Path(MINIMAL_WHEELS_PATH) / "small_fake_a-0.1-py2.py3-none-any.whl" pypi_response = { "releases": { @@ -384,24 +399,37 @@ def test_get_hashes_pypi_and_simple_index_server(from_line, tmpdir): }, ] }, - "comes_from": "https://pypi.org/simple" + "comes_from": "https://pypi.org/simple", } installation_links = [ - Link("https://pypi.org/pypi/small-fake-a", comes_from=pypi_response['comes_from']), + Link( + "https://pypi.org/pypi/small-fake-a", comes_from=pypi_response["comes_from"] + ), Link(f"file://{hashable_path.absolute()}", comes_from=simple_server_address), ] - installation_candidates = [InstallationCandidate('small-fake-a', '0.1', link) for link in installation_links] + installation_candidates = [ + InstallationCandidate("small-fake-a", "0.1", link) + for link in installation_links + ] ireq = from_line("small-fake-a==0.1") pypi_repository = PyPIRepository( - ["--index-url", PyPIRepository.DEFAULT_INDEX_URL, - '--extra-index-url', simple_server_address], + [ + "--index-url", + PyPIRepository.DEFAULT_INDEX_URL, + "--extra-index-url", + simple_server_address, + ], cache_dir=(tmpdir / "pypi-repo"), ) - with mock.patch.object(pypi_repository, '_get_json_from_index', return_value=pypi_response): - with mock.patch.object(pypi_repository, 'find_all_candidates', return_value=installation_candidates): + with mock.patch.object( + pypi_repository, "_get_json_from_index", return_value=pypi_response + ): + with mock.patch.object( + pypi_repository, "find_all_candidates", return_value=installation_candidates + ): actual_hashes = pypi_repository.get_hashes(ireq) assert actual_hashes == expected_hashes