503 lines
19 KiB
Python
503 lines
19 KiB
Python
from __future__ import annotations
|
|
|
|
import contextlib
|
|
import hashlib
|
|
import itertools
|
|
import optparse
|
|
import os
|
|
from contextlib import contextmanager
|
|
from shutil import rmtree
|
|
from typing import Any, BinaryIO, ContextManager, Iterator, NamedTuple
|
|
|
|
from click import progressbar
|
|
from pip._internal.cache import WheelCache
|
|
from pip._internal.commands import create_command
|
|
from pip._internal.commands.install import InstallCommand
|
|
from pip._internal.index.package_finder import PackageFinder
|
|
from pip._internal.models.candidate import InstallationCandidate
|
|
from pip._internal.models.index import PackageIndex
|
|
from pip._internal.models.link import Link
|
|
from pip._internal.models.wheel import Wheel
|
|
from pip._internal.network.session import PipSession
|
|
from pip._internal.operations.build.build_tracker import get_build_tracker
|
|
from pip._internal.req import InstallRequirement, RequirementSet
|
|
from pip._internal.utils.hashes import FAVORITE_HASH
|
|
from pip._internal.utils.logging import indent_log, setup_logging
|
|
from pip._internal.utils.misc import normalize_path
|
|
from pip._internal.utils.temp_dir import TempDirectory, global_tempdir_manager
|
|
from pip._internal.utils.urls import path_to_url, url_to_path
|
|
from pip._vendor.packaging.tags import Tag
|
|
from pip._vendor.packaging.version import _BaseVersion
|
|
from pip._vendor.requests import RequestException, Session
|
|
|
|
from .._compat import create_wheel_cache
|
|
from ..exceptions import NoCandidateFound
|
|
from ..logging import log
|
|
from ..utils import (
|
|
as_tuple,
|
|
is_pinned_requirement,
|
|
is_url_requirement,
|
|
lookup_table,
|
|
make_install_requirement,
|
|
)
|
|
from .base import BaseRepository
|
|
|
|
# Number of bytes requested per ``read()`` call when a file is streamed
# through the hasher (also used to estimate the progress bar length).
FILE_CHUNK_SIZE = 4096
|
|
|
|
|
|
class FileStream(NamedTuple):
    """An opened binary stream paired with its size, when the size is known."""

    # File-like object opened for reading bytes.
    stream: BinaryIO
    # Size reported for the stream; ``None`` when it could not be determined.
    size: float | None
|
|
|
|
|
|
class PyPIRepository(BaseRepository):
    """
    The PyPIRepository will use the provided Finder instance to lookup
    packages. Typically, it looks up packages on PyPI (the default implicit
    config), but any other PyPI mirror can be used if index_urls is
    changed/configured on the Finder.
    """

    # ``packagetype`` values from the JSON API whose digests are collected
    # by ``_get_hashes_from_pypi``.
    HASHABLE_PACKAGE_TYPES = {"bdist_wheel", "sdist"}

    def __init__(self, pip_args: list[str], cache_dir: str):
        """
        Build the pip command, session and finder from ``pip_args`` and set up
        the in-memory caches and the on-disk download directory under
        ``cache_dir``.
        """
        # Use pip's parser for pip.conf management and defaults.
        # General options (find_links, index_url, extra_index_url, trusted_host,
        # and pre) are deferred to pip.
        self._command: InstallCommand = create_command("install")

        options, _ = self.command.parse_args(pip_args)
        if options.cache_dir:
            options.cache_dir = normalize_path(options.cache_dir)
        options.require_hashes = False
        options.ignore_dependencies = False

        self._options: optparse.Values = options
        self._session = self.command._build_session(options)
        self._finder = self.command._build_package_finder(
            options=options, session=self.session
        )

        # Caches
        # stores project_name => InstallationCandidate mappings for all
        # versions reported by PyPI, so we only have to ask once for each
        # project
        self._available_candidates_cache: dict[str, list[InstallationCandidate]] = {}

        # stores InstallRequirement => list(InstallRequirement) mappings
        # of all secondary dependencies for the given requirement, so we
        # only have to go to disk once for each requirement
        self._dependencies_cache: dict[InstallRequirement, set[InstallRequirement]] = {}

        # Setup file paths
        self._cache_dir = normalize_path(str(cache_dir))
        self._download_dir = os.path.join(self._cache_dir, "pkgs")

        # Default pip's logger is noisy, so decrease its verbosity
        setup_logging(
            verbosity=log.verbosity - 1,
            no_color=self.options.no_color,
            user_log_file=self.options.log,
        )

    def clear_caches(self) -> None:
        """Delete the download directory; filesystem errors are ignored."""
        rmtree(self._download_dir, ignore_errors=True)

    @property
    def options(self) -> optparse.Values:
        """Return the options parsed from the pip arguments."""
        return self._options

    @property
    def session(self) -> PipSession:
        """Return the session built by the install command."""
        return self._session

    @property
    def finder(self) -> PackageFinder:
        """Return the package finder built by the install command."""
        return self._finder

    @property
    def command(self) -> InstallCommand:
        """Return an install command instance."""
        return self._command

    def find_all_candidates(self, req_name: str) -> list[InstallationCandidate]:
        """
        Return every candidate the finder reports for ``req_name``.

        Results are memoized per name in ``_available_candidates_cache`` so the
        finder is queried at most once per project.
        """
        if req_name not in self._available_candidates_cache:
            candidates = self.finder.find_all_candidates(req_name)
            self._available_candidates_cache[req_name] = candidates
        return self._available_candidates_cache[req_name]

    def find_best_match(
        self, ireq: InstallRequirement, prereleases: bool | None = None
    ) -> InstallRequirement:
        """
        Returns a pinned InstallRequirement object that indicates the best match
        for the given InstallRequirement according to the external repository.

        Editable and URL requirements are returned unchanged. Raises
        NoCandidateFound when no candidate satisfies the specifier.
        """
        if ireq.editable or is_url_requirement(ireq):
            return ireq  # return itself as the best match

        all_candidates = self.find_all_candidates(ireq.name)
        candidates_by_version = lookup_table(all_candidates, key=candidate_version)
        matching_versions = ireq.specifier.filter(
            (candidate.version for candidate in all_candidates), prereleases=prereleases
        )

        matching_candidates = list(
            itertools.chain.from_iterable(
                candidates_by_version[ver] for ver in matching_versions
            )
        )
        if not matching_candidates:
            raise NoCandidateFound(ireq, all_candidates, self.finder)

        evaluator = self.finder.make_candidate_evaluator(ireq.name)
        best_candidate_result = evaluator.compute_best_candidate(matching_candidates)
        best_candidate = best_candidate_result.best_candidate

        # Turn the candidate into a pinned InstallRequirement
        return make_install_requirement(
            best_candidate.name,
            best_candidate.version,
            ireq,
        )

    def resolve_reqs(
        self,
        download_dir: str | None,
        ireq: InstallRequirement,
        wheel_cache: WheelCache,
    ) -> set[InstallRequirement]:
        """
        Resolve ``ireq`` with pip's resolver and return the results of
        ``resolver._resolve_one`` as a set.

        ``download_dir`` is forwarded to the requirement preparer and may be
        None. ``ireq`` is mutated: it is flagged as user supplied.
        """
        with get_build_tracker() as build_tracker, TempDirectory(
            kind="resolver"
        ) as temp_dir, indent_log():
            preparer_kwargs = {
                "temp_build_dir": temp_dir,
                "options": self.options,
                "session": self.session,
                "finder": self.finder,
                "use_user_site": False,
                "download_dir": download_dir,
                "build_tracker": build_tracker,
            }
            preparer = self.command.make_requirement_preparer(**preparer_kwargs)

            reqset = RequirementSet()
            ireq.user_supplied = True
            if getattr(ireq, "name", None):
                reqset.add_named_requirement(ireq)
            else:
                reqset.add_unnamed_requirement(ireq)

            resolver = self.command.make_resolver(
                preparer=preparer,
                finder=self.finder,
                options=self.options,
                wheel_cache=wheel_cache,
                use_user_site=False,
                ignore_installed=True,
                ignore_requires_python=False,
                force_reinstall=False,
                upgrade_strategy="to-satisfy-only",
            )
            results = resolver._resolve_one(reqset, ireq)
            if not ireq.prepared:
                # If still not prepared, e.g. a constraint, do enough to assign
                # the ireq a name:
                resolver._get_dist_for(ireq)

        return set(results)

    def get_dependencies(self, ireq: InstallRequirement) -> set[InstallRequirement]:
        """
        Given a pinned, URL, or editable InstallRequirement, returns a set of
        dependencies (also InstallRequirements, but not necessarily pinned).
        They indicate the secondary dependencies for the given requirement.

        Raises TypeError for any other kind of requirement. Results are
        memoized per requirement in ``_dependencies_cache``.
        """
        if not (
            ireq.editable or is_url_requirement(ireq) or is_pinned_requirement(ireq)
        ):
            raise TypeError(
                f"Expected url, pinned or editable InstallRequirement, got {ireq}"
            )

        if ireq not in self._dependencies_cache:
            if ireq.editable and (ireq.source_dir and os.path.exists(ireq.source_dir)):
                # No download_dir for locally available editable requirements.
                # If a download_dir is passed, pip will unnecessarily archive
                # the entire source directory
                download_dir = None
            elif ireq.link and ireq.link.is_vcs:
                # No download_dir for VCS sources.  This also works around pip
                # using git-checkout-index, which gets rid of the .git dir.
                download_dir = None
            else:
                download_dir = self._get_download_path(ireq)
                os.makedirs(download_dir, exist_ok=True)

            with global_tempdir_manager():
                wheel_cache = create_wheel_cache(
                    cache_dir=self._cache_dir,
                    format_control=self.options.format_control,
                )
                self._dependencies_cache[ireq] = self.resolve_reqs(
                    download_dir, ireq, wheel_cache
                )

        return self._dependencies_cache[ireq]

    def _get_project(self, ireq: InstallRequirement) -> Any:
        """
        Return a dict of a project info from PyPI JSON API for a given
        InstallRequirement. Return None on HTTP/JSON error or if a package
        is not found on PyPI server.

        Each configured index URL is tried in order; the first response that
        parses as JSON is returned.

        API reference: https://warehouse.readthedocs.io/api-reference/json/
        """
        package_indexes = (
            PackageIndex(url=index_url, file_storage_domain="")
            for index_url in self.finder.search_scope.index_urls
        )
        for package_index in package_indexes:
            url = f"{package_index.pypi_url}/{ireq.name}/json"
            try:
                response = self.session.get(url)
            except RequestException as e:
                log.debug(f"Fetch package info from PyPI failed: {url}: {e}")
                continue

            # Skip this PyPI server, because there is no package
            # or JSON API might be not supported
            if response.status_code == 404:
                continue

            try:
                data = response.json()
            except ValueError as e:
                log.debug(f"Cannot parse JSON response from PyPI: {url}: {e}")
                continue
            return data
        return None

    def _get_download_path(self, ireq: InstallRequirement) -> str:
        """
        Determine the download dir location in a way which avoids name
        collisions.

        Requirements with a link get a nested directory derived from the
        SHA-224 of the link URL (fragment excluded); all others share the
        top-level download directory.
        """
        if ireq.link:
            salt = hashlib.sha224(ireq.link.url_without_fragment.encode()).hexdigest()
            # Nest directories to avoid running out of top level dirs on some FS
            # (see pypi _get_cache_path_parts, which inspired this)
            return os.path.join(
                self._download_dir, salt[:2], salt[2:4], salt[4:6], salt[6:]
            )
        else:
            return self._download_dir

    def get_hashes(self, ireq: InstallRequirement) -> set[str]:
        """
        Given an InstallRequirement, return a set of hashes that represent all
        of the files for a given requirement. Unhashable requirements return an
        empty set. Unpinned requirements raise a TypeError.
        """

        if ireq.link:
            link = ireq.link

            if link.is_vcs or (link.is_file and link.is_existing_dir()):
                # Return empty set for unhashable requirements.
                # Unhashable logic modeled on pip's
                # RequirementPreparer.prepare_linked_requirement
                return set()

            if is_url_requirement(ireq):
                # Directly hash URL requirements.
                # URL requirements may have been previously downloaded and cached
                # locally by self.resolve_reqs()
                cached_path = os.path.join(self._get_download_path(ireq), link.filename)
                if os.path.exists(cached_path):
                    cached_link = Link(path_to_url(cached_path))
                else:
                    cached_link = link
                return {self._get_file_hash(cached_link)}

        if not is_pinned_requirement(ireq):
            raise TypeError(f"Expected pinned requirement, got {ireq}")

        log.debug(ireq.name)

        with log.indentation():
            return self._get_req_hashes(ireq)

    def _get_req_hashes(self, ireq: InstallRequirement) -> set[str]:
        """
        Collects the hashes for all candidates satisfying the given InstallRequirement. Computes
        the hashes for the candidates that don't have one reported by their index.
        """
        matching_candidates = self._get_matching_candidates(ireq)
        pypi_hashes_by_link = self._get_hashes_from_pypi(ireq)
        pypi_hashes = {
            pypi_hashes_by_link[candidate.link.url]
            for candidate in matching_candidates
            if candidate.link.url in pypi_hashes_by_link
        }
        local_hashes = {
            self._get_file_hash(candidate.link)
            for candidate in matching_candidates
            if candidate.link.url not in pypi_hashes_by_link
        }
        return pypi_hashes | local_hashes

    def _get_hashes_from_pypi(self, ireq: InstallRequirement) -> dict[str, str]:
        """
        Builds a mapping from the release URLs to their hashes as reported by the PyPI JSON API
        for a given InstallRequirement.

        Returns an empty mapping when the project, the release, or any of the
        expected keys is missing from the API response.
        """
        project = self._get_project(ireq)
        if project is None:
            return {}

        _, version, _ = as_tuple(ireq)

        try:
            release_files = project["releases"][version]
        except KeyError:
            log.debug("Missing release files on PyPI")
            return {}

        try:
            hashes = {
                file_["url"]: f"{FAVORITE_HASH}:{file_['digests'][FAVORITE_HASH]}"
                for file_ in release_files
                if file_["packagetype"] in self.HASHABLE_PACKAGE_TYPES
            }
        except KeyError:
            log.debug("Missing digests of release files on PyPI")
            return {}

        return hashes

    def _get_matching_candidates(
        self, ireq: InstallRequirement
    ) -> set[InstallationCandidate]:
        """
        Returns all candidates that satisfy the given InstallRequirement.

        Raises NoCandidateFound when no known version satisfies the specifier.
        """
        # We need to get all of the candidates that match our current version
        # pin, these will represent all of the files that could possibly
        # satisfy this constraint.
        all_candidates = self.find_all_candidates(ireq.name)
        candidates_by_version = lookup_table(all_candidates, key=candidate_version)
        matching_versions = list(
            ireq.specifier.filter(candidate.version for candidate in all_candidates)
        )
        if not matching_versions:
            # Report the miss the same way find_best_match does instead of
            # failing with a bare IndexError on the lookup below.
            raise NoCandidateFound(ireq, all_candidates, self.finder)
        return candidates_by_version[matching_versions[0]]

    def _get_file_hash(self, link: Link) -> str:
        """
        Stream the file behind ``link`` through the FAVORITE_HASH algorithm and
        return ``"<algorithm>:<hexdigest>"``.

        A progress bar is shown while hashing when log verbosity is >= 1.
        """
        log.debug(f"Hashing {link.show_url}")
        h = hashlib.new(FAVORITE_HASH)
        with open_local_or_remote_file(link, self.session) as f:
            # Chunks to iterate
            chunks = iter(lambda: f.stream.read(FILE_CHUNK_SIZE), b"")

            # Choose a context manager depending on verbosity
            context_manager: ContextManager[Iterator[bytes]]
            if log.verbosity >= 1:
                # Round up so a trailing partial chunk counts as a step; the
                # length is unknown (None) when no size was reported.
                iter_length = (
                    (int(f.size) + FILE_CHUNK_SIZE - 1) // FILE_CHUNK_SIZE
                    if f.size
                    else None
                )
                bar_template = f"{' ' * log.current_indent} |%(bar)s| %(info)s"
                context_manager = progressbar(
                    chunks,
                    length=iter_length,
                    # Make it look like default pip progress bar
                    fill_char="█",
                    empty_char=" ",
                    bar_template=bar_template,
                    width=32,
                )
            else:
                context_manager = contextlib.nullcontext(chunks)

            # Iterate over the chosen context manager
            with context_manager as bar:
                for chunk in bar:
                    h.update(chunk)
        return ":".join([FAVORITE_HASH, h.hexdigest()])

    @contextmanager
    def allow_all_wheels(self) -> Iterator[None]:
        """
        Monkey patches pip.Wheel to allow wheels from all platforms and Python versions.

        This also saves the candidate cache and set a new one, or else the results from
        the previous non-patched calls will interfere.
        """

        def _wheel_supported(self: Wheel, tags: list[Tag]) -> bool:
            # Ignore current platform. Support everything.
            return True

        def _wheel_support_index_min(self: Wheel, tags: list[Tag]) -> int:
            # All wheels are equal priority for sorting.
            return 0

        original_wheel_supported = Wheel.supported
        original_support_index_min = Wheel.support_index_min
        original_cache = self._available_candidates_cache

        # Patch inside the try block so the originals are restored even when
        # setting up (e.g. clearing the finder cache) fails part-way.
        try:
            Wheel.supported = _wheel_supported
            Wheel.support_index_min = _wheel_support_index_min
            self._available_candidates_cache = {}

            # If we don't clear this cache then it can contain results from an
            # earlier call when allow_all_wheels wasn't active. See GH-1532
            self.finder.find_all_candidates.cache_clear()

            yield
        finally:
            Wheel.supported = original_wheel_supported
            Wheel.support_index_min = original_support_index_min
            self._available_candidates_cache = original_cache
|
|
|
|
|
|
@contextmanager
def open_local_or_remote_file(link: Link, session: Session) -> Iterator[FileStream]:
    """
    Open local or remote file for reading.

    :type link: pip.index.Link
    :type session: requests.Session
    :raises ValueError: If link points to a local directory.
    :return: a context manager to a FileStream with the opened file-like object
    """
    target = link.url_without_fragment

    if not link.is_file:
        # Remote URL: ask for the identity encoding and stream the body.
        reply = session.get(
            target, headers={"Accept-Encoding": "identity"}, stream=True
        )

        # Content length must be int or None
        total: int | None
        try:
            total = int(reply.headers["content-length"])
        except (ValueError, KeyError, TypeError):
            total = None

        try:
            yield FileStream(stream=reply.raw, size=total)
        finally:
            reply.close()
        return

    # Local URL
    path = url_to_path(target)
    if os.path.isdir(path):
        raise ValueError(f"Cannot open directory for read: {target}")

    # Size is taken before opening, as a separate stat call.
    byte_count = os.stat(path).st_size
    with open(path, "rb") as handle:
        yield FileStream(stream=handle, size=byte_count)
|
|
|
|
|
|
def candidate_version(candidate: InstallationCandidate) -> _BaseVersion:
    """Return the ``version`` attribute of ``candidate`` (used as a grouping key)."""
    version = candidate.version
    return version
|