comparison env/lib/python3.9/site-packages/pip/_internal/cache.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """Cache Management
2 """
3
4 import hashlib
5 import json
6 import logging
7 import os
8
9 from pip._vendor.packaging.tags import interpreter_name, interpreter_version
10 from pip._vendor.packaging.utils import canonicalize_name
11
12 from pip._internal.exceptions import InvalidWheelFilename
13 from pip._internal.models.link import Link
14 from pip._internal.models.wheel import Wheel
15 from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds
16 from pip._internal.utils.typing import MYPY_CHECK_RUNNING
17 from pip._internal.utils.urls import path_to_url
18
19 if MYPY_CHECK_RUNNING:
20 from typing import Any, Dict, List, Optional, Set
21
22 from pip._vendor.packaging.tags import Tag
23
24 from pip._internal.models.format_control import FormatControl
25
26 logger = logging.getLogger(__name__)
27
28
29 def _hash_dict(d):
30 # type: (Dict[str, str]) -> str
31 """Return a stable sha224 of a dictionary."""
32 s = json.dumps(d, sort_keys=True, separators=(",", ":"), ensure_ascii=True)
33 return hashlib.sha224(s.encode("ascii")).hexdigest()
34
35
class Cache:
    """Abstract base class that provides cache directories for data from links.

    Subclasses must implement ``get_path_for_link`` and ``get``.

    :param cache_dir: The root of the cache.
    :param format_control: An object of FormatControl class to limit
        binaries being read from the cache.
    :param allowed_formats: which formats of files the cache should store.
        ('binary' and 'source' are the only allowed values)
    """

    def __init__(self, cache_dir, format_control, allowed_formats):
        # type: (str, FormatControl, Set[str]) -> None
        super().__init__()
        # A non-empty cache root has to be an absolute path.
        assert not cache_dir or os.path.isabs(cache_dir)
        # A falsy cache_dir is stored as None; _get_candidates() then
        # returns no candidates at all.
        self.cache_dir = cache_dir or None
        self.format_control = format_control
        self.allowed_formats = allowed_formats

        known_formats = {"source", "binary"}
        assert self.allowed_formats.issubset(known_formats)

    def _get_cache_path_parts(self, link):
        # type: (Link) -> List[str]
        """Get the parts of the path that must be os.path.joined with
        cache_dir.

        :param link: The link to derive the cache key from.
        :return: Four path components derived from the hashed cache key.
        """
        # The cache key is built from the URL without its fragment: the
        # fragment might carry other items that we don't care about.
        key_parts = {"url": link.url_without_fragment}
        if link.hash_name is not None and link.hash is not None:
            key_parts[link.hash_name] = link.hash
        subdirectory = link.subdirectory_fragment
        if subdirectory:
            key_parts["subdirectory"] = subdirectory

        # The interpreter name and version are part of the key to cope with
        # ill-behaved sdists that build a different wheel depending on the
        # python version their setup.py is being run on, and don't encode
        # the difference in compatibility tags.
        # https://github.com/pypa/pip/issues/7296
        key_parts["interpreter_name"] = interpreter_name()
        key_parts["interpreter_version"] = interpreter_version()

        # sha224 has security properties similar to sha256 but a shorter
        # output (and is thus less secure); the difference does not matter
        # much for this use case.
        digest = _hash_dict(key_parts)

        # Nest the directories a bit so that we do not end up with a huge
        # number of top level directories, which could exhaust the number of
        # sub directories on some file systems.
        return [digest[:2], digest[2:4], digest[4:6], digest[6:]]

    def _get_candidates(self, link, canonical_package_name):
        # type: (Link, str) -> List[Any]
        """List the entries cached for a link.

        :param link: The link whose cache directory is inspected.
        :param canonical_package_name: The canonicalized name of the package,
            used to ask format_control which formats are allowed.
        :return: A list of ``(entry_name, directory)`` tuples; empty when
            caching is not possible, no allowed format matches, or the
            cache directory for the link does not exist.
        """
        if not (self.cache_dir and canonical_package_name and link):
            return []

        permitted = self.format_control.get_allowed_formats(
            canonical_package_name
        )
        if not self.allowed_formats.intersection(permitted):
            return []

        path = self.get_path_for_link(link)
        if not os.path.isdir(path):
            return []
        return [(entry, path) for entry in os.listdir(path)]

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached items in for link.

        Must be overridden by subclasses.
        """
        raise NotImplementedError()

    def get(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Returns a link to a cached item if it exists, otherwise returns the
        passed link.

        Must be overridden by subclasses.
        """
        raise NotImplementedError()
133
134
class SimpleWheelCache(Cache):
    """A cache of wheels for future installs."""

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        # This cache only ever stores the 'binary' format.
        super().__init__(cache_dir, format_control, {"binary"})

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached wheels for link

        Because there are M wheels for any one sdist, we provide a directory
        to cache them in, and then consult that directory when looking up
        cache hits.

        We only insert things into the cache if they have plausible version
        numbers, so that we don't contaminate the cache with things that were
        not unique. E.g. ./package might have dozens of installs done for it
        and build a version of 0.0...and if we built and cached a wheel, we'd
        end up using the same wheel even if the source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        path_parts = self._get_cache_path_parts(link)
        assert self.cache_dir
        # Wheels live in a "wheels" directory under the root cache_dir.
        return os.path.join(self.cache_dir, "wheels", *path_parts)

    def get(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to the best matching cached wheel, or the passed
        link unchanged when no usable wheel is cached.

        :param link: The link to look up in the cache.
        :param package_name: The expected distribution name; when empty or
            None no lookup is done.
        :param supported_tags: The tags used to filter and rank wheels.
        """
        if not package_name:
            return link

        expected_name = canonicalize_name(package_name)
        ranked = []
        for wheel_name, wheel_dir in self._get_candidates(
            link, expected_name
        ):
            try:
                wheel = Wheel(wheel_name)
            except InvalidWheelFilename:
                # Not a wheel file name: skip this cache entry.
                continue

            if canonicalize_name(wheel.name) != expected_name:
                logger.debug(
                    "Ignoring cached wheel %s for %s as it "
                    "does not match the expected distribution name %s.",
                    wheel_name, link, package_name,
                )
                continue

            # Wheels built for a different python/arch/etc are ignored.
            if wheel.supported(supported_tags):
                rank = wheel.support_index_min(supported_tags)
                ranked.append((rank, wheel_name, wheel_dir))

        if ranked:
            # The lowest tuple wins: smallest support index first, then
            # wheel name and directory as tie-breakers.
            _, best_name, best_dir = min(ranked)
            return Link(path_to_url(os.path.join(best_dir, best_name)))
        return link
207
208
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory."""

    def __init__(self, format_control):
        # type: (FormatControl) -> None
        # The TempDirectory object is kept on the instance; its path becomes
        # the cache root handed to SimpleWheelCache.
        self._temp_dir = TempDirectory(
            globally_managed=True,
            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
        )

        super().__init__(
            cache_dir=self._temp_dir.path,
            format_control=format_control,
        )
221
222
class CacheEntry:
    """A cached link together with a flag telling whether it was found in
    the persistent cache (True) or not (False).
    """

    def __init__(
        self,
        link,  # type: Link
        persistent,  # type: bool
    ):
        # type: (...) -> None
        self.link, self.persistent = link, persistent
231
232
class WheelCache(Cache):
    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache

    This Cache allows for graceful degradation, using the ephem wheel cache
    when a certain link is not found in the simple wheel cache first.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super().__init__(cache_dir, format_control, {'binary'})
        self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
        self._ephem_cache = EphemWheelCache(format_control)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return the directory of the simple wheel cache for link."""
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link):
        # type: (Link) -> str
        """Return the directory of the ephem wheel cache for link."""
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to a cached item if one exists in either cache,
        otherwise return the passed link.
        """
        entry = self.get_cache_entry(link, package_name, supported_tags)
        return link if entry is None else entry.link

    def get_cache_entry(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Optional[CacheEntry]
        """Returns a CacheEntry with a link to a cached item if it exists or
        None. The cache entry indicates if the item was found in the persistent
        or ephemeral cache.
        """
        # The simple wheel cache is consulted first and marks its hits as
        # persistent; the ephem cache is the fallback.
        lookup_order = (
            (self._wheel_cache, True),
            (self._ephem_cache, False),
        )
        for cache, is_persistent in lookup_order:
            found = cache.get(
                link=link,
                package_name=package_name,
                supported_tags=supported_tags,
            )
            # A cache signals a miss by handing back the very same link
            # object, hence the identity comparison.
            if found is not link:
                return CacheEntry(found, persistent=is_persistent)

        return None