Mercurial > repos > shellac > guppy_basecaller
comparison env/lib/python3.7/site-packages/pip/_internal/cache.py @ 0:26e78fe6e8c4 draft
"planemo upload commit c699937486c35866861690329de38ec1a5d9f783"
| author | shellac |
|---|---|
| date | Sat, 02 May 2020 07:14:21 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:26e78fe6e8c4 |
|---|---|
| 1 """Cache Management | |
| 2 """ | |
| 3 | |
| 4 # The following comment should be removed at some point in the future. | |
| 5 # mypy: strict-optional=False | |
| 6 | |
| 7 import hashlib | |
| 8 import json | |
| 9 import logging | |
| 10 import os | |
| 11 | |
| 12 from pip._vendor.packaging.tags import interpreter_name, interpreter_version | |
| 13 from pip._vendor.packaging.utils import canonicalize_name | |
| 14 | |
| 15 from pip._internal.exceptions import InvalidWheelFilename | |
| 16 from pip._internal.models.link import Link | |
| 17 from pip._internal.models.wheel import Wheel | |
| 18 from pip._internal.utils.temp_dir import TempDirectory, tempdir_kinds | |
| 19 from pip._internal.utils.typing import MYPY_CHECK_RUNNING | |
| 20 from pip._internal.utils.urls import path_to_url | |
| 21 | |
| 22 if MYPY_CHECK_RUNNING: | |
| 23 from typing import Optional, Set, List, Any, Dict | |
| 24 | |
| 25 from pip._vendor.packaging.tags import Tag | |
| 26 | |
| 27 from pip._internal.models.format_control import FormatControl | |
| 28 | |
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
| 30 | |
| 31 | |
def _hash_dict(d):
    # type: (Dict[str, str]) -> str
    """Return a stable sha224 hex digest of a dictionary.

    The dictionary is serialized as compact JSON with sorted keys and
    ASCII-only output, so equal dictionaries always hash identically
    regardless of insertion order.
    """
    canonical = json.dumps(
        d,
        sort_keys=True,
        separators=(",", ":"),
        ensure_ascii=True,
    )
    digest = hashlib.sha224()
    digest.update(canonical.encode("ascii"))
    return digest.hexdigest()
| 37 | |
| 38 | |
class Cache(object):
    """Abstract base class providing cache directories for data from links.

    :param cache_dir: The root of the cache.
    :param format_control: An object of FormatControl class to limit
        binaries being read from the cache.
    :param allowed_formats: which formats of files the cache should store.
        ('binary' and 'source' are the only allowed values)
    """

    def __init__(self, cache_dir, format_control, allowed_formats):
        # type: (str, FormatControl, Set[str]) -> None
        super(Cache, self).__init__()
        # A falsy cache_dir is accepted (and stored as None); any other
        # value has to be an absolute path.
        assert not cache_dir or os.path.isabs(cache_dir)
        self.cache_dir = cache_dir if cache_dir else None
        self.format_control = format_control
        self.allowed_formats = allowed_formats

        known_formats = {"source", "binary"}
        assert self.allowed_formats.issubset(known_formats)

    def _get_cache_path_parts_legacy(self, link):
        # type: (Link) -> List[str]
        """Get parts of path that must be os.path.joined with cache_dir

        Legacy cache key (pip < 20) for compatibility with older caches.
        """
        # The cache key is a URL built from the link without its fragment
        # (the fragment may carry items we do not care about), followed by
        # "#<hash_name>=<hash>" when the link carries a hash.
        fragments = [link.url_without_fragment]
        if not (link.hash_name is None or link.hash is None):
            fragments += ["=".join((link.hash_name, link.hash))]
        cache_key = "#".join(fragments)

        # sha224 gives a shorter digest than sha256, which is sufficient
        # for naming cache directories.
        digest = hashlib.sha224(cache_key.encode()).hexdigest()

        # Nest the directories so that no single level ends up with a huge
        # number of sub directories, which some file systems cannot handle.
        bounds = ((0, 2), (2, 4), (4, 6), (6, None))
        return [digest[start:stop] for start, stop in bounds]

    def _get_cache_path_parts(self, link):
        # type: (Link) -> List[str]
        """Get parts of path that must be os.path.joined with cache_dir
        """
        # The cache key is built from the URL without its fragment (the
        # fragment may carry items we do not care about), plus the hash and
        # the subdirectory fragment when the link has them.
        key = {"url": link.url_without_fragment}
        if not (link.hash_name is None or link.hash is None):
            key[link.hash_name] = link.hash
        if link.subdirectory_fragment:
            key["subdirectory"] = link.subdirectory_fragment

        # Include interpreter name, major and minor version in cache key
        # to cope with ill-behaved sdists that build a different wheel
        # depending on the python version their setup.py is being run on,
        # and don't encode the difference in compatibility tags.
        # https://github.com/pypa/pip/issues/7296
        key.update(
            interpreter_name=interpreter_name(),
            interpreter_version=interpreter_version(),
        )

        # _hash_dict yields a sha224 hex digest of the key dictionary.
        digest = _hash_dict(key)

        # Nest the directories so that no single level ends up with a huge
        # number of sub directories, which some file systems cannot handle.
        bounds = ((0, 2), (2, 4), (4, 6), (6, None))
        return [digest[start:stop] for start, stop in bounds]

    def _get_candidates(self, link, canonical_package_name):
        # type: (Link, Optional[str]) -> List[Any]
        """Return (file name, directory) pairs found in the cache for link.

        An empty list is returned when there is no cache directory, package
        name or link, or when none of this cache's allowed formats is
        permitted for the package by format_control.
        """
        if not (self.cache_dir and canonical_package_name and link):
            return []

        permitted = self.format_control.get_allowed_formats(
            canonical_package_name
        )
        if not self.allowed_formats.intersection(permitted):
            return []

        found = []
        # The current location is listed first, then the legacy one.
        # TODO remove legacy path lookup in pip>=21
        for locate in (self.get_path_for_link, self.get_path_for_link_legacy):
            directory = locate(link)
            if os.path.isdir(directory):
                found.extend(
                    (entry, directory) for entry in os.listdir(directory)
                )
        return found

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy cache directory for link (abstract)."""
        raise NotImplementedError()

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached items in for link.
        """
        raise NotImplementedError()

    def get(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Returns a link to a cached item if it exists, otherwise returns the
        passed link.
        """
        raise NotImplementedError()
| 173 | |
| 174 | |
class SimpleWheelCache(Cache):
    """A cache of wheels for future installs.

    Wheels are stored below ``<cache_dir>/wheels`` in a directory derived
    from the link they were built from.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(SimpleWheelCache, self).__init__(
            cache_dir, format_control, {"binary"}
        )

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the directory for link computed with the legacy cache key.

        :param link: The link of the sdist for which wheels were cached.
        """
        parts = self._get_cache_path_parts_legacy(link)
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return a directory to store cached wheels for link

        Because there are M wheels for any one sdist, we provide a directory
        to cache them in, and then consult that directory when looking up
        cache hits.

        We only insert things into the cache if they have plausible version
        numbers, so that we don't contaminate the cache with things that were
        not unique. E.g. ./package might have dozens of installs done for it
        and build a version of 0.0...and if we built and cached a wheel, we'd
        end up using the same wheel even if the source has been edited.

        :param link: The link of the sdist for which this will cache wheels.
        """
        parts = self._get_cache_path_parts(link)

        # Store wheels within the root cache_dir
        return os.path.join(self.cache_dir, "wheels", *parts)

    def get(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to the best cached wheel for link, or link itself.

        The passed link is returned unchanged when package_name is falsy or
        when no cached file qualifies. A cached file is skipped when its name
        is not a valid wheel filename, when its distribution name does not
        match package_name (after canonicalization), or when it is not
        supported by any of supported_tags.

        :param link: The link to look up in the cache.
        :param package_name: The expected distribution name, if known.
        :param supported_tags: Tags against which candidate wheels are
            checked and ranked.
        """
        candidates = []

        if not package_name:
            return link

        canonical_package_name = canonicalize_name(package_name)
        for wheel_name, wheel_dir in self._get_candidates(
            link, canonical_package_name
        ):
            try:
                wheel = Wheel(wheel_name)
            except InvalidWheelFilename:
                continue
            if canonicalize_name(wheel.name) != canonical_package_name:
                # Pass the values as logger arguments so the message is only
                # formatted when debug logging is actually enabled.
                logger.debug(
                    "Ignoring cached wheel %s for %s as it "
                    "does not match the expected distribution name %s.",
                    wheel_name, link, package_name,
                )
                continue
            if not wheel.supported(supported_tags):
                # Built for a different python/arch/etc
                continue
            candidates.append(
                (
                    wheel.support_index_min(supported_tags),
                    wheel_name,
                    wheel_dir,
                )
            )

        if not candidates:
            return link

        # The lowest support index wins; ties fall back to comparing the
        # wheel name and then the directory.
        _, wheel_name, wheel_dir = min(candidates)
        return Link(path_to_url(os.path.join(wheel_dir, wheel_name)))
| 255 | |
| 256 | |
class EphemWheelCache(SimpleWheelCache):
    """A SimpleWheelCache that creates its own temporary cache directory.
    """

    def __init__(self, format_control):
        # type: (FormatControl) -> None
        # The temporary directory has to exist before the parent constructor
        # runs, because its path is handed over as the cache root.
        temp_dir = TempDirectory(
            globally_managed=True,
            kind=tempdir_kinds.EPHEM_WHEEL_CACHE,
        )
        self._temp_dir = temp_dir

        super(EphemWheelCache, self).__init__(temp_dir.path, format_control)
| 271 | |
| 272 | |
class CacheEntry(object):
    """Pair a link to a cached item with a persistence flag.

    :param link: The link to the cached item.
    :param persistent: Flag stored alongside the link, unchanged.
    """

    def __init__(
        self,
        link,  # type: Link
        persistent,  # type: bool
    ):
        # type: (...) -> None
        self.persistent = persistent
        self.link = link
| 281 | |
| 282 | |
class WheelCache(Cache):
    """Wraps EphemWheelCache and SimpleWheelCache into a single Cache

    This Cache allows for graceful degradation: the simple wheel cache is
    consulted first, and the ephem wheel cache is only used when a certain
    link is not found there.
    """

    def __init__(self, cache_dir, format_control):
        # type: (str, FormatControl) -> None
        super(WheelCache, self).__init__(
            cache_dir, format_control, {'binary'}
        )
        self._wheel_cache = SimpleWheelCache(cache_dir, format_control)
        self._ephem_cache = EphemWheelCache(format_control)

    def get_path_for_link_legacy(self, link):
        # type: (Link) -> str
        """Return the legacy directory for link in the simple wheel cache."""
        return self._wheel_cache.get_path_for_link_legacy(link)

    def get_path_for_link(self, link):
        # type: (Link) -> str
        """Return the directory for link in the simple wheel cache."""
        return self._wheel_cache.get_path_for_link(link)

    def get_ephem_path_for_link(self, link):
        # type: (Link) -> str
        """Return the directory for link in the ephem wheel cache."""
        return self._ephem_cache.get_path_for_link(link)

    def get(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Link
        """Return a link to a cached item if one exists, otherwise return the
        passed link.
        """
        entry = self.get_cache_entry(link, package_name, supported_tags)
        return link if entry is None else entry.link

    def get_cache_entry(
        self,
        link,  # type: Link
        package_name,  # type: Optional[str]
        supported_tags,  # type: List[Tag]
    ):
        # type: (...) -> Optional[CacheEntry]
        """Returns a CacheEntry with a link to a cached item if it exists or
        None. The cache entry indicates if the item was found in the persistent
        or ephemeral cache.
        """
        # Each backend is paired with the "persistent" flag reported for a
        # hit; the persistent cache is tried before the ephemeral one.
        lookups = (
            (self._wheel_cache, True),
            (self._ephem_cache, False),
        )
        for backend, is_persistent in lookups:
            found = backend.get(
                link=link,
                package_name=package_name,
                supported_tags=supported_tags,
            )
            # A backend hands back the very same link object on a miss.
            if found is not link:
                return CacheEntry(found, persistent=is_persistent)

        return None
