comparison env/lib/python3.9/site-packages/setuptools/package_index.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 """PyPI and direct package downloading"""
2 import sys
3 import os
4 import re
5 import io
6 import shutil
7 import socket
8 import base64
9 import hashlib
10 import itertools
11 import warnings
12 import configparser
13 import html
14 import http.client
15 import urllib.parse
16 import urllib.request
17 import urllib.error
18 from functools import wraps
19
20 import setuptools
21 from pkg_resources import (
22 CHECKOUT_DIST, Distribution, BINARY_DIST, normalize_path, SOURCE_DIST,
23 Environment, find_distributions, safe_name, safe_version,
24 to_filename, Requirement, DEVELOP_DIST, EGG_DIST,
25 )
26 from setuptools import ssl_support
27 from distutils import log
28 from distutils.errors import DistutilsError
29 from fnmatch import translate
30 from setuptools.wheel import Wheel
31
# Regex for the "#egg=name-version" fragment used on direct download links.
EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$')
# Extracts the target of an href attribute (quoted or bare, case-insensitive).
HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I)
# Matches legacy PyPI "md5" action links so they can be rewritten into
# "#md5=<digest>" fragments (see PackageIndex.process_index).
PYPI_MD5 = re.compile(
    r'<a href="([^"#]+)">([^<]+)</a>\n\s+\(<a (?:title="MD5 hash"\n\s+)'
    r'href="[^?]+\?:action=show_md5&amp;digest=([0-9a-f]{32})">md5</a>\)'
)
# Matcher for a URL scheme prefix such as "http:" or "svn+ssh:".
URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match
# Archive extensions recognized as source distributions.
EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split()

__all__ = [
    'PackageIndex', 'distros_for_url', 'parse_bdist_wininst',
    'interpret_distro_name',
]

# Default socket timeout in seconds (used by the socket_timeout decorator).
_SOCKET_TIMEOUT = 15

# User-Agent template; ``setuptools`` below is the module object, so
# str.format resolves ``{setuptools.__version__}`` via attribute access.
_tmpl = "setuptools/{setuptools.__version__} Python-urllib/{py_major}"
user_agent = _tmpl.format(
    py_major='{}.{}'.format(*sys.version_info), setuptools=setuptools)
51
52
def parse_requirement_arg(spec):
    """Parse `spec` as a requirement string.

    Raises DistutilsError (chained to the original ValueError) when `spec`
    is not a valid requirement specifier.
    """
    try:
        return Requirement.parse(spec)
    except ValueError as e:
        msg = "Not a URL, existing file, or requirement spec: %r" % (spec,)
        raise DistutilsError(msg) from e
60
61
62 def parse_bdist_wininst(name):
63 """Return (base,pyversion) or (None,None) for possible .exe name"""
64
65 lower = name.lower()
66 base, py_ver, plat = None, None, None
67
68 if lower.endswith('.exe'):
69 if lower.endswith('.win32.exe'):
70 base = name[:-10]
71 plat = 'win32'
72 elif lower.startswith('.win32-py', -16):
73 py_ver = name[-7:-4]
74 base = name[:-16]
75 plat = 'win32'
76 elif lower.endswith('.win-amd64.exe'):
77 base = name[:-14]
78 plat = 'win-amd64'
79 elif lower.startswith('.win-amd64-py', -20):
80 py_ver = name[-7:-4]
81 base = name[:-20]
82 plat = 'win-amd64'
83 return base, py_ver, plat
84
85
def egg_info_for_url(url):
    """Return ``(basename, fragment)`` extracted from a download URL."""
    scheme, server, path, parameters, query, fragment = urllib.parse.urlparse(url)
    base = urllib.parse.unquote(path.split('/')[-1])
    # SourceForge mirror URLs end in a literal "/download" segment; the real
    # filename is the path component just before it.  XXX Yuck
    if server == 'sourceforge.net' and base == 'download':
        base = urllib.parse.unquote(path.split('/')[-2])
    if '#' in base:
        base, fragment = base.split('#', 1)
    return base, fragment
95
96
def distros_for_url(url, metadata=None):
    """Yield egg or source distribution objects that might be found at a URL"""
    base, fragment = egg_info_for_url(url)
    yield from distros_for_location(url, base, metadata)
    if not fragment:
        return
    # An "#egg=name-version" fragment marks a checkout-style link.
    match = EGG_FRAGMENT.match(fragment)
    if match:
        yield from interpret_distro_name(
            url, match.group(1), metadata, precedence=CHECKOUT_DIST
        )
109
110
def distros_for_location(location, basename, metadata=None):
    """Yield egg or source distribution objects based on basename"""
    if basename.endswith('.egg.zip'):
        basename = basename[:-4]  # strip the .zip
    if basename.endswith('.egg') and '-' in basename:
        # only one, unambiguous interpretation
        return [Distribution.from_location(location, basename, metadata)]
    if basename.endswith('.whl') and '-' in basename:
        wheel = Wheel(basename)
        if not wheel.is_compatible():
            return []
        return [Distribution(
            location=location,
            project_name=wheel.project_name,
            version=wheel.version,
            precedence=EGG_DIST + 1,  # rank wheels above eggs
        )]
    if basename.endswith('.exe'):
        win_base, py_ver, platform = parse_bdist_wininst(basename)
        if win_base is not None:
            return interpret_distro_name(
                location, win_base, metadata, py_ver, BINARY_DIST, platform
            )
    # Try source distro extensions (.zip, .tgz, etc.)
    for ext in EXTENSIONS:
        if basename.endswith(ext):
            stem = basename[:-len(ext)]
            return interpret_distro_name(location, stem, metadata)
    return []  # no extension matched
142
143
def distros_for_filename(filename, metadata=None):
    """Yield possible egg or source distribution objects based on a filename"""
    basename = os.path.basename(filename)
    return distros_for_location(normalize_path(filename), basename, metadata)
149
150
def interpret_distro_name(
        location, basename, metadata, py_version=None, precedence=SOURCE_DIST,
        platform=None
):
    """Generate alternative interpretations of a source distro name

    Note: if `location` is a filesystem filename, you should call
    ``pkg_resources.normalize_path()`` on it before passing it to this
    routine!
    """
    # Some archive names are ambiguous as to where the name ends and the
    # version begins (e.g. "adns-python-1.1.0", "egenix-mx-commercial"), so
    # every possible split is yielded ("adns" / "python-1.1.0",
    # "adns-python" / "1.1.0", "adns-python-1.1.0" / "").  Spurious splits
    # sort below real numeric versions and are effectively ignored; the
    # long-term fix is "safe" names/versions in archive filenames.
    pieces = basename.split('-')
    if not py_version and any(re.match(r'py\d\.\d$', p) for p in pieces[2:]):
        # it is a bdist_dumb, not an sdist -- bail out
        return

    for split in range(1, len(pieces) + 1):
        name = '-'.join(pieces[:split])
        version = '-'.join(pieces[split:])
        yield Distribution(
            location, metadata, name, version,
            py_version=py_version, precedence=precedence, platform=platform
        )
184
185
186 # From Python 2.7 docs
# From Python 2.7 docs
def unique_everseen(iterable, key=None):
    "List unique elements, preserving order. Remember all elements ever seen."
    # unique_everseen('AAAABBBCCDAABBB') --> A B C D
    # unique_everseen('ABBCcAD', str.lower) --> A B C D
    seen = set()
    for element in iterable:
        marker = element if key is None else key(element)
        if marker not in seen:
            seen.add(marker)
            yield element
203
204
def unique_values(func):
    """
    Wrap a function returning an iterable such that the resulting iterable
    only ever yields unique items.
    """
    @wraps(func)
    def deduplicated(*args, **kwargs):
        return unique_everseen(func(*args, **kwargs))

    return deduplicated
216
217
# Matches any HTML tag carrying a rel="..." attribute; group 1 is the whole
# tag body, group 2 the rel value (possibly comma-separated, e.g.
# "homepage,download").  Used by find_external_links.
REL = re.compile(r"""<([^>]*\srel\s*=\s*['"]?([^'">]+)[^>]*)>""", re.I)
# this line is here to fix emacs' cruddy broken syntax highlighting
220
221
@unique_values
def find_external_links(url, page):
    """Find rel="homepage" and rel="download" links in `page`, yielding URLs"""
    for match in REL.finditer(page):
        tag, rel = match.groups()
        rel_values = {value.strip() for value in rel.lower().split(',')}
        if rel_values & {'homepage', 'download'}:
            for href in HREF.finditer(tag):
                yield urllib.parse.urljoin(url, htmldecode(href.group(1)))

    # Legacy PyPI pages expose these links as table headings instead.
    for heading in ("<th>Home Page", "<th>Download URL"):
        pos = page.find(heading)
        if pos == -1:
            continue
        match = HREF.search(page, pos)
        if match:
            yield urllib.parse.urljoin(url, htmldecode(match.group(1)))
239
240
class ContentChecker:
    """
    A null content checker that defines the interface for checking content
    """

    def feed(self, block):
        """
        Feed a block of data to the hash.
        """

    def is_valid(self):
        """
        Check the hash. Return False if validation fails.
        """
        return True

    def report(self, reporter, template):
        """
        Call reporter with information about the checker (hash name)
        substituted into the template.
        """
264
265
class HashChecker(ContentChecker):
    # Recognizes "<algorithm>=<hexdigest>" URL fragments.
    pattern = re.compile(
        r'(?P<hash_name>sha1|sha224|sha384|sha256|sha512|md5)='
        r'(?P<expected>[a-f0-9]+)'
    )

    def __init__(self, hash_name, expected):
        self.hash_name = hash_name
        self.hash = hashlib.new(hash_name)
        self.expected = expected

    @classmethod
    def from_url(cls, url):
        "Construct a (possibly null) ContentChecker from a URL"
        fragment = urllib.parse.urlparse(url)[-1]
        match = fragment and cls.pattern.search(fragment)
        if not match:
            # no fragment, or no recognizable hash spec: null checker
            return ContentChecker()
        return cls(**match.groupdict())

    def feed(self, block):
        self.hash.update(block)

    def is_valid(self):
        return self.hash.hexdigest() == self.expected

    def report(self, reporter, template):
        return reporter(template % self.hash_name)
297
298
class PackageIndex(Environment):
    """A distribution index that scans web pages for download URLs"""

    def __init__(
            self, index_url="https://pypi.org/simple/", hosts=('*',),
            ca_bundle=None, verify_ssl=True, *args, **kw
    ):
        Environment.__init__(self, *args, **kw)
        # slice trick: appends "/" only when index_url lacks a trailing slash
        self.index_url = index_url + "/" [:not index_url.endswith('/')]
        self.scanned_urls = {}   # URLs already examined (in any way)
        self.fetched_urls = {}   # URLs whose pages were actually retrieved
        self.package_pages = {}  # project key (lowercase) -> {page URL: True}
        # host allow-list: fnmatch patterns compiled into one alternation
        self.allows = re.compile('|'.join(map(translate, hosts))).match
        self.to_scan = []        # find-links URLs deferred until prescan()
        use_ssl = (
            verify_ssl
            and ssl_support.is_available
            and (ca_bundle or ssl_support.find_ca_bundle())
        )
        if use_ssl:
            self.opener = ssl_support.opener_for(ca_bundle)
        else:
            self.opener = urllib.request.urlopen

    # FIXME: 'PackageIndex.process_url' is too complex (14)
    def process_url(self, url, retrieve=False):  # noqa: C901
        """Evaluate a URL as a possible download, and maybe retrieve it"""
        if url in self.scanned_urls and not retrieve:
            return
        self.scanned_urls[url] = True
        if not URL_SCHEME(url):
            # no scheme at all: treat as a local file or directory
            self.process_filename(url)
            return
        else:
            dists = list(distros_for_url(url))
            if dists:
                if not self.url_ok(url):
                    return
                self.debug("Found link: %s", url)

        if dists or not retrieve or url in self.fetched_urls:
            list(map(self.add, dists))
            return  # don't need the actual page

        if not self.url_ok(url):
            self.fetched_urls[url] = True
            return

        self.info("Reading %s", url)
        self.fetched_urls[url] = True  # prevent multiple fetch attempts
        tmpl = "Download error on %s: %%s -- Some packages may not be found!"
        f = self.open_url(url, tmpl % url)
        if f is None:
            return
        if isinstance(f, urllib.error.HTTPError) and f.code == 401:
            self.info("Authentication error: %s" % f.msg)
        self.fetched_urls[f.url] = True
        if 'html' not in f.headers.get('content-type', '').lower():
            f.close()  # not html, we can't process it
            return

        base = f.url  # handle redirects
        page = f.read()
        if not isinstance(page, str):
            # In Python 3 and got bytes but want str.
            if isinstance(f, urllib.error.HTTPError):
                # Errors have no charset, assume latin1:
                charset = 'latin-1'
            else:
                charset = f.headers.get_param('charset') or 'latin-1'
            page = page.decode(charset, "ignore")
        f.close()
        # recursively evaluate every link found on the page
        for match in HREF.finditer(page):
            link = urllib.parse.urljoin(base, htmldecode(match.group(1)))
            self.process_url(link)
        # index pages get extra processing (package-page bookkeeping)
        if url.startswith(self.index_url) and getattr(f, 'code', None) != 404:
            page = self.process_index(url, page)

    def process_filename(self, fn, nested=False):
        # process filenames or directories
        if not os.path.exists(fn):
            self.warn("Not found: %s", fn)
            return

        # recurse only one level into a directory (nested=True stops descent)
        if os.path.isdir(fn) and not nested:
            path = os.path.realpath(fn)
            for item in os.listdir(path):
                self.process_filename(os.path.join(path, item), True)

        dists = distros_for_filename(fn)
        if dists:
            self.debug("Found: %s", fn)
            list(map(self.add, dists))

    def url_ok(self, url, fatal=False):
        # Return True when `url` is a file: URL or its host matches the
        # allow-list; otherwise warn (or raise, when fatal=True).
        s = URL_SCHEME(url)
        is_file = s and s.group(1).lower() == 'file'
        if is_file or self.allows(urllib.parse.urlparse(url)[1]):
            return True
        msg = (
            "\nNote: Bypassing %s (disallowed host; see "
            "http://bit.ly/2hrImnY for details).\n")
        if fatal:
            raise DistutilsError(msg % url)
        else:
            self.warn(msg, url)

    def scan_egg_links(self, search_path):
        # Find and process every *.egg-link file on the given search path.
        dirs = filter(os.path.isdir, search_path)
        egg_links = (
            (path, entry)
            for path in dirs
            for entry in os.listdir(path)
            if entry.endswith('.egg-link')
        )
        list(itertools.starmap(self.scan_egg_link, egg_links))

    def scan_egg_link(self, path, entry):
        # An egg-link file holds two lines: the egg location and a setup path.
        with open(os.path.join(path, entry)) as raw_lines:
            # filter non-empty lines
            lines = list(filter(None, map(str.strip, raw_lines)))

        if len(lines) != 2:
            # format is not recognized; punt
            return

        egg_path, setup_path = lines

        for dist in find_distributions(os.path.join(path, egg_path)):
            dist.location = os.path.join(path, *lines)
            dist.precedence = SOURCE_DIST
            self.add(dist)

    def _scan(self, link):
        # Process a URL to see if it's for a package page
        NO_MATCH_SENTINEL = None, None
        if not link.startswith(self.index_url):
            return NO_MATCH_SENTINEL

        # index pages look like <index_url>/<project>/<version-ish>
        parts = list(map(
            urllib.parse.unquote, link[len(self.index_url):].split('/')
        ))
        if len(parts) != 2 or '#' in parts[1]:
            return NO_MATCH_SENTINEL

        # it's a package page, sanitize and index it
        pkg = safe_name(parts[0])
        ver = safe_version(parts[1])
        self.package_pages.setdefault(pkg.lower(), {})[link] = True
        return to_filename(pkg), to_filename(ver)

    def process_index(self, url, page):
        """Process the contents of a PyPI page"""

        # process an index page into the package-page index
        for match in HREF.finditer(page):
            try:
                self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1))))
            except ValueError:
                pass

        pkg, ver = self._scan(url)  # ensure this page is in the page index
        if not pkg:
            return ""  # no sense double-scanning non-package pages

        # process individual package page
        for new_url in find_external_links(url, page):
            # Process the found URL
            base, frag = egg_info_for_url(new_url)
            if base.endswith('.py') and not frag:
                # bare .py link: tag it with an #egg fragment when possible
                if ver:
                    new_url += '#egg=%s-%s' % (pkg, ver)
                else:
                    self.need_version_info(url)
            self.scan_url(new_url)

        # rewrite legacy md5 action links as "#md5=" fragments
        return PYPI_MD5.sub(
            lambda m: '<a href="%s#md5=%s">%s</a>' % m.group(1, 3, 2), page
        )

    def need_version_info(self, url):
        # Fall back to a full index scan when a page's .py links are unversioned.
        self.scan_all(
            "Page at %s links to .py file(s) without version info; an index "
            "scan is required.", url
        )

    def scan_all(self, msg=None, *args):
        # Scan the entire index once (expensive; guarded by fetched_urls).
        if self.index_url not in self.fetched_urls:
            if msg:
                self.warn(msg, *args)
            self.info(
                "Scanning index of all packages (this may take a while)"
            )
        self.scan_url(self.index_url)

    def find_packages(self, requirement):
        # Try the raw project name first, then the safe name, then give up
        # and fall back to scanning the whole index.
        self.scan_url(self.index_url + requirement.unsafe_name + '/')

        if not self.package_pages.get(requirement.key):
            # Fall back to safe version of the name
            self.scan_url(self.index_url + requirement.project_name + '/')

        if not self.package_pages.get(requirement.key):
            # We couldn't find the target package, so search the index page too
            self.not_found_in_index(requirement)

        for url in list(self.package_pages.get(requirement.key, ())):
            # scan each page that might be related to the desired package
            self.scan_url(url)

    def obtain(self, requirement, installer=None):
        # Environment hook: locate a distribution satisfying `requirement`,
        # scanning the index as needed; defer to the base class on failure.
        self.prescan()
        self.find_packages(requirement)
        for dist in self[requirement.key]:
            if dist in requirement:
                return dist
            self.debug("%s does not match %s", requirement, dist)
        return super(PackageIndex, self).obtain(requirement, installer)

    def check_hash(self, checker, filename, tfp):
        """
        checker is a ContentChecker
        """
        checker.report(
            self.debug,
            "Validating %%s checksum for %s" % filename)
        if not checker.is_valid():
            # remove the corrupt download before raising
            tfp.close()
            os.unlink(filename)
            raise DistutilsError(
                "%s validation failed for %s; "
                "possible download problem?"
                % (checker.hash.name, os.path.basename(filename))
            )

    def add_find_links(self, urls):
        """Add `urls` to the list that will be prescanned for searches"""
        for url in urls:
            if (
                self.to_scan is None  # if we have already "gone online"
                or not URL_SCHEME(url)  # or it's a local file/directory
                or url.startswith('file:')
                or list(distros_for_url(url))  # or a direct package link
            ):
                # then go ahead and process it now
                self.scan_url(url)
            else:
                # otherwise, defer retrieval till later
                self.to_scan.append(url)

    def prescan(self):
        """Scan urls scheduled for prescanning (e.g. --find-links)"""
        if self.to_scan:
            list(map(self.scan_url, self.to_scan))
        self.to_scan = None  # from now on, go ahead and process immediately

    def not_found_in_index(self, requirement):
        if self[requirement.key]:  # we've seen at least one distro
            meth, msg = self.info, "Couldn't retrieve index page for %r"
        else:  # no distros seen for this name, might be misspelled
            meth, msg = (
                self.warn,
                "Couldn't find index page for %r (maybe misspelled?)")
        meth(msg, requirement.unsafe_name)
        self.scan_all()

    def download(self, spec, tmpdir):
        """Locate and/or download `spec` to `tmpdir`, returning a local path

        `spec` may be a ``Requirement`` object, or a string containing a URL,
        an existing local filename, or a project/version requirement spec
        (i.e. the string form of a ``Requirement`` object). If it is the URL
        of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one
        that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is
        automatically created alongside the downloaded file.

        If `spec` is a ``Requirement`` object or a string containing a
        project/version requirement spec, this method returns the location of
        a matching distribution (possibly after downloading it to `tmpdir`).
        If `spec` is a locally existing file or directory name, it is simply
        returned unchanged. If `spec` is a URL, it is downloaded to a subpath
        of `tmpdir`, and the local filename is returned. Various errors may be
        raised if a problem occurs during downloading.
        """
        if not isinstance(spec, Requirement):
            scheme = URL_SCHEME(spec)
            if scheme:
                # It's a url, download it to tmpdir
                found = self._download_url(scheme.group(1), spec, tmpdir)
                base, fragment = egg_info_for_url(spec)
                if base.endswith('.py'):
                    found = self.gen_setup(found, fragment, tmpdir)
                return found
            elif os.path.exists(spec):
                # Existing file or directory, just return it
                return spec
            else:
                spec = parse_requirement_arg(spec)
        return getattr(self.fetch_distribution(spec, tmpdir), 'location', None)

    def fetch_distribution(  # noqa: C901  # is too complex (14)  # FIXME
            self, requirement, tmpdir, force_scan=False, source=False,
            develop_ok=False, local_index=None):
        """Obtain a distribution suitable for fulfilling `requirement`

        `requirement` must be a ``pkg_resources.Requirement`` instance.
        If necessary, or if the `force_scan` flag is set, the requirement is
        searched for in the (online) package index as well as the locally
        installed packages. If a distribution matching `requirement` is found,
        the returned distribution's ``location`` is the value you would have
        gotten from calling the ``download()`` method with the matching
        distribution's URL or filename. If no matching distribution is found,
        ``None`` is returned.

        If the `source` flag is set, only source distributions and source
        checkout links will be considered. Unless the `develop_ok` flag is
        set, development and system eggs (i.e., those using the ``.egg-info``
        format) will be ignored.
        """
        # process a Requirement
        self.info("Searching for %s", requirement)
        skipped = {}
        dist = None

        def find(req, env=None):
            if env is None:
                env = self
            # Find a matching distribution; may be called more than once

            for dist in env[req.key]:

                if dist.precedence == DEVELOP_DIST and not develop_ok:
                    if dist not in skipped:
                        self.warn(
                            "Skipping development or system egg: %s", dist,
                        )
                        skipped[dist] = 1
                    continue

                test = (
                    dist in req
                    and (dist.precedence <= SOURCE_DIST or not source)
                )
                if test:
                    loc = self.download(dist.location, tmpdir)
                    dist.download_location = loc
                    if os.path.exists(dist.download_location):
                        return dist

        if force_scan:
            self.prescan()
            self.find_packages(requirement)
            dist = find(requirement)

        if not dist and local_index is not None:
            dist = find(requirement, local_index)

        if dist is None:
            if self.to_scan is not None:
                self.prescan()
            dist = find(requirement)

        if dist is None and not force_scan:
            self.find_packages(requirement)
            dist = find(requirement)

        if dist is None:
            self.warn(
                "No local packages or working download links found for %s%s",
                (source and "a source distribution of " or ""),
                requirement,
            )
        else:
            self.info("Best match: %s", dist)
            return dist.clone(location=dist.download_location)

    def fetch(self, requirement, tmpdir, force_scan=False, source=False):
        """Obtain a file suitable for fulfilling `requirement`

        DEPRECATED; use the ``fetch_distribution()`` method now instead. For
        backward compatibility, this routine is identical but returns the
        ``location`` of the downloaded distribution instead of a distribution
        object.
        """
        dist = self.fetch_distribution(requirement, tmpdir, force_scan, source)
        if dist is not None:
            return dist.location
        return None

    def gen_setup(self, filename, fragment, tmpdir):
        # Generate a trivial setup.py next to a downloaded bare .py file,
        # using the "#egg=name-version" fragment for project name/version.
        match = EGG_FRAGMENT.match(fragment)
        dists = match and [
            d for d in
            interpret_distro_name(filename, match.group(1), None) if d.version
        ] or []

        if len(dists) == 1:  # unambiguous ``#egg`` fragment
            basename = os.path.basename(filename)

            # Make sure the file has been downloaded to the temp dir.
            if os.path.dirname(filename) != tmpdir:
                dst = os.path.join(tmpdir, basename)
                from setuptools.command.easy_install import samefile
                if not samefile(filename, dst):
                    shutil.copy2(filename, dst)
                    filename = dst

            with open(os.path.join(tmpdir, 'setup.py'), 'w') as file:
                file.write(
                    "from setuptools import setup\n"
                    "setup(name=%r, version=%r, py_modules=[%r])\n"
                    % (
                        dists[0].project_name, dists[0].version,
                        os.path.splitext(basename)[0]
                    )
                )
            return filename

        elif match:
            raise DistutilsError(
                "Can't unambiguously interpret project/version identifier %r; "
                "any dashes in the name or version should be escaped using "
                "underscores. %r" % (fragment, dists)
            )
        else:
            raise DistutilsError(
                "Can't process plain .py files without an '#egg=name-version'"
                " suffix to enable automatic setup script generation."
            )

    # chunk size (bytes) used when streaming downloads
    dl_blocksize = 8192

    def _download_to(self, url, filename):
        self.info("Downloading %s", url)
        # Download the file
        fp = None
        try:
            checker = HashChecker.from_url(url)
            fp = self.open_url(url)
            if isinstance(fp, urllib.error.HTTPError):
                raise DistutilsError(
                    "Can't download %s: %s %s" % (url, fp.code, fp.msg)
                )
            headers = fp.info()
            blocknum = 0
            bs = self.dl_blocksize
            size = -1
            if "content-length" in headers:
                # Some servers return multiple Content-Length headers :(
                sizes = headers.get_all('Content-Length')
                size = max(map(int, sizes))
                self.reporthook(url, filename, blocknum, bs, size)
            with open(filename, 'wb') as tfp:
                while True:
                    block = fp.read(bs)
                    if block:
                        checker.feed(block)
                        tfp.write(block)
                        blocknum += 1
                        self.reporthook(url, filename, blocknum, bs, size)
                    else:
                        break
                self.check_hash(checker, filename, tfp)
            return headers
        finally:
            if fp:
                fp.close()

    def reporthook(self, url, filename, blocknum, blksize, size):
        pass  # no-op

    # FIXME:
    def open_url(self, url, warning=None):  # noqa: C901  # is too complex (12)
        # Open `url`, returning a file-like object.  On error: warn and return
        # None when `warning` is given, otherwise raise DistutilsError.
        # HTTPError responses are returned as-is for the caller to inspect.
        if url.startswith('file:'):
            return local_open(url)
        try:
            return open_with_auth(url, self.opener)
        except (ValueError, http.client.InvalidURL) as v:
            msg = ' '.join([str(arg) for arg in v.args])
            if warning:
                self.warn(warning, msg)
            else:
                raise DistutilsError('%s %s' % (url, msg)) from v
        except urllib.error.HTTPError as v:
            return v
        except urllib.error.URLError as v:
            if warning:
                self.warn(warning, v.reason)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v.reason)) from v
        except http.client.BadStatusLine as v:
            if warning:
                self.warn(warning, v.line)
            else:
                raise DistutilsError(
                    '%s returned a bad status line. The server might be '
                    'down, %s' %
                    (url, v.line)
                ) from v
        except (http.client.HTTPException, socket.error) as v:
            if warning:
                self.warn(warning, v)
            else:
                raise DistutilsError("Download error for %s: %s"
                                     % (url, v)) from v

    def _download_url(self, scheme, url, tmpdir):
        # Determine download filename
        #
        name, fragment = egg_info_for_url(url)
        if name:
            while '..' in name:
                # collapse parent-dir sequences to avoid path escapes
                name = name.replace('..', '.').replace('\\', '_')
        else:
            name = "__downloaded__"  # default if URL has no path contents

        if name.endswith('.egg.zip'):
            name = name[:-4]  # strip the extra .zip before download

        filename = os.path.join(tmpdir, name)

        # Download the file
        #
        if scheme == 'svn' or scheme.startswith('svn+'):
            return self._download_svn(url, filename)
        elif scheme == 'git' or scheme.startswith('git+'):
            return self._download_git(url, filename)
        elif scheme.startswith('hg+'):
            return self._download_hg(url, filename)
        elif scheme == 'file':
            return urllib.request.url2pathname(urllib.parse.urlparse(url)[2])
        else:
            self.url_ok(url, True)  # raises error if not allowed
            return self._attempt_download(url, filename)

    def scan_url(self, url):
        # Convenience wrapper: process a URL with retrieval enabled.
        self.process_url(url, True)

    def _attempt_download(self, url, filename):
        headers = self._download_to(url, filename)
        if 'html' in headers.get('content-type', '').lower():
            # got a web page instead of an archive; inspect it
            return self._download_html(url, headers, filename)
        else:
            return filename

    def _download_html(self, url, headers, filename):
        file = open(filename)
        for line in file:
            if line.strip():
                # Check for a subversion index page
                if re.search(r'<title>([^- ]+ - )?Revision \d+:', line):
                    # it's a subversion index page:
                    file.close()
                    os.unlink(filename)
                    return self._download_svn(url, filename)
                break  # not an index page
        file.close()
        os.unlink(filename)
        raise DistutilsError("Unexpected HTML page found at " + url)

    def _download_svn(self, url, filename):
        warnings.warn("SVN download support is deprecated", UserWarning)
        url = url.split('#', 1)[0]  # remove any fragment for svn's sake
        creds = ''
        if url.lower().startswith('svn:') and '@' in url:
            # extract embedded user:password credentials for the svn CLI
            scheme, netloc, path, p, q, f = urllib.parse.urlparse(url)
            if not netloc and path.startswith('//') and '/' in path[2:]:
                netloc, path = path[2:].split('/', 1)
                auth, host = _splituser(netloc)
                if auth:
                    if ':' in auth:
                        user, pw = auth.split(':', 1)
                        creds = " --username=%s --password=%s" % (user, pw)
                    else:
                        creds = " --username=" + auth
                    netloc = host
                    parts = scheme, netloc, url, p, q, f
                    url = urllib.parse.urlunparse(parts)
        self.info("Doing subversion checkout from %s to %s", url, filename)
        os.system("svn checkout%s -q %s %s" % (creds, url, filename))
        return filename

    @staticmethod
    def _vcs_split_rev_from_url(url, pop_prefix=False):
        # Split "scheme+vcs://host/path@rev#frag" into (clean_url, rev).
        scheme, netloc, path, query, frag = urllib.parse.urlsplit(url)

        scheme = scheme.split('+', 1)[-1]

        # Some fragment identification fails
        path = path.split('#', 1)[0]

        rev = None
        if '@' in path:
            path, rev = path.rsplit('@', 1)

        # Also, discard fragment
        url = urllib.parse.urlunsplit((scheme, netloc, path, query, ''))

        return url, rev

    def _download_git(self, url, filename):
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing git clone from %s to %s", url, filename)
        os.system("git clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Checking out %s", rev)
            os.system("git -C %s checkout --quiet %s" % (
                filename,
                rev,
            ))

        return filename

    def _download_hg(self, url, filename):
        filename = filename.split('#', 1)[0]
        url, rev = self._vcs_split_rev_from_url(url, pop_prefix=True)

        self.info("Doing hg clone from %s to %s", url, filename)
        os.system("hg clone --quiet %s %s" % (url, filename))

        if rev is not None:
            self.info("Updating to %s", rev)
            os.system("hg --cwd %s up -C -r %s -q" % (
                filename,
                rev,
            ))

        return filename

    # thin logging wrappers; subclasses may override to redirect output
    def debug(self, msg, *args):
        log.debug(msg, *args)

    def info(self, msg, *args):
        log.info(msg, *args)

    def warn(self, msg, *args):
        log.warn(msg, *args)
940
941
# This pattern matches a character entity reference: a decimal numeric
# reference, a hexadecimal numeric reference, or a named reference.  The
# trailing semicolon is optional, matching lenient browser behavior.
entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub
945
946
def decode_entity(match):
    """Return the decoded text for one matched HTML entity reference."""
    return html.unescape(match.group(0))
950
951
def htmldecode(text):
    """
    Decode HTML entities in the given text.

    >>> htmldecode(
    ...     'https://../package_name-0.1.2.tar.gz'
    ...     '?tokena=A&amp;tokenb=B">package_name-0.1.2.tar.gz')
    'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz'
    """
    decoded = entity_sub(decode_entity, text)
    return decoded
962
963
def socket_timeout(timeout=15):
    """Decorator factory: run the wrapped callable with `timeout` as the
    global default socket timeout, restoring the previous value afterwards.

    The restore happens in a ``finally`` block, so the old timeout is put
    back even when the wrapped callable raises.
    """
    def decorator(func):
        # wraps() preserves func's name/docstring; the original nested
        # functions discarded them (and confusingly reused one name).
        @wraps(func)
        def wrapper(*args, **kwargs):
            old_timeout = socket.getdefaulttimeout()
            socket.setdefaulttimeout(timeout)
            try:
                return func(*args, **kwargs)
            finally:
                socket.setdefaulttimeout(old_timeout)

        return wrapper

    return decorator
977
978
979 def _encode_auth(auth):
980 """
981 Encode auth from a URL suitable for an HTTP header.
982 >>> str(_encode_auth('username%3Apassword'))
983 'dXNlcm5hbWU6cGFzc3dvcmQ='
984
985 Long auth strings should not cause a newline to be inserted.
986 >>> long_auth = 'username:' + 'password'*10
987 >>> chr(10) in str(_encode_auth(long_auth))
988 False
989 """
990 auth_s = urllib.parse.unquote(auth)
991 # convert to bytes
992 auth_bytes = auth_s.encode()
993 encoded_bytes = base64.b64encode(auth_bytes)
994 # convert back to a string
995 encoded = encoded_bytes.decode()
996 # strip the trailing carriage return
997 return encoded.replace('\n', '')
998
999
class Credential:
    """
    A username/password pair. Use like a namedtuple.
    """

    def __init__(self, username, password):
        self.username = username
        self.password = password

    def __iter__(self):
        # Support tuple-style unpacking: ``user, pw = cred``.
        return iter((self.username, self.password))

    def __str__(self):
        # Rendered as "user:password", the form HTTP Basic auth expects.
        return f'{self.username}:{self.password}'
1015
1016
class PyPIConfig(configparser.RawConfigParser):
    """Credential store backed by the user's ~/.pypirc file, if present."""

    def __init__(self):
        """
        Load from ~/.pypirc
        """
        blank = dict.fromkeys(['username', 'password', 'repository'], '')
        super().__init__(blank)

        rc_path = os.path.join(os.path.expanduser('~'), '.pypirc')
        if os.path.exists(rc_path):
            self.read(rc_path)

    @property
    def creds_by_repository(self):
        """Map each configured repository URL to its Credential."""
        creds = {}
        for section in self.sections():
            # Only sections that actually name a repository are credentials.
            if not self.get(section, 'repository').strip():
                continue
            repo, cred = self._get_repo_cred(section)
            creds[repo] = cred
        return creds

    def _get_repo_cred(self, section):
        """Return a (repository, Credential) pair for one config section."""
        repo = self.get(section, 'repository').strip()
        username = self.get(section, 'username').strip()
        password = self.get(section, 'password').strip()
        return repo, Credential(username, password)

    def find_credential(self, url):
        """
        If the URL indicated appears to be a repository defined in this
        config, return the credential for that repository.
        """
        matching = (
            cred
            for repository, cred in self.creds_by_repository.items()
            if url.startswith(repository)
        )
        return next(matching, None)
1053
1054
def open_with_auth(url, opener=urllib.request.urlopen):
    """Open a urllib2 request, handling HTTP authentication.

    Credentials come from the URL itself (``user:pass@host``) or, failing
    that, from a matching repository entry in ~/.pypirc; they are sent as
    an HTTP Basic ``Authorization`` header.  Returns the file-like object
    produced by *opener*.
    """

    parsed = urllib.parse.urlparse(url)
    scheme, netloc, path, params, query, frag = parsed

    # Double scheme does not raise on macOS as revealed by a
    # failing test. We would expect "nonnumeric port". Refs #20.
    if netloc.endswith(':'):
        raise http.client.InvalidURL("nonnumeric port: ''")

    if scheme in ('http', 'https'):
        auth, address = _splituser(netloc)
    else:
        # Bug fix: also bind ``address`` for non-HTTP(S) schemes.
        # Previously only ``auth`` was set here, so a .pypirc credential
        # matching a non-HTTP URL hit a NameError at the URL rebuild below.
        auth, address = None, netloc

    if not auth:
        cred = PyPIConfig().find_credential(url)
        if cred:
            auth = str(cred)
            info = cred.username, url
            log.info('Authenticating as %s for %s (from .pypirc)', *info)

    if auth:
        auth = "Basic " + _encode_auth(auth)
        # Strip the user:pass@ part out of the URL actually requested.
        parts = scheme, address, path, params, query, frag
        new_url = urllib.parse.urlunparse(parts)
        request = urllib.request.Request(new_url)
        request.add_header("Authorization", auth)
    else:
        request = urllib.request.Request(url)

    request.add_header('User-Agent', user_agent)
    fp = opener(request)

    if auth:
        # Put authentication info back into request URL if same host,
        # so that links found on the page will work
        s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url)
        if s2 == scheme and h2 == address:
            parts = s2, netloc, path2, param2, query2, frag2
            fp.url = urllib.parse.urlunparse(parts)

    return fp
1099
1100
1101 # copy of urllib.parse._splituser from Python 3.8
1102 def _splituser(host):
1103 """splituser('user[:passwd]@host[:port]')
1104 --> 'user[:passwd]', 'host[:port]'."""
1105 user, delim, host = host.rpartition('@')
1106 return (user if delim else None), host
1107
1108
# Wrap open_with_auth so every call runs under a 15-second default socket
# timeout, preventing package_index from freezing on unresponsive hosts.
open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth)
1111
1112
def fix_sf_url(url):
    """Identity function; retained only for backward compatibility with
    older callers that expect a SourceForge URL rewriter here."""
    return url
1115
1116
1117 def local_open(url):
1118 """Read a local path, with special support for directories"""
1119 scheme, server, path, param, query, frag = urllib.parse.urlparse(url)
1120 filename = urllib.request.url2pathname(path)
1121 if os.path.isfile(filename):
1122 return urllib.request.urlopen(url)
1123 elif path.endswith('/') and os.path.isdir(filename):
1124 files = []
1125 for f in os.listdir(filename):
1126 filepath = os.path.join(filename, f)
1127 if f == 'index.html':
1128 with open(filepath, 'r') as fp:
1129 body = fp.read()
1130 break
1131 elif os.path.isdir(filepath):
1132 f += '/'
1133 files.append('<a href="{name}">{name}</a>'.format(name=f))
1134 else:
1135 tmpl = (
1136 "<html><head><title>{url}</title>"
1137 "</head><body>{files}</body></html>")
1138 body = tmpl.format(url=url, files='\n'.join(files))
1139 status, message = 200, "OK"
1140 else:
1141 status, message, body = 404, "Path not found", "Not found"
1142
1143 headers = {'content-type': 'text/html'}
1144 body_stream = io.StringIO(body)
1145 return urllib.error.HTTPError(url, status, message, headers, body_stream)