env/lib/python3.9/site-packages/requests/utils.py @ 0:4f3585e2f14b (draft, default, tip)

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
# -*- coding: utf-8 -*-

"""
requests.utils
~~~~~~~~~~~~~~

This module provides utility functions that are used within Requests
that are also useful for external consumption.
"""

import codecs
import contextlib
import io
import os
import re
import socket
import struct
import sys
import tempfile
import warnings
import zipfile
from collections import OrderedDict

from .__version__ import __version__
from . import certs
# to_native_string is unused here, but imported here for backwards compatibility
from ._internal_utils import to_native_string
from .compat import parse_http_list as _parse_list_header
from .compat import (
    quote, urlparse, bytes, str, unquote, getproxies,
    proxy_bypass, urlunparse, basestring, integer_types, is_py3,
    proxy_bypass_environment, getproxies_environment, Mapping)
from .cookies import cookiejar_from_dict
from .structures import CaseInsensitiveDict
from .exceptions import (
    InvalidURL, InvalidHeader, FileModeWarning, UnrewindableBodyError)

NETRC_FILES = ('.netrc', '_netrc')

DEFAULT_CA_BUNDLE_PATH = certs.where()

DEFAULT_PORTS = {'http': 80, 'https': 443}


if sys.platform == 'win32':
    # provide a proxy_bypass version on Windows without DNS lookups

    def proxy_bypass_registry(host):
        try:
            if is_py3:
                import winreg
            else:
                import _winreg as winreg
        except ImportError:
            return False

        try:
            internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            # ProxyEnable could be REG_SZ or REG_DWORD, normalize it
            proxyEnable = int(winreg.QueryValueEx(internetSettings,
                                                  'ProxyEnable')[0])
            # ProxyOverride is almost always a string
            proxyOverride = winreg.QueryValueEx(internetSettings,
                                                'ProxyOverride')[0]
        except OSError:
            return False
        if not proxyEnable or not proxyOverride:
            return False

        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                if '.' not in host:
                    return True
            test = test.replace(".", r"\.")  # mask dots
            test = test.replace("*", r".*")  # change glob sequence
            test = test.replace("?", r".")   # change glob char
            if re.match(test, host, re.I):
                return True
        return False

    def proxy_bypass(host):  # noqa
        """Return True if the host should be bypassed.

        Checks proxy settings gathered from the environment, if specified,
        or the registry.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)


def dict_to_sequence(d):
    """Returns the items of a dict-like object as a sequence;
    other objects are returned unchanged."""

    if hasattr(d, 'items'):
        d = d.items()

    return d


def super_len(o):
    total_length = None
    current_position = 0

    if hasattr(o, '__len__'):
        total_length = len(o)

    elif hasattr(o, 'len'):
        total_length = o.len

    elif hasattr(o, 'fileno'):
        try:
            fileno = o.fileno()
        except io.UnsupportedOperation:
            pass
        else:
            total_length = os.fstat(fileno).st_size

            # Having used fstat to determine the file length, we need to
            # confirm that this file was opened up in binary mode.
            if 'b' not in o.mode:
                warnings.warn((
                    "Requests has determined the content-length for this "
                    "request using the binary size of the file: however, the "
                    "file has been opened in text mode (i.e. without the 'b' "
                    "flag in the mode). This may lead to an incorrect "
                    "content-length. In Requests 3.0, support will be removed "
                    "for files in text mode."),
                    FileModeWarning
                )

    if hasattr(o, 'tell'):
        try:
            current_position = o.tell()
        except (OSError, IOError):
            # This can happen in some weird situations, such as when the file
            # is actually a special file descriptor like stdin. In this
            # instance, we don't know what the length is, so set it to zero and
            # let requests chunk it instead.
            if total_length is not None:
                current_position = total_length
        else:
            if hasattr(o, 'seek') and total_length is None:
                # StringIO and BytesIO have seek but no usable fileno
                try:
                    # seek to end of file
                    o.seek(0, 2)
                    total_length = o.tell()

                    # seek back to current position to support
                    # partially read file-like objects
                    o.seek(current_position or 0)
                except (OSError, IOError):
                    total_length = 0

    if total_length is None:
        total_length = 0

    return max(0, total_length - current_position)

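# Example (a sketch, not part of the upstream module): for a seekable
# in-memory stream, super_len reports the bytes remaining from the
# current read position.
#
#   >>> import io
#   >>> buf = io.BytesIO(b'hello')
#   >>> super_len(buf)
#   5
#   >>> _ = buf.read(2)
#   >>> super_len(buf)  # only the unread remainder is counted
#   3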

def get_netrc_auth(url, raise_errors=False):
    """Returns the Requests tuple auth for a given url from netrc."""

    netrc_file = os.environ.get('NETRC')
    if netrc_file is not None:
        netrc_locations = (netrc_file,)
    else:
        netrc_locations = ('~/{}'.format(f) for f in NETRC_FILES)

    try:
        from netrc import netrc, NetrcParseError

        netrc_path = None

        for f in netrc_locations:
            try:
                loc = os.path.expanduser(f)
            except KeyError:
                # os.path.expanduser can fail when $HOME is undefined and
                # getpwuid fails. See https://bugs.python.org/issue20164 &
                # https://github.com/psf/requests/issues/1846
                return

            if os.path.exists(loc):
                netrc_path = loc
                break

        # Abort early if there isn't one.
        if netrc_path is None:
            return

        ri = urlparse(url)

        # Strip port numbers from netloc. This weird `if...encode` dance is
        # used for Python 3.2, which doesn't support unicode literals.
        splitstr = b':'
        if isinstance(url, str):
            splitstr = splitstr.decode('ascii')
        host = ri.netloc.split(splitstr)[0]

        try:
            _netrc = netrc(netrc_path).authenticators(host)
            if _netrc:
                # Return with login / password
                login_i = (0 if _netrc[0] else 1)
                return (_netrc[login_i], _netrc[2])
        except (NetrcParseError, IOError):
            # If there was a parsing error or a permissions issue reading the file,
            # we'll just skip netrc auth unless explicitly asked to raise errors.
            if raise_errors:
                raise

    # App Engine hackiness.
    except (ImportError, AttributeError):
        pass

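# Example (a sketch, not part of the upstream module): given a
# hypothetical ~/.netrc containing
#
#     machine example.com login user password s3cret
#
# the lookup would resolve to that entry:
#
#   >>> get_netrc_auth('https://example.com/data')  # doctest: +SKIP
#   ('user', 's3cret')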

def guess_filename(obj):
    """Tries to guess the filename of the given object."""
    name = getattr(obj, 'name', None)
    if (name and isinstance(name, basestring) and name[0] != '<' and
            name[-1] != '>'):
        return os.path.basename(name)


def extract_zipped_paths(path):
    """Replace nonexistent paths that look like they refer to a member of a zip
    archive with the location of an extracted copy of the target, or else
    just return the provided path unchanged.
    """
    if os.path.exists(path):
        # this is already a valid path, no need to do anything further
        return path

    # find the first valid part of the provided path and treat that as a zip archive
    # assume the rest of the path is the name of a member in the archive
    archive, member = os.path.split(path)
    while archive and not os.path.exists(archive):
        archive, prefix = os.path.split(archive)
        member = '/'.join([prefix, member])

    if not zipfile.is_zipfile(archive):
        return path

    zip_file = zipfile.ZipFile(archive)
    if member not in zip_file.namelist():
        return path

    # we have a valid zip archive and a valid member of that archive
    tmp = tempfile.gettempdir()
    extracted_path = os.path.join(tmp, *member.split('/'))
    if not os.path.exists(extracted_path):
        extracted_path = zip_file.extract(member, path=tmp)

    return extracted_path

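# Example (a sketch, not part of the upstream module; the archive path is
# hypothetical): for a nonexistent path whose prefix is a real zip file,
# the member is extracted under tempfile.gettempdir() and the extracted
# copy's path is returned.
#
#   >>> extract_zipped_paths('/opt/bundle.zip/certs/ca.pem')  # doctest: +SKIP
#   '/tmp/certs/ca.pem'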

def from_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return an OrderedDict, e.g.,

    ::

        >>> from_key_val_list([('key', 'val')])
        OrderedDict([('key', 'val')])
        >>> from_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples
        >>> from_key_val_list({'key': 'val'})
        OrderedDict([('key', 'val')])

    :rtype: OrderedDict
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    return OrderedDict(value)


def to_key_val_list(value):
    """Take an object and test to see if it can be represented as a
    dictionary. If it can be, return a list of tuples, e.g.,

    ::

        >>> to_key_val_list([('key', 'val')])
        [('key', 'val')]
        >>> to_key_val_list({'key': 'val'})
        [('key', 'val')]
        >>> to_key_val_list('string')
        Traceback (most recent call last):
        ...
        ValueError: cannot encode objects that are not 2-tuples

    :rtype: list
    """
    if value is None:
        return None

    if isinstance(value, (str, bytes, bool, int)):
        raise ValueError('cannot encode objects that are not 2-tuples')

    if isinstance(value, Mapping):
        value = value.items()

    return list(value)


# From mitsuhiko/werkzeug (used with permission).
def parse_list_header(value):
    """Parse lists as described by RFC 2068 Section 2.

    In particular, parse comma-separated lists where the elements of
    the list may include quoted-strings. A quoted-string could
    contain a comma. A non-quoted string could have quotes in the
    middle. Quotes are removed automatically after parsing.

    It basically works like :func:`parse_set_header` just that items
    may appear multiple times and case sensitivity is preserved.

    The return value is a standard :class:`list`:

    >>> parse_list_header('token, "quoted value"')
    ['token', 'quoted value']

    To create a header from the :class:`list` again, use the
    :func:`dump_header` function.

    :param value: a string with a list header.
    :return: :class:`list`
    :rtype: list
    """
    result = []
    for item in _parse_list_header(value):
        if item[:1] == item[-1:] == '"':
            item = unquote_header_value(item[1:-1])
        result.append(item)
    return result


# From mitsuhiko/werkzeug (used with permission).
def parse_dict_header(value):
    """Parse lists of key, value pairs as described by RFC 2068 Section 2 and
    convert them into a python dict:

    >>> d = parse_dict_header('foo="is a fish", bar="as well"')
    >>> type(d) is dict
    True
    >>> sorted(d.items())
    [('bar', 'as well'), ('foo', 'is a fish')]

    If there is no value for a key it will be `None`:

    >>> parse_dict_header('key_without_value')
    {'key_without_value': None}

    To create a header from the :class:`dict` again, use the
    :func:`dump_header` function.

    :param value: a string with a dict header.
    :return: :class:`dict`
    :rtype: dict
    """
    result = {}
    for item in _parse_list_header(value):
        if '=' not in item:
            result[item] = None
            continue
        name, value = item.split('=', 1)
        if value[:1] == value[-1:] == '"':
            value = unquote_header_value(value[1:-1])
        result[name] = value
    return result


# From mitsuhiko/werkzeug (used with permission).
def unquote_header_value(value, is_filename=False):
    r"""Unquotes a header value. (Reversal of :func:`quote_header_value`).
    This does not use the real unquoting but what browsers actually use
    for quoting.

    :param value: the header value to unquote.
    :rtype: str
    """
    if value and value[0] == value[-1] == '"':
        # this is not the real unquoting, but fixing this so that the
        # RFC is met will result in bugs with internet explorer and
        # probably some other browsers as well. IE for example is
        # uploading files with "C:\foo\bar.txt" as filename
        value = value[1:-1]

        # if this is a filename and the starting characters look like
        # a UNC path, then just return the value without quotes. Using the
        # replace sequence below on a UNC path has the effect of turning
        # the leading double slash into a single slash and then
        # _fix_ie_filename() doesn't work correctly. See #458.
        if not is_filename or value[:2] != '\\\\':
            return value.replace('\\\\', '\\').replace('\\"', '"')
    return value


def dict_from_cookiejar(cj):
    """Returns a key/value dictionary from a CookieJar.

    :param cj: CookieJar object to extract cookies from.
    :rtype: dict
    """

    cookie_dict = {}

    for cookie in cj:
        cookie_dict[cookie.name] = cookie.value

    return cookie_dict


def add_dict_to_cookiejar(cj, cookie_dict):
    """Returns a CookieJar from a key/value dictionary.

    :param cj: CookieJar to insert cookies into.
    :param cookie_dict: Dict of key/values to insert into CookieJar.
    :rtype: CookieJar
    """

    return cookiejar_from_dict(cookie_dict, cj)


def get_encodings_from_content(content):
    """Returns encodings from given content string.

    :param content: bytestring to extract encodings from.
    """
    warnings.warn((
        'In requests 3.0, get_encodings_from_content will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    charset_re = re.compile(r'<meta.*?charset=["\']*(.+?)["\'>]', flags=re.I)
    pragma_re = re.compile(r'<meta.*?content=["\']*;?charset=(.+?)["\'>]', flags=re.I)
    xml_re = re.compile(r'^<\?xml.*?encoding=["\']*(.+?)["\'>]')

    return (charset_re.findall(content) +
            pragma_re.findall(content) +
            xml_re.findall(content))


def _parse_content_type_header(header):
    """Returns content type and parameters from given header

    :param header: string
    :return: tuple containing content type and dictionary of
         parameters
    """

    tokens = header.split(';')
    content_type, params = tokens[0].strip(), tokens[1:]
    params_dict = {}
    items_to_strip = "\"' "

    for param in params:
        param = param.strip()
        if param:
            key, value = param, True
            index_of_equals = param.find("=")
            if index_of_equals != -1:
                key = param[:index_of_equals].strip(items_to_strip)
                value = param[index_of_equals + 1:].strip(items_to_strip)
            params_dict[key.lower()] = value
    return content_type, params_dict


def get_encoding_from_headers(headers):
    """Returns encodings from given HTTP Header Dict.

    :param headers: dictionary to extract encoding from.
    :rtype: str
    """

    content_type = headers.get('content-type')

    if not content_type:
        return None

    content_type, params = _parse_content_type_header(content_type)

    if 'charset' in params:
        return params['charset'].strip("'\"")

    if 'text' in content_type:
        return 'ISO-8859-1'

    if 'application/json' in content_type:
        # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset
        return 'utf-8'

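# Example (a sketch, not part of the upstream module): an explicit
# charset parameter wins; otherwise text/* falls back to ISO-8859-1.
#
#   >>> get_encoding_from_headers({'content-type': 'text/html; charset=UTF-8'})
#   'UTF-8'
#   >>> get_encoding_from_headers({'content-type': 'text/plain'})
#   'ISO-8859-1'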

def stream_decode_response_unicode(iterator, r):
    """Stream decodes an iterator."""

    if r.encoding is None:
        for item in iterator:
            yield item
        return

    decoder = codecs.getincrementaldecoder(r.encoding)(errors='replace')
    for chunk in iterator:
        rv = decoder.decode(chunk)
        if rv:
            yield rv
    rv = decoder.decode(b'', final=True)
    if rv:
        yield rv


def iter_slices(string, slice_length):
    """Iterate over slices of a string."""
    pos = 0
    if slice_length is None or slice_length <= 0:
        slice_length = len(string)
    while pos < len(string):
        yield string[pos:pos + slice_length]
        pos += slice_length

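# Example (a sketch, not part of the upstream module): slicing a string
# into fixed-size chunks; a None or non-positive slice_length yields the
# whole string at once.
#
#   >>> list(iter_slices('abcdef', 2))
#   ['ab', 'cd', 'ef']
#   >>> list(iter_slices('abcdef', None))
#   ['abcdef']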

def get_unicode_from_response(r):
    """Returns the requested content back in unicode.

    :param r: Response object to get unicode content from.

    Tries:

    1. charset from content-type
    2. fall back and replace all unicode characters

    :rtype: str
    """
    warnings.warn((
        'In requests 3.0, get_unicode_from_response will be removed. For '
        'more information, please see the discussion on issue #2266. (This'
        ' warning should only appear once.)'),
        DeprecationWarning)

    tried_encodings = []

    # Try charset from content-type
    encoding = get_encoding_from_headers(r.headers)

    if encoding:
        try:
            return str(r.content, encoding)
        except UnicodeError:
            tried_encodings.append(encoding)

    # Fall back:
    try:
        return str(r.content, encoding, errors='replace')
    except TypeError:
        return r.content


# The unreserved URI characters (RFC 3986)
UNRESERVED_SET = frozenset(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + "0123456789-._~")


def unquote_unreserved(uri):
    """Un-escape any percent-escape sequences in a URI that are unreserved
    characters. This leaves all reserved, illegal and non-ASCII bytes encoded.

    :rtype: str
    """
    parts = uri.split('%')
    for i in range(1, len(parts)):
        h = parts[i][0:2]
        if len(h) == 2 and h.isalnum():
            try:
                c = chr(int(h, 16))
            except ValueError:
                raise InvalidURL("Invalid percent-escape sequence: '%s'" % h)

            if c in UNRESERVED_SET:
                parts[i] = c + parts[i][2:]
            else:
                parts[i] = '%' + parts[i]
        else:
            parts[i] = '%' + parts[i]
    return ''.join(parts)


def requote_uri(uri):
    """Re-quote the given URI.

    This function passes the given URI through an unquote/quote cycle to
    ensure that it is fully and consistently quoted.

    :rtype: str
    """
    safe_with_percent = "!#$%&'()*+,/:;=?@[]~"
    safe_without_percent = "!#$&'()*+,/:;=?@[]~"
    try:
        # Unquote only the unreserved characters
        # Then quote only illegal characters (do not quote reserved,
        # unreserved, or '%')
        return quote(unquote_unreserved(uri), safe=safe_with_percent)
    except InvalidURL:
        # We couldn't unquote the given URI, so let's try quoting it, but
        # there may be unquoted '%'s in the URI. We need to make sure they're
        # properly quoted so they do not cause issues elsewhere.
        return quote(uri, safe=safe_without_percent)

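# Example (a sketch, not part of the upstream module): already-encoded
# unreserved characters are unescaped, while genuinely reserved or
# illegal bytes keep (or gain) their percent-encoding.
#
#   >>> requote_uri('http://example.com/%7Euser')
#   'http://example.com/~user'
#   >>> requote_uri('http://example.com/a b')
#   'http://example.com/a%20b'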

def address_in_network(ip, net):
    """This function allows you to check if an IP belongs to a network subnet

    Example: returns True if ip = 192.168.1.1 and net = 192.168.1.0/24
             returns False if ip = 192.168.1.1 and net = 192.168.100.0/24

    :rtype: bool
    """
    ipaddr = struct.unpack('=L', socket.inet_aton(ip))[0]
    netaddr, bits = net.split('/')
    netmask = struct.unpack('=L', socket.inet_aton(dotted_netmask(int(bits))))[0]
    network = struct.unpack('=L', socket.inet_aton(netaddr))[0] & netmask
    return (ipaddr & netmask) == (network & netmask)


def dotted_netmask(mask):
    """Converts mask from /xx format to xxx.xxx.xxx.xxx

    Example: if mask is 24 function returns 255.255.255.0

    :rtype: str
    """
    bits = 0xffffffff ^ (1 << 32 - mask) - 1
    return socket.inet_ntoa(struct.pack('>I', bits))

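# Example (a sketch, not part of the upstream module):
#
#   >>> dotted_netmask(24)
#   '255.255.255.0'
#   >>> address_in_network('192.168.1.1', '192.168.1.0/24')
#   True
#   >>> address_in_network('172.16.0.1', '192.168.1.0/24')
#   False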

def is_ipv4_address(string_ip):
    """
    :rtype: bool
    """
    try:
        socket.inet_aton(string_ip)
    except socket.error:
        return False
    return True


def is_valid_cidr(string_network):
    """
    Very simple check of the CIDR format in the no_proxy variable.

    :rtype: bool
    """
    if string_network.count('/') == 1:
        try:
            mask = int(string_network.split('/')[1])
        except ValueError:
            return False

        if mask < 1 or mask > 32:
            return False

        try:
            socket.inet_aton(string_network.split('/')[0])
        except socket.error:
            return False
    else:
        return False
    return True


@contextlib.contextmanager
def set_environ(env_name, value):
    """Set the environment variable 'env_name' to 'value'

    Save previous value, yield, and then restore the previous value stored in
    the environment variable 'env_name'.

    If 'value' is None, do nothing."""
    value_changed = value is not None
    if value_changed:
        old_value = os.environ.get(env_name)
        os.environ[env_name] = value
    try:
        yield
    finally:
        if value_changed:
            if old_value is None:
                del os.environ[env_name]
            else:
                os.environ[env_name] = old_value

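# Example (a sketch, not part of the upstream module): the variable is
# set only inside the with-block and the previous state is restored
# afterwards.
#
#   >>> import os
#   >>> with set_environ('NO_PROXY', 'example.com'):
#   ...     os.environ['NO_PROXY']
#   'example.com'
#   >>> 'NO_PROXY' in os.environ  # assuming it was unset beforehand
#   False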

def should_bypass_proxies(url, no_proxy):
    """
    Returns whether we should bypass proxies or not.

    :rtype: bool
    """
    # Prioritize lowercase environment variables over uppercase
    # to keep a consistent behaviour with other http projects (curl, wget).
    get_proxy = lambda k: os.environ.get(k) or os.environ.get(k.upper())

    # First check whether no_proxy is defined. If it is, check that the URL
    # we're getting isn't in the no_proxy list.
    no_proxy_arg = no_proxy
    if no_proxy is None:
        no_proxy = get_proxy('no_proxy')
    parsed = urlparse(url)

    if parsed.hostname is None:
        # URLs don't always have hostnames, e.g. file:/// urls.
        return True

    if no_proxy:
        # We need to check whether we match here. We need to see if we match
        # the end of the hostname, both with and without the port.
        no_proxy = (
            host for host in no_proxy.replace(' ', '').split(',') if host
        )

        if is_ipv4_address(parsed.hostname):
            for proxy_ip in no_proxy:
                if is_valid_cidr(proxy_ip):
                    if address_in_network(parsed.hostname, proxy_ip):
                        return True
                elif parsed.hostname == proxy_ip:
                    # If no_proxy ip was defined in plain IP notation instead
                    # of cidr notation and matches the IP of the URL's host
                    return True
        else:
            host_with_port = parsed.hostname
            if parsed.port:
                host_with_port += ':{}'.format(parsed.port)

            for host in no_proxy:
                if parsed.hostname.endswith(host) or host_with_port.endswith(host):
                    # The URL does match something in no_proxy, so we don't want
                    # to apply the proxies on this URL.
                    return True

    with set_environ('no_proxy', no_proxy_arg):
        # parsed.hostname can be `None` in cases such as a file URI.
        try:
            bypass = proxy_bypass(parsed.hostname)
        except (TypeError, socket.gaierror):
            bypass = False

    if bypass:
        return True

    return False


def get_environ_proxies(url, no_proxy=None):
    """
    Return a dict of environment proxies.

    :rtype: dict
    """
    if should_bypass_proxies(url, no_proxy=no_proxy):
        return {}
    else:
        return getproxies()


def select_proxy(url, proxies):
    """Select a proxy for the url, if applicable.

    :param url: The url for the request
    :param proxies: A dictionary of schemes or schemes and hosts to proxy URLs
    """
    proxies = proxies or {}
    urlparts = urlparse(url)
    if urlparts.hostname is None:
        return proxies.get(urlparts.scheme, proxies.get('all'))

    proxy_keys = [
        urlparts.scheme + '://' + urlparts.hostname,
        urlparts.scheme,
        'all://' + urlparts.hostname,
        'all',
    ]
    proxy = None
    for proxy_key in proxy_keys:
        if proxy_key in proxies:
            proxy = proxies[proxy_key]
            break

    return proxy

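# Example (a sketch, not part of the upstream module): more specific
# scheme://host keys take precedence over bare scheme keys.
#
#   >>> proxies = {'http': 'http://proxy:3128',
#   ...            'http://example.com': 'http://special:8080'}
#   >>> select_proxy('http://example.com/path', proxies)
#   'http://special:8080'
#   >>> select_proxy('http://other.org/', proxies)
#   'http://proxy:3128'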

def default_user_agent(name="python-requests"):
    """
    Return a string representing the default user agent.

    :rtype: str
    """
    return '%s/%s' % (name, __version__)


def default_headers():
    """
    :rtype: requests.structures.CaseInsensitiveDict
    """
    return CaseInsensitiveDict({
        'User-Agent': default_user_agent(),
        'Accept-Encoding': ', '.join(('gzip', 'deflate')),
        'Accept': '*/*',
        'Connection': 'keep-alive',
    })


def parse_header_links(value):
    """Return a list of parsed link headers.

    e.g. Link: <http://.../front.jpeg>; rel=front; type="image/jpeg",<http://.../back.jpeg>; rel=back;type="image/jpeg"

    :rtype: list
    """

    links = []

    replace_chars = ' \'"'

    value = value.strip(replace_chars)
    if not value:
        return links

    for val in re.split(', *<', value):
        try:
            url, params = val.split(';', 1)
        except ValueError:
            url, params = val, ''

        link = {'url': url.strip('<> \'"')}

        for param in params.split(';'):
            try:
                key, value = param.split('=')
            except ValueError:
                break

            link[key.strip(replace_chars)] = value.strip(replace_chars)

        links.append(link)

    return links

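# Example (a sketch, not part of the upstream module):
#
#   >>> parse_header_links('<http://example.com/page2>; rel="next"')
#   [{'url': 'http://example.com/page2', 'rel': 'next'}]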

# Null bytes; no need to recreate these on each call to guess_json_utf
_null = '\x00'.encode('ascii')  # encoding to ASCII for Python 3
_null2 = _null * 2
_null3 = _null * 3


def guess_json_utf(data):
    """
    :rtype: str
    """
    # JSON always starts with two ASCII characters, so detection is as
    # easy as counting the nulls and from their location and count
    # determine the encoding. Also detect a BOM, if present.
    sample = data[:4]
    if sample in (codecs.BOM_UTF32_LE, codecs.BOM_UTF32_BE):
        return 'utf-32'     # BOM included
    if sample[:3] == codecs.BOM_UTF8:
        return 'utf-8-sig'  # BOM included, MS style (discouraged)
    if sample[:2] in (codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE):
        return 'utf-16'     # BOM included
    nullcount = sample.count(_null)
    if nullcount == 0:
        return 'utf-8'
    if nullcount == 2:
        if sample[::2] == _null2:   # 1st and 3rd are null
            return 'utf-16-be'
        if sample[1::2] == _null2:  # 2nd and 4th are null
            return 'utf-16-le'
        # Did not detect 2 valid UTF-16 ascii-range characters
    if nullcount == 3:
        if sample[:3] == _null3:
            return 'utf-32-be'
        if sample[1:] == _null3:
            return 'utf-32-le'
        # Did not detect a valid UTF-32 ascii-range character
    return None

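# Example (a sketch, not part of the upstream module): plain ASCII JSON
# is reported as UTF-8, and a UTF-16 BOM is recognized.
#
#   >>> guess_json_utf(b'{"name": "value"}')
#   'utf-8'
#   >>> guess_json_utf('{"name": "value"}'.encode('utf-16'))
#   'utf-16'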

def prepend_scheme_if_needed(url, new_scheme):
    """Given a URL that may or may not have a scheme, prepend the given scheme.
    Does not replace a present scheme with the one provided as an argument.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url, new_scheme)

    # urlparse is a finicky beast, and sometimes decides that there isn't a
    # netloc present. Assume that it's being over-cautious, and switch netloc
    # and path if urlparse decided there was no netloc.
    if not netloc:
        netloc, path = path, netloc

    return urlunparse((scheme, netloc, path, params, query, fragment))


def get_auth_from_url(url):
    """Given a url with authentication components, extract them into a tuple
    of username, password.

    :rtype: (str, str)
    """
    parsed = urlparse(url)

    try:
        auth = (unquote(parsed.username), unquote(parsed.password))
    except (AttributeError, TypeError):
        auth = ('', '')

    return auth

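# Example (a sketch, not part of the upstream module): credentials are
# percent-decoded, and URLs without them yield an empty pair.
#
#   >>> get_auth_from_url('https://user:p%40ss@example.com/path')
#   ('user', 'p@ss')
#   >>> get_auth_from_url('https://example.com/')
#   ('', '')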

# Moved outside of function to avoid recompiling on every call
_CLEAN_HEADER_REGEX_BYTE = re.compile(b'^\\S[^\\r\\n]*$|^$')
_CLEAN_HEADER_REGEX_STR = re.compile(r'^\S[^\r\n]*$|^$')


def check_header_validity(header):
    """Verifies that header value is a string which doesn't contain
    leading whitespace or return characters. This prevents unintended
    header injection.

    :param header: tuple, in the format (name, value).
    """
    name, value = header

    if isinstance(value, bytes):
        pat = _CLEAN_HEADER_REGEX_BYTE
    else:
        pat = _CLEAN_HEADER_REGEX_STR
    try:
        if not pat.match(value):
            raise InvalidHeader("Invalid return character or leading space in header: %s" % name)
    except TypeError:
        raise InvalidHeader("Value for header {%s: %s} must be of type str or "
                            "bytes, not %s" % (name, value, type(value)))


def urldefragauth(url):
    """
    Given a url remove the fragment and the authentication part.

    :rtype: str
    """
    scheme, netloc, path, params, query, fragment = urlparse(url)

    # see func:`prepend_scheme_if_needed`
    if not netloc:
        netloc, path = path, netloc

    netloc = netloc.rsplit('@', 1)[-1]

    return urlunparse((scheme, netloc, path, params, query, ''))

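# Example (a sketch, not part of the upstream module):
#
#   >>> urldefragauth('https://user:pass@example.com/path#section')
#   'https://example.com/path'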

def rewind_body(prepared_request):
    """Move file pointer back to its recorded starting position
    so it can be read again on redirect.
    """
    body_seek = getattr(prepared_request.body, 'seek', None)
    if body_seek is not None and isinstance(prepared_request._body_position, integer_types):
        try:
            body_seek(prepared_request._body_position)
        except (IOError, OSError):
            raise UnrewindableBodyError("An error occurred when rewinding request "
                                        "body for redirect.")
    else:
        raise UnrewindableBodyError("Unable to rewind request body for redirect.")