comparison env/lib/python3.9/site-packages/urllib3/poolmanager.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 from __future__ import absolute_import
2
3 import collections
4 import functools
5 import logging
6
7 from ._collections import RecentlyUsedContainer
8 from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
9 from .exceptions import (
10 LocationValueError,
11 MaxRetryError,
12 ProxySchemeUnknown,
13 ProxySchemeUnsupported,
14 URLSchemeUnknown,
15 )
16 from .packages import six
17 from .packages.six.moves.urllib.parse import urljoin
18 from .request import RequestMethods
19 from .util.proxy import connection_requires_http_tunnel
20 from .util.retry import Retry
21 from .util.url import parse_url
22
# Public names exported by a star-import of this module.
__all__ = [
    "PoolManager",
    "ProxyManager",
    "proxy_from_url",
]


# Module-level logger, named after this module.
log = logging.getLogger(__name__)
27
# Pool keyword arguments that only apply to TLS connections.  They are
# discarded from the pool arguments when a plain ``http`` pool is created.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ssl_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
)
38
# Every keyword argument that may be handed to the pool manager, to the pools
# it creates, or to the underlying connections.  The pool key is built from
# these fields; each one is the keyword name prefixed with ``key_``.
_key_fields = (
    # Target and general connection behaviour
    "key_scheme",  # str
    "key_host",  # str
    "key_port",  # int
    "key_timeout",  # int or float or Timeout
    "key_retries",  # int or Retry
    "key_strict",  # bool
    "key_block",  # bool
    "key_source_address",  # str
    # TLS settings
    "key_key_file",  # str
    "key_key_password",  # str
    "key_cert_file",  # str
    "key_cert_reqs",  # str
    "key_ca_certs",  # str
    "key_ssl_version",  # str
    "key_ca_cert_dir",  # str
    "key_ssl_context",  # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
    # Pool sizing and default headers
    "key_maxsize",  # int
    "key_headers",  # dict
    # Proxy settings
    "key__proxy",  # parsed proxy url
    "key__proxy_headers",  # dict
    "key__proxy_config",  # class
    # Socket-level options
    "key_socket_options",  # list of (level (int), optname (int), value (int or str)) tuples
    "key__socks_options",  # dict
    # Certificate verification
    "key_assert_hostname",  # bool or string
    "key_assert_fingerprint",  # str
    "key_server_hostname",  # str
)

#: Namedtuple type whose instances serve as connection pool keys.
#: Custom key schemes should provide at least the fields defined here.
PoolKey = collections.namedtuple("PoolKey", _key_fields)

# Proxy-specific settings bundled into one immutable value.
_proxy_config_fields = ("ssl_context", "use_forwarding_for_https")
ProxyConfig = collections.namedtuple("ProxyConfig", _proxy_config_fields)
76
77
def _default_key_normalizer(key_class, request_context):
    """
    Build a pool key from a request context dictionary.

    RFC 3986 treats both the scheme and the host as case-insensitive, so
    both are lower-cased before the key is constructed. To change this
    behaviour, supply different callables through ``key_fn_by_scheme``.

    :param key_class:
        The class used to construct the key. It should be a namedtuple
        providing at least the ``scheme`` and ``host`` keys.
    :type key_class: namedtuple
    :param request_context:
        A dictionary-like object holding the context for a request. It is
        copied, never modified.
    :type request_context: dict

    :return: A namedtuple usable as a connection pool key.
    :rtype: PoolKey
    """
    # Everything below mutates the mapping, so operate on a private copy.
    normalized = request_context.copy()
    for name in ("scheme", "host"):
        normalized[name] = normalized[name].lower()

    # Dictionary-valued entries are not hashable; freeze their items.
    for name in ("headers", "_proxy_headers", "_socks_options"):
        mapping = normalized.get(name)
        if mapping is not None:
            normalized[name] = frozenset(mapping.items())

    # ``socket_options`` may arrive as a list; a tuple is hashable.
    options = normalized.get("socket_options")
    if options is not None:
        normalized["socket_options"] = tuple(options)

    # Namedtuple fields cannot start with '_', so every keyword is stored
    # under a ``key_``-prefixed name instead.
    for name in list(normalized):
        normalized["key_" + name] = normalized.pop(name)

    # Fields absent from the context default to ``None``.
    for field in key_class._fields:
        normalized.setdefault(field, None)

    return key_class(**normalized)
125
126
#: Maps each supported scheme to the callable that builds its pool key.
#: Replace an entry to change how pool keys are constructed. Every
#: PoolManager takes its own copy of this dictionary, so changes can be made
#: globally here or per instance.
key_fn_by_scheme = {
    scheme: functools.partial(_default_key_normalizer, PoolKey)
    for scheme in ("http", "https")
}

#: Maps each supported scheme to the connection pool class used for it.
pool_classes_by_scheme = {
    "http": HTTPConnectionPool,
    "https": HTTPSConnectionPool,
}
137
138
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    proxy = None
    proxy_config = None

    # Lower-cased names of request headers that describe a request body.
    # They are dropped together with the body when a 303 redirect turns the
    # request into a GET (RFC 9110, Section 15.4).
    _content_specific_headers = frozenset(
        [
            "content-encoding",
            "content-language",
            "content-location",
            "content-type",
            "content-length",
            "digest",
            "last-modified",
        ]
    )

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        # Pools evicted from the container are closed automatically.
        self.pools = RecentlyUsedContainer(num_pools, dispose_func=lambda p: p.close())

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    @staticmethod
    def _without_headers(headers, unwanted):
        """
        Return a copy of ``headers`` without the entries named in ``unwanted``.

        Header names are compared case-insensitively, so ``unwanted`` must
        contain lower-case names. The mapping passed in is never modified;
        ``None`` is returned unchanged.
        """
        if headers is None:
            return None

        remaining = headers.copy()
        for name in list(six.iterkeys(headers)):
            if name.lower() in unwanted:
                remaining.pop(name, None)
        return remaining

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        ``request_context`` itself is left untouched; the keys that must not
        reach the pool constructor are removed from a copy.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()
        else:
            # The dictionary belongs to the caller, who may want to reuse it
            # (e.g. for another ``connection_from_context`` call), so the
            # keys removed below must not disappear from it.
            request_context = request_context.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # Plain HTTP pools do not accept the TLS-only keyword arguments.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        :raises URLSchemeUnknown: if no key function exists for the scheme.
        """
        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool is cached for
        ``pool_key``; in that case it must contain the ``scheme``, ``host``
        and ``port`` entries used to create the new pool.
        """
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "remove this setting", not "set to None".
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url):
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def _validate_proxy_scheme_url_selection(self, url_scheme):
        """
        Validates that we are not attempting to do TLS in TLS connections on
        Python2 or with unsupported SSL implementations.

        :raises ProxySchemeUnsupported:
            on Python 2 when an HTTPS destination would be reached through
            an HTTPS proxy without ``use_forwarding_for_https``.
        """
        if self.proxy is None or url_scheme != "https":
            return

        if self.proxy.scheme != "https":
            return

        if six.PY2 and not self.proxy_config.use_forwarding_for_https:
            raise ProxySchemeUnsupported(
                "Contacting HTTPS destinations through HTTPS proxies "
                "'via CONNECT tunnels' is not supported in Python 2"
            )

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        Redirects are followed here rather than in the pool, because the
        redirect target may live on a different host. A ``headers`` mapping
        supplied by the caller is never modified while doing so.
        """
        u = parse_url(url)
        self._validate_proxy_scheme_url_selection(u.scheme)

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # The pool must neither reject other hosts nor follow redirects
        # itself; both are handled below.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers.copy()

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        # RFC 9110, Section 15.4.4 (formerly RFC 7231, Section 6.4.4)
        if response.status == 303:
            method = "GET"
            # The follow-up GET must not carry the original request body,
            # which may hold sensitive data, nor the headers describing it.
            kw["body"] = None
            kw["headers"] = self._without_headers(
                kw["headers"], self._content_specific_headers
            )

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Strip from a copy so the caller's mapping keeps its headers.
            kw["headers"] = self._without_headers(
                kw["headers"], retries.remove_headers_on_redirect
            )

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            # Redirect budget exhausted, but the caller asked not to raise:
            # hand back the redirect response itself.
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
418
419
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private.  IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(
        self,
        proxy_url,
        num_pools=10,
        headers=None,
        proxy_headers=None,
        proxy_ssl_context=None,
        use_forwarding_for_https=False,
        **connection_pool_kw
    ):
        # A connection pool may stand in for the proxy URL; rebuild the URL
        # from the pool's scheme, host and port.
        if isinstance(proxy_url, HTTPConnectionPool):
            proxy_url = "%s://%s:%i" % (
                proxy_url.scheme,
                proxy_url.host,
                proxy_url.port,
            )

        parsed = parse_url(proxy_url)
        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        # Fill in the scheme's default port when the URL does not name one.
        if not parsed.port:
            parsed = parsed._replace(port=port_by_scheme.get(parsed.scheme, 80))

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https)

        # Every pool created by this manager receives the proxy settings.
        connection_pool_kw.update(
            {
                "_proxy": self.proxy,
                "_proxy_headers": self.proxy_headers,
                "_proxy_config": self.proxy_config,
            }
        )

        super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Return the pool to use for the given destination.

        ``https`` destinations get a pool keyed on the destination itself;
        every other scheme shares the pool that points at the proxy.
        """
        if scheme != "https":
            host, port, scheme = self.proxy.host, self.proxy.port, self.proxy.scheme

        return super(ProxyManager, self).connection_from_host(
            host, port, scheme, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(self, url, headers=None):
        """
        Build the headers a proxy needs, namely ``Accept`` and ``Host``.

        Values supplied in ``headers`` take precedence over these defaults.
        A new dictionary is returned.
        """
        merged = {"Accept": "*/*"}

        netloc = parse_url(url).netloc
        if netloc:
            merged["Host"] = netloc

        if headers:
            merged.update(headers)
        return merged

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        target = parse_url(url)
        tunnelled = connection_requires_http_tunnel(
            self.proxy, self.proxy_config, target.scheme
        )
        if not tunnelled:
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            kw["headers"] = self._set_proxy_headers(
                url, kw.get("headers", self.headers)
            )

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)
533
534
def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy located at ``url``.

    All keyword arguments are passed on to the :class:`ProxyManager`
    constructor unchanged.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager