comparison env/lib/python3.9/site-packages/urllib3/response.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 from __future__ import absolute_import
2
3 import io
4 import logging
5 import zlib
6 from contextlib import contextmanager
7 from socket import error as SocketError
8 from socket import timeout as SocketTimeout
9
10 try:
11 import brotli
12 except ImportError:
13 brotli = None
14
15 from ._collections import HTTPHeaderDict
16 from .connection import BaseSSLError, HTTPException
17 from .exceptions import (
18 BodyNotHttplibCompatible,
19 DecodeError,
20 HTTPError,
21 IncompleteRead,
22 InvalidChunkLength,
23 InvalidHeader,
24 ProtocolError,
25 ReadTimeoutError,
26 ResponseNotChunked,
27 SSLError,
28 )
29 from .packages import six
30 from .util.response import is_fp_closed, is_response_to_head
31
32 log = logging.getLogger(__name__)
33
34
class DeflateDecoder(object):
    """Decode ``Content-Encoding: deflate`` bodies.

    Some servers send raw DEFLATE data without the zlib wrapper.  The first
    bytes are fed to a standard zlib decompressor while also being buffered;
    if zlib decoding fails before producing any output, decoding restarts on
    the buffered bytes with a header-less (raw) decompressor.
    """

    def __init__(self):
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def __getattr__(self, name):
        # Delegate everything not defined here (flush, unused_data, ...) to
        # the underlying zlib decompressor object.
        return getattr(self._obj, name)

    def decompress(self, data):
        if not data:
            return data

        if not self._first_try:
            # Format already settled; just decode.
            return self._obj.decompress(data)

        # Still probing: keep a copy of every byte seen so far in case we
        # need to restart with a raw-deflate decompressor.
        self._data += data
        try:
            chunk = self._obj.decompress(data)
        except zlib.error:
            # The zlib-wrapped guess was wrong; retry the whole buffered
            # stream as raw deflate (negative wbits disables the header).
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            buffered, self._data = self._data, None
            return self.decompress(buffered)
        if chunk:
            # Output was produced, so the zlib guess was right; stop
            # buffering input.
            self._first_try = False
            self._data = None
        return chunk
65
66
class GzipDecoderState(object):
    """Position within a (possibly multi-member) gzip stream."""

    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(object):
    """Decode ``Content-Encoding: gzip`` bodies, including concatenated
    multi-member streams.

    After the first member has been fully decoded, trailing garbage is
    tolerated (as other gzip clients do): decoding stops and any further
    input is silently discarded.
    """

    def __init__(self):
        # 16 + MAX_WBITS selects zlib's gzip-wrapper mode.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def __getattr__(self, name):
        # Delegate unknown attributes (e.g. flush) to the zlib decompressor.
        return getattr(self._obj, name)

    def decompress(self, data):
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                state_before_error = self._state
                # Regardless of outcome, ignore all data after an error.
                self._state = GzipDecoderState.SWALLOW_DATA
                if state_before_error == GzipDecoderState.OTHER_MEMBERS:
                    # Garbage after at least one complete member: accept
                    # what has been decoded so far.
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Another gzip member follows; start a fresh decompressor on it.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
102
103
# BrotliDecoder only exists when the optional ``brotli`` dependency was
# importable at module load time (see the try/except at the top of the file).
if brotli is not None:

    class BrotliDecoder(object):
        """Decode ``Content-Encoding: br`` bodies."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self):
            self._obj = brotli.Decompressor()
            # Bind ``decompress`` directly to the backend's method so the
            # per-chunk call avoids the name-dispatch below.
            if hasattr(self._obj, "decompress"):
                self.decompress = self._obj.decompress
            else:
                self.decompress = self._obj.process

        def flush(self):
            # 'Brotli' has no flush method; returning b"" keeps the
            # decoder interface uniform with the zlib-based decoders.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()
            return b""
121
122
class MultiDecoder(object):
    """
    Handle a comma-separated chain of content codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes):
        self._decoders = []
        for mode in modes.split(","):
            self._decoders.append(_get_decoder(mode.strip()))

    def flush(self):
        # Only the first-applied (innermost) coding can hold buffered data.
        return self._decoders[0].flush()

    def decompress(self, data):
        # Undo the codings in reverse order of application.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
142
143
def _get_decoder(mode):
    """Return a decoder instance for the given Content-Encoding value."""
    if "," in mode:
        # Several codings applied in sequence: chain a decoder per coding.
        decoder = MultiDecoder(mode)
    elif mode == "gzip":
        decoder = GzipDecoder()
    elif brotli is not None and mode == "br":
        decoder = BrotliDecoder()
    else:
        # Anything else is treated as deflate.
        decoder = DeflateDecoder()
    return decoder
155
156
class HTTPResponse(io.IOBase):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    # Content-Encodings this class can decode transparently; "br" is only
    # available when the optional brotli dependency was imported.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    def __init__(
        self,
        body="",
        headers=None,
        status=0,
        version=0,
        reason=None,
        strict=0,
        preload_content=True,
        decode_content=True,
        original_response=None,
        pool=None,
        connection=None,
        msg=None,
        retries=None,
        enforce_content_length=False,
        request_method=None,
        request_url=None,
        auto_close=True,
    ):

        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)
        self.status = status
        self.version = version
        self.reason = reason
        self.strict = strict
        self.decode_content = decode_content
        self.retries = retries
        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._decoder = None
        self._body = None
        self._fp = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg
        self._request_url = request_url

        # A str/bytes body is stored directly; a file-like body (anything
        # with .read) is kept as the underlying fp to stream from.
        if body and isinstance(body, (six.string_types, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body

        # Are we using the chunked-style of transfer encoding?
        self.chunked = False
        self.chunk_left = None
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def get_redirect_location(self):
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")

        return False

    def release_conn(self):
        """Return the held connection back to its pool, if both exist."""
        if not self._pool or not self._connection:
            return

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self):
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, SocketError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self):
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body

        if self._fp:
            # Read the whole body and cache it so repeated accesses work.
            return self.read(cache_content=True)

    @property
    def connection(self):
        """The underlying connection this response is being read from, if any."""
        return self._connection

    def isclosed(self):
        # True when the underlying file-like object is closed (or absent).
        return is_fp_closed(self._fp)

    def tell(self):
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method):
        """
        Set initial length value for Response content if available.
        """
        length = self.headers.get("content-length")

        if length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = set([int(val) for val in length.split(",")])
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    def _init_decoder(self):
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Comma-separated list of codings: only build a chained
                # decoder if at least one coding is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if len(encodings):
                    self._decoder = _get_decoder(content_encoding)

    # Exceptions raised by the decoders that should surface as DecodeError.
    DECODER_ERROR_CLASSES = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    def _decode(self, data, decode_content, flush_decoder):
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            )
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self):
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            buf = self._decoder.decompress(b"")
            return buf + self._decoder.flush()

        return b""

    @contextmanager
    def _error_catcher(self):
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e)

                raise ReadTimeoutError(self._pool, None, "Read timed out.")

            except (HTTPException, SocketError) as e:
                # This includes IncompleteRead.
                raise ProtocolError("Connection broken: %r" % e, e)

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def read(self, amt=None, decode_content=None, cache_content=False):
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if self._fp is None:
            return

        flush_decoder = False
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            if amt is None:
                # cStringIO doesn't like amt=None
                data = self._fp.read() if not fp_closed else b""
                flush_decoder = True
            else:
                cache_content = False
                data = self._fp.read(amt) if not fp_closed else b""
                if (
                    amt != 0 and not data
                ):  # Platform-specific: Buggy versions of Python.
                    # Close the connection when no data is returned
                    #
                    # This is redundant to what httplib/http.client _should_
                    # already do. However, versions of python released before
                    # December 15, 2012 (http://bugs.python.org/issue16298) do
                    # not properly close the connection in all cases. There is
                    # no harm in redundantly calling close.
                    self._fp.close()
                    flush_decoder = True
                    if self.enforce_content_length and self.length_remaining not in (
                        0,
                        None,
                    ):
                        # This is an edge case that httplib failed to cover due
                        # to concerns of backward compatibility. We're
                        # addressing it here to make sure IncompleteRead is
                        # raised during streaming, so all calls with incorrect
                        # Content-Length are caught.
                        raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            # Track raw (wire) bytes consumed for tell()/length accounting.
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)

            data = self._decode(data, decode_content, flush_decoder)

            if cache_content:
                self._body = data

        return data

    def stream(self, amt=2 ** 16, decode_content=None):
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            for line in self.read_chunked(amt, decode_content=decode_content):
                yield line
        else:
            while not is_fp_closed(self._fp):
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    @classmethod
    def from_httplib(ResponseCls, r, **response_kw):
        """
        Given an :class:`http.client.HTTPResponse` instance ``r``, return a
        corresponding :class:`urllib3.response.HTTPResponse` object.

        Remaining parameters are passed to the HTTPResponse constructor, along
        with ``original_response=r``.
        """
        headers = r.msg

        if not isinstance(headers, HTTPHeaderDict):
            if six.PY2:
                # Python 2.7
                headers = HTTPHeaderDict.from_httplib(headers)
            else:
                headers = HTTPHeaderDict(headers.items())

        # HTTPResponse objects in Python 3 don't have a .strict attribute
        strict = getattr(r, "strict", 0)
        resp = ResponseCls(
            body=r,
            headers=headers,
            status=r.status,
            version=r.version,
            reason=r.reason,
            strict=strict,
            original_response=r,
            **response_kw
        )
        return resp

    # Backwards-compatibility methods for http.client.HTTPResponse
    def getheaders(self):
        return self.headers

    def getheader(self, name, default=None):
        return self.headers.get(name, default)

    # Backwards compatibility for http.cookiejar
    def info(self):
        return self.headers

    # Overrides from io.IOBase
    def close(self):
        if not self.closed:
            self._fp.close()

        if self._connection:
            self._connection.close()

        # When auto_close is disabled, mark this io.IOBase wrapper itself
        # closed so ``closed`` (below) reflects the explicit close() call.
        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self):
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self):
        # Surface the socket's file descriptor when the wrapped fp has one.
        if self._fp is None:
            raise IOError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise IOError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self):
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def readable(self):
        # This method is required for `io` module compatibility.
        return True

    def readinto(self, b):
        # This method is required for `io` module compatibility.
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    def supports_chunked_reads(self):
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self):
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return
        line = self._fp.fp.readline()
        # Strip any chunk extensions (everything after the first ';').
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise InvalidChunkLength(self, line)

    def _handle_chunk(self, amt):
        """Read up to ``amt`` bytes (all of it when ``amt`` is None) from the
        current chunk, consuming the trailing CRLF when the chunk ends."""
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)
            returned_chunk = chunk
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif amt < self.chunk_left:
            value = self._fp._safe_read(amt)
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)
            self._fp._safe_read(2)  # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk

    def read_chunked(self, amt=None, decode_content=None):
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:
                return

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    # Zero-length chunk marks the end of the body.
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while True:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    def geturl(self):
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        if self.retries is not None and len(self.retries.history):
            return self.retries.history[-1].redirect_location
        else:
            return self._request_url

    def __iter__(self):
        # Yield the decoded body line by line, buffering partial lines
        # across stream() chunks.
        buffer = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunk = chunk.split(b"\n")
                yield b"".join(buffer) + chunk[0] + b"\n"
                for x in chunk[1:-1]:
                    yield x + b"\n"
                if chunk[-1]:
                    buffer = [chunk[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)