comparison env/lib/python3.9/site-packages/pip/_internal/network/lazy_wheel.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
"""Lazy ZIP over HTTP.

Expose a wheel hosted on an HTTP server as a seekable, read-only
file-like object whose content is fetched on demand with HTTP range
requests, so that a distribution object can be built without
downloading the whole archive.
"""

__all__ = ['HTTPRangeRequestUnsupported', 'dist_from_wheel_url']
4
5 from bisect import bisect_left, bisect_right
6 from contextlib import contextmanager
7 from tempfile import NamedTemporaryFile
8 from zipfile import BadZipfile, ZipFile
9
10 from pip._vendor.requests.models import CONTENT_CHUNK_SIZE
11
12 from pip._internal.network.utils import HEADERS, raise_for_status, response_chunks
13 from pip._internal.utils.typing import MYPY_CHECK_RUNNING
14 from pip._internal.utils.wheel import pkg_resources_distribution_for_wheel
15
16 if MYPY_CHECK_RUNNING:
17 from typing import Any, Dict, Iterator, List, Optional, Tuple
18
19 from pip._vendor.pkg_resources import Distribution
20 from pip._vendor.requests.models import Response
21
22 from pip._internal.network.session import PipSession
23
24
class HTTPRangeRequestUnsupported(Exception):
    """Raised when the server does not advertise HTTP range requests."""
27
28
def dist_from_wheel_url(name, url, session):
    # type: (str, str, PipSession) -> Distribution
    """Build a pkg_resources.Distribution for the wheel located at url.

    Only the portion of the wheel that holds its metadata is fetched,
    through HTTP range requests, which is just enough for the object
    to be constructed.  HTTPRangeRequestUnsupported is raised when
    the server does not support such requests.
    """
    with LazyZipOverHTTP(url, session) as lazy_file:
        # A read-only ZipFile only calls read, seek, seekable and tell
        # on its file object, not the whole IO protocol.
        archive = ZipFile(lazy_file)  # type: ignore
        # Once the context manager exits, this path is intentionally
        # no longer a valid file.
        location = lazy_file.name
        return pkg_resources_distribution_for_wheel(archive, name, location)
45
46
class LazyZipOverHTTP:
    """File-like object mapped to a ZIP file over HTTP.

    This uses HTTP range requests to lazily fetch the file's content,
    which is supposed to be fed to ZipFile.  If such requests are not
    supported by the server, raise HTTPRangeRequestUnsupported
    during initialization.

    Fetched bytes are written into a temporary file that is pre-sized
    to the remote file's length.  The lists ``_left`` and ``_right``
    hold the inclusive start and end offsets of the intervals that
    have already been downloaded; both are searched with bisect.
    """

    def __init__(self, url, session, chunk_size=CONTENT_CHUNK_SIZE):
        # type: (str, PipSession, int) -> None
        """Probe url with a HEAD request and fetch enough to open the ZIP.

        Raises HTTPRangeRequestUnsupported if the HEAD response does
        not list ``bytes`` in its Accept-Ranges header.
        """
        head = session.head(url, headers=HEADERS)
        raise_for_status(head)
        assert head.status_code == 200
        self._session, self._url, self._chunk_size = session, url, chunk_size
        self._length = int(head.headers['Content-Length'])
        # Check for range support before creating the temporary file,
        # so that nothing needs cleaning up when the server lacks it.
        if 'bytes' not in head.headers.get('Accept-Ranges', 'none'):
            raise HTTPRangeRequestUnsupported('range request is not supported')
        self._left = []  # type: List[int]
        self._right = []  # type: List[int]
        self._file = NamedTemporaryFile()
        try:
            self.truncate(self._length)
            self._check_zip()
        except BaseException:
            # Do not leave the temporary file open when setup fails;
            # the caller never receives an object it could close.
            self._file.close()
            raise

    @property
    def mode(self):
        # type: () -> str
        """Opening mode, which is always rb."""
        return 'rb'

    @property
    def name(self):
        # type: () -> str
        """Path to the underlying file."""
        return self._file.name

    def seekable(self):
        # type: () -> bool
        """Return whether random access is supported, which is True."""
        return True

    def close(self):
        # type: () -> None
        """Close the file."""
        self._file.close()

    @property
    def closed(self):
        # type: () -> bool
        """Whether the file is closed."""
        return self._file.closed

    def read(self, size=-1):
        # type: (int) -> bytes
        """Read up to size bytes from the object and return them.

        As a convenience, if size is unspecified or -1,
        all bytes until EOF are returned.  Fewer than
        size bytes may be returned if EOF is reached.

        At least one chunk is requested per call; when the current
        position is closer than that to EOF, the fetched window is
        extended backwards instead.
        """
        download_size = max(size, self._chunk_size)
        position, length = self.tell(), self._length
        stop = length if size < 0 else min(position+download_size, length)
        # The window may begin before the current position (to fill a
        # whole chunk near EOF) but never after it: for size < 0 every
        # byte from the current position to EOF has to be fetched.
        start = max(0, min(position, stop-download_size))
        self._download(start, stop-1)
        return self._file.read(size)

    def readable(self):
        # type: () -> bool
        """Return whether the file is readable, which is True."""
        return True

    def seek(self, offset, whence=0):
        # type: (int, int) -> int
        """Change stream position and return the new absolute position.

        Seek to offset relative to the position indicated by whence:
        * 0: Start of stream (the default).  offset should be >= 0;
        * 1: Current position - offset may be negative;
        * 2: End of stream - offset usually negative.
        """
        return self._file.seek(offset, whence)

    def tell(self):
        # type: () -> int
        """Return the current position."""
        return self._file.tell()

    def truncate(self, size=None):
        # type: (Optional[int]) -> int
        """Resize the stream to the given size in bytes.

        If size is unspecified resize to the current position.
        The current stream position isn't changed.

        Return the new file size.
        """
        return self._file.truncate(size)

    def writable(self):
        # type: () -> bool
        """Return False."""
        return False

    def __enter__(self):
        # type: () -> LazyZipOverHTTP
        """Enter the underlying temporary file's context; return self."""
        self._file.__enter__()
        return self

    def __exit__(self, *exc):
        # type: (*Any) -> Optional[bool]
        """Leave the underlying temporary file's context."""
        return self._file.__exit__(*exc)

    @contextmanager
    def _stay(self):
        # type: ()-> Iterator[None]
        """Return a context manager keeping the position.

        At the end of the block, seek back to original position.
        """
        pos = self.tell()
        try:
            yield
        finally:
            self.seek(pos)

    def _check_zip(self):
        # type: () -> None
        """Check and download until the file is a valid ZIP.

        Progressively longer tails of the file are fetched, one chunk
        further towards the start on each iteration, until ZipFile
        accepts the object.  If it never does, return silently.
        """
        end = self._length - 1
        for start in reversed(range(0, end, self._chunk_size)):
            self._download(start, end)
            with self._stay():
                try:
                    # For read-only ZIP files, ZipFile only needs
                    # methods read, seek, seekable and tell.
                    ZipFile(self)  # type: ignore
                except BadZipfile:
                    pass
                else:
                    break

    def _stream_response(self, start, end, base_headers=HEADERS):
        # type: (int, int, Dict[str, str]) -> Response
        """Return HTTP response to a range request from start to end."""
        headers = base_headers.copy()
        headers['Range'] = f'bytes={start}-{end}'
        # TODO: Get range requests to be correctly cached
        headers['Cache-Control'] = 'no-cache'
        return self._session.get(self._url, headers=headers, stream=True)

    def _merge(self, start, end, left, right):
        # type: (int, int, int, int) -> Iterator[Tuple[int, int]]
        """Return an iterator of intervals to be fetched.

        The yielded (first, last) pairs are the inclusive gaps between
        the already downloaded intervals that overlap the request.
        Once the iterator is exhausted, those overlapping intervals
        are replaced in the bookkeeping by one merged interval.

        Args:
            start (int): Start of needed interval
            end (int): End of needed interval
            left (int): Index of first overlapping downloaded data
            right (int): Index after last overlapping downloaded data
        """
        lslice, rslice = self._left[left:right], self._right[left:right]
        # Widen the request to the outer bounds of every interval it
        # overlaps, so they collapse into a single entry afterwards.
        i = start = min([start]+lslice[:1])
        end = max([end]+rslice[-1:])
        for j, k in zip(lslice, rslice):
            if j > i:
                yield i, j-1
            i = k + 1
        if i <= end:
            yield i, end
        self._left[left:right], self._right[left:right] = [start], [end]

    def _download(self, start, end):
        # type: (int, int) -> None
        """Download bytes from start to end inclusively.

        Only the parts not fetched before are requested; the stream
        position is restored afterwards.
        """
        with self._stay():
            left = bisect_left(self._right, start)
            right = bisect_right(self._left, end)
            for start, end in self._merge(start, end, left, right):
                response = self._stream_response(start, end)
                response.raise_for_status()
                self.seek(start)
                for chunk in response_chunks(response, self._chunk_size):
                    self._file.write(chunk)