comparison env/lib/python3.9/site-packages/boto/glacier/vault.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
# -*- coding: utf-8 -*-
# Copyright (c) 2012 Thomas Parslow http://almostobsolete.net/
# Copyright (c) 2012 Robie Basak <robie@justgohome.co.uk>
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
import codecs
from boto.glacier.exceptions import UploadArchiveError
from boto.glacier.job import Job
from boto.glacier.writer import compute_hashes_from_fileobj, \
    resume_file_upload, Writer
from boto.glacier.concurrent import ConcurrentUploader
from boto.glacier.utils import minimum_part_size, DEFAULT_PART_SIZE
import os.path


_MEGABYTE = 1024 * 1024
_GIGABYTE = 1024 * _MEGABYTE

MAXIMUM_ARCHIVE_SIZE = 10000 * 4 * _GIGABYTE
MAXIMUM_NUMBER_OF_PARTS = 10000

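# Worked example (comment only, not executed): the archive ceiling above
# follows from the Glacier multipart limits of at most
# MAXIMUM_NUMBER_OF_PARTS parts per upload and a maximum part size of 4 GiB:
#
#     10,000 parts * 4 GiB/part = 40,000 GiB, i.e. MAXIMUM_ARCHIVE_SIZE
#
# boto.glacier.utils.minimum_part_size() works the same arithmetic in the
# other direction: given a file size, it roughly picks the smallest
# power-of-two part size (in megabytes) that keeps the part count at or
# below MAXIMUM_NUMBER_OF_PARTS.
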

class Vault(object):

    DefaultPartSize = DEFAULT_PART_SIZE
    SingleOperationThreshold = 100 * _MEGABYTE

    ResponseDataElements = (('VaultName', 'name', None),
                            ('VaultARN', 'arn', None),
                            ('CreationDate', 'creation_date', None),
                            ('LastInventoryDate', 'last_inventory_date', None),
                            ('SizeInBytes', 'size', 0),
                            ('NumberOfArchives', 'number_of_archives', 0))

    def __init__(self, layer1, response_data=None):
        self.layer1 = layer1
        if response_data:
            for response_name, attr_name, default in self.ResponseDataElements:
                value = response_data[response_name]
                setattr(self, attr_name, value)
        else:
            for response_name, attr_name, default in self.ResponseDataElements:
                setattr(self, attr_name, default)

    def __repr__(self):
        return 'Vault("%s")' % self.arn

    def delete(self):
        """
        Delete this vault. WARNING! This cannot be undone.
        """
        self.layer1.delete_vault(self.name)

    def upload_archive(self, filename, description=None):
        """
        Add an archive to a vault. For archives larger than 100MB, a
        multipart upload is used.

        :type filename: str
        :param filename: A filename to upload

        :type description: str
        :param description: An optional description for the archive.

        :rtype: str
        :return: The archive id of the newly created archive
        """
        if os.path.getsize(filename) > self.SingleOperationThreshold:
            return self.create_archive_from_file(filename, description=description)
        return self._upload_archive_single_operation(filename, description)

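    # Illustrative usage sketch (comment only, not part of boto): the region,
    # vault name and file name below are assumptions, and the Layer2-style
    # setup is shown only for context.
    #
    #   import boto.glacier
    #   layer2 = boto.glacier.connect_to_region('us-east-1')
    #   vault = layer2.get_vault('my-vault')      # hypothetical vault name
    #   archive_id = vault.upload_archive('backup.tar',
    #                                     description='nightly backup')
    #
    # Files at or below SingleOperationThreshold (100MB) go through a single
    # upload_archive call; larger files fall through to the multipart path in
    # create_archive_from_file().
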
    def _upload_archive_single_operation(self, filename, description):
        """
        Add an archive to a vault in a single operation. Recommended for
        archives smaller than 100MB.

        :type filename: str
        :param filename: A filename to upload

        :type description: str
        :param description: A description for the archive.

        :rtype: str
        :return: The archive id of the newly created archive
        """
        with open(filename, 'rb') as fileobj:
            linear_hash, tree_hash = compute_hashes_from_fileobj(fileobj)
            fileobj.seek(0)
            response = self.layer1.upload_archive(self.name, fileobj,
                                                  linear_hash, tree_hash,
                                                  description)
        return response['ArchiveId']

    def create_archive_writer(self, part_size=DefaultPartSize,
                              description=None):
        """
        Create a new archive and begin a multi-part upload to it.
        Returns a file-like object to which the data for the archive
        can be written. Once all the data has been written, the
        file-like object should be closed; you can then call its
        get_archive_id method to get the ID of the created archive.

        :type part_size: int
        :param part_size: The part size for the multipart upload.

        :type description: str
        :param description: An optional description for the archive.

        :rtype: :class:`boto.glacier.writer.Writer`
        :return: A Writer object to which the archive data
            should be written.
        """
        response = self.layer1.initiate_multipart_upload(self.name,
                                                         part_size,
                                                         description)
        return Writer(self, response['UploadId'], part_size=part_size)

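    # Illustrative sketch (comment only): streaming data you generate
    # yourself through the Writer. 'data_chunks' is a hypothetical iterable
    # of byte strings, not something defined in this module.
    #
    #   writer = vault.create_archive_writer(description='streamed archive')
    #   for chunk in data_chunks:
    #       writer.write(chunk)
    #   writer.close()                        # finalizes the multipart upload
    #   archive_id = writer.get_archive_id()
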
    def create_archive_from_file(self, filename=None, file_obj=None,
                                 description=None, upload_id_callback=None):
        """
        Create a new archive and upload the data from the given file
        or file-like object.

        :type filename: str
        :param filename: A filename to upload

        :type file_obj: file
        :param file_obj: A file-like object to upload

        :type description: str
        :param description: An optional description for the archive.

        :type upload_id_callback: function
        :param upload_id_callback: If set, called with the upload_id as its
            only parameter as soon as it becomes known, so that a later call
            to resume_archive_from_file can resume the upload if needed.

        :rtype: str
        :return: The archive id of the newly created archive
        """
        part_size = self.DefaultPartSize
        if not file_obj:
            file_size = os.path.getsize(filename)
            try:
                part_size = minimum_part_size(file_size, part_size)
            except ValueError:
                raise UploadArchiveError("File size of %s bytes exceeds "
                                         "40,000 GB archive limit of "
                                         "Glacier." % file_size)
            file_obj = open(filename, "rb")
        writer = self.create_archive_writer(
            description=description,
            part_size=part_size)
        if upload_id_callback:
            upload_id_callback(writer.upload_id)
        while True:
            data = file_obj.read(part_size)
            if not data:
                break
            writer.write(data)
        writer.close()
        return writer.get_archive_id()

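    # Illustrative sketch (comment only): capturing the upload id so the
    # transfer can be resumed later. 'remember_upload_id' and the file names
    # are hypothetical helpers supplied by the caller, not part of boto.
    #
    #   def remember_upload_id(upload_id):
    #       with open('upload_id.txt', 'w') as f:
    #           f.write(upload_id)
    #
    #   archive_id = vault.create_archive_from_file(
    #       'backup.tar', upload_id_callback=remember_upload_id)
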
    @staticmethod
    def _range_string_to_part_index(range_string, part_size):
        start, inside_end = [int(value) for value in range_string.split('-')]
        end = inside_end + 1
        length = end - start
        if length == part_size + 1:
            # Off-by-one bug in Amazon's Glacier implementation,
            # see: https://forums.aws.amazon.com/thread.jspa?threadID=106866
            # Workaround: since part_size is too big by one byte, adjust it
            end -= 1
            inside_end -= 1
            length -= 1
        assert not (start % part_size), (
            "upload part start byte is not on a part boundary")
        assert (length <= part_size), "upload part is bigger than part size"
        return start // part_size

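    # Worked example of the range arithmetic above: with a 4 MiB part size
    # (4194304 bytes), a ListParts range of '8388608-12582911' covers bytes
    # 8388608 through 12582911 inclusive, so end = 12582912,
    # length = 4194304 == part_size, and the part index is
    # 8388608 // 4194304 == 2 (the third part).
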
    def resume_archive_from_file(self, upload_id, filename=None,
                                 file_obj=None):
        """Resume upload of a file already part-uploaded to Glacier.

        The resumption of an upload where the part-uploaded section is empty
        is a valid degenerate case that this function can handle.

        One and only one of filename or file_obj must be specified.

        :type upload_id: str
        :param upload_id: existing Glacier upload id of the upload being
            resumed.

        :type filename: str
        :param filename: file to open for resume

        :type file_obj: file
        :param file_obj: file-like object containing local data to resume.
            This must read from the start of the entire upload, not just from
            the point being resumed. Use file_obj.seek(0) to achieve this if
            necessary.

        :rtype: str
        :return: The archive id of the newly created archive

        """
        part_list_response = self.list_all_parts(upload_id)
        part_size = part_list_response['PartSizeInBytes']

        part_hash_map = {}
        for part_desc in part_list_response['Parts']:
            part_index = self._range_string_to_part_index(
                part_desc['RangeInBytes'], part_size)
            part_tree_hash = codecs.decode(part_desc['SHA256TreeHash'],
                                           'hex_codec')
            part_hash_map[part_index] = part_tree_hash

        if not file_obj:
            file_obj = open(filename, "rb")

        return resume_file_upload(
            self, upload_id, part_size, file_obj, part_hash_map)

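    # Illustrative sketch (comment only): resuming with an upload id saved
    # earlier (see the create_archive_from_file sketch above). The file names
    # are assumptions, not part of this module.
    #
    #   with open('upload_id.txt') as f:
    #       upload_id = f.read().strip()
    #   archive_id = vault.resume_archive_from_file(upload_id,
    #                                               filename='backup.tar')
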
    def concurrent_create_archive_from_file(self, filename, description,
                                            **kwargs):
        """
        Create a new archive and upload the given file.

        This is a convenience method around the
        :class:`boto.glacier.concurrent.ConcurrentUploader`
        class. This method will perform a multipart upload
        and upload the parts of the file concurrently.

        :type filename: str
        :param filename: A filename to upload

        :type description: str
        :param description: A description for the archive.

        :param kwargs: Additional kwargs to pass through to
            :py:class:`boto.glacier.concurrent.ConcurrentUploader`.
            You can pass any argument besides the ``api`` and
            ``vault_name`` param (these arguments are already
            passed to the ``ConcurrentUploader`` for you).

        :raises: :class:`boto.glacier.exceptions.UploadArchiveError` if an
            error occurs during the upload process.

        :rtype: str
        :return: The archive id of the newly created archive

        """
        uploader = ConcurrentUploader(self.layer1, self.name, **kwargs)
        archive_id = uploader.upload(filename, description)
        return archive_id

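    # Illustrative sketch (comment only): the extra keyword arguments are
    # passed straight through to ConcurrentUploader; the values shown here
    # are arbitrary examples, not recommendations.
    #
    #   archive_id = vault.concurrent_create_archive_from_file(
    #       'backup.tar', 'nightly backup', num_threads=4)
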
    def retrieve_archive(self, archive_id, sns_topic=None,
                         description=None):
        """
        Initiate an archive retrieval job to download the data from an
        archive. You will need to wait for the notification from
        Amazon (via SNS) before you can actually download the data;
        this typically takes around 4 hours.

        :type archive_id: str
        :param archive_id: The id of the archive

        :type description: str
        :param description: An optional description for the job.

        :type sns_topic: str
        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
            sends notification when the job is completed and the output
            is ready for you to download.

        :rtype: :class:`boto.glacier.job.Job`
        :return: A Job object representing the retrieval job.
        """
        job_data = {'Type': 'archive-retrieval',
                    'ArchiveId': archive_id}
        if sns_topic is not None:
            job_data['SNSTopic'] = sns_topic
        if description is not None:
            job_data['Description'] = description

        response = self.layer1.initiate_job(self.name, job_data)
        return self.get_job(response['JobId'])

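    # Illustrative sketch (comment only): initiating a retrieval and later
    # downloading the output once Glacier signals completion, typically hours
    # later. The SNS topic ARN and output filename are assumptions.
    #
    #   job = vault.retrieve_archive(archive_id,
    #                                sns_topic='arn:aws:sns:...:my-topic')
    #   # ... wait for the SNS notification, then:
    #   job.download_to_file('restored-backup.tar')
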
    def retrieve_inventory(self, sns_topic=None,
                           description=None, byte_range=None,
                           start_date=None, end_date=None,
                           limit=None):
        """
        Initiate an inventory retrieval job to list the items in the
        vault. You will need to wait for the notification from
        Amazon (via SNS) before you can actually download the data;
        this typically takes around 4 hours.

        :type description: str
        :param description: An optional description for the job.

        :type sns_topic: str
        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
            sends notification when the job is completed and the output
            is ready for you to download.

        :type byte_range: str
        :param byte_range: Range of bytes to retrieve.

        :type start_date: DateTime
        :param start_date: Beginning of the date range to query.

        :type end_date: DateTime
        :param end_date: End of the date range to query.

        :type limit: int
        :param limit: Limits the number of results returned.

        :rtype: str
        :return: The ID of the job
        """
        job_data = {'Type': 'inventory-retrieval'}
        if sns_topic is not None:
            job_data['SNSTopic'] = sns_topic
        if description is not None:
            job_data['Description'] = description
        if byte_range is not None:
            job_data['RetrievalByteRange'] = byte_range
        if start_date is not None or end_date is not None or limit is not None:
            rparams = {}

            if start_date is not None:
                rparams['StartDate'] = start_date.strftime('%Y-%m-%dT%H:%M:%S%Z')
            if end_date is not None:
                rparams['EndDate'] = end_date.strftime('%Y-%m-%dT%H:%M:%S%Z')
            if limit is not None:
                rparams['Limit'] = limit

            job_data['InventoryRetrievalParameters'] = rparams

        response = self.layer1.initiate_job(self.name, job_data)
        return response['JobId']

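    # Illustrative sketch (comment only): restricting the inventory job to a
    # date range. The datetime values are arbitrary examples.
    #
    #   import datetime
    #   job_id = vault.retrieve_inventory(
    #       start_date=datetime.datetime(2021, 1, 1),
    #       end_date=datetime.datetime(2021, 3, 1),
    #       limit=100)
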
    def retrieve_inventory_job(self, **kwargs):
        """
        Identical to ``retrieve_inventory``, but returns a ``Job`` instance
        instead of just the job ID.

        :type description: str
        :param description: An optional description for the job.

        :type sns_topic: str
        :param sns_topic: The Amazon SNS topic ARN where Amazon Glacier
            sends notification when the job is completed and the output
            is ready for you to download.

        :type byte_range: str
        :param byte_range: Range of bytes to retrieve.

        :type start_date: DateTime
        :param start_date: Beginning of the date range to query.

        :type end_date: DateTime
        :param end_date: End of the date range to query.

        :type limit: int
        :param limit: Limits the number of results returned.

        :rtype: :class:`boto.glacier.job.Job`
        :return: A Job object representing the retrieval job.
        """
        job_id = self.retrieve_inventory(**kwargs)
        return self.get_job(job_id)

    def delete_archive(self, archive_id):
        """
        This operation deletes an archive from the vault.

        :type archive_id: str
        :param archive_id: The ID for the archive to be deleted.
        """
        return self.layer1.delete_archive(self.name, archive_id)

    def get_job(self, job_id):
        """
        Get an object representing a job in progress.

        :type job_id: str
        :param job_id: The ID of the job

        :rtype: :class:`boto.glacier.job.Job`
        :return: A Job object representing the job.
        """
        response_data = self.layer1.describe_job(self.name, job_id)
        return Job(self, response_data)

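    # Illustrative sketch (comment only): checking on a previously initiated
    # job by id and fetching its output when done. 'job_id' is assumed to
    # come from an earlier retrieve_archive()/retrieve_inventory() call, and
    # the attribute names follow boto.glacier.job.Job.
    #
    #   job = vault.get_job(job_id)
    #   if job.completed:
    #       output = job.get_output()
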
    def list_jobs(self, completed=None, status_code=None):
        """
        Return a list of Job objects related to this vault.

        :type completed: boolean
        :param completed: Specifies the state of the jobs to return.
            If a value of True is passed, only completed jobs will
            be returned. If a value of False is passed, only
            uncompleted jobs will be returned. If no value is
            passed, all jobs will be returned.

        :type status_code: string
        :param status_code: Specifies the type of job status to return.
            Valid values are: InProgress|Succeeded|Failed. If not
            specified, jobs with all status codes are returned.

        :rtype: list of :class:`boto.glacier.job.Job`
        :return: A list of Job objects related to this vault.
        """
        response_data = self.layer1.list_jobs(self.name, completed,
                                              status_code)
        return [Job(self, jd) for jd in response_data['JobList']]

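    # Illustrative sketch (comment only): listing only the jobs that have
    # finished successfully; attribute names follow boto.glacier.job.Job.
    #
    #   done = vault.list_jobs(completed=True, status_code='Succeeded')
    #   for job in done:
    #       print(job.id, job.status_code)
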
    def list_all_parts(self, upload_id):
        """Automatically make and combine multiple calls to list_parts.

        Call list_parts as necessary, combining the results in case multiple
        calls were required to get data on all available parts.

        """
        result = self.layer1.list_parts(self.name, upload_id)
        marker = result['Marker']
        while marker:
            additional_result = self.layer1.list_parts(
                self.name, upload_id, marker=marker)
            result['Parts'].extend(additional_result['Parts'])
            marker = additional_result['Marker']
        # The marker makes no sense in an unpaginated result, and clearing it
        # makes testing easier. This also has the nice property that the
        # result is a normal (but expanded) response.
        result['Marker'] = None
        return result
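

# ---------------------------------------------------------------------------
# Minimal end-to-end sketch (illustrative only, not part of boto). It assumes
# AWS credentials are already configured for boto and that the region, vault
# name and file name below exist; run the module directly to try it, since
# the block is never executed on import.
if __name__ == '__main__':
    import boto.glacier

    layer2 = boto.glacier.connect_to_region('us-east-1')
    vault = layer2.get_vault('my-vault')      # hypothetical vault name

    # Upload a local file; upload_archive picks single-shot vs. multipart
    # automatically based on SingleOperationThreshold.
    archive_id = vault.upload_archive('backup.tar',
                                      description='example upload')

    # Kick off a retrieval job; the data becomes available hours later.
    job = vault.retrieve_archive(archive_id)
    print('initiated retrieval job', job.id)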