comparison env/lib/python3.9/site-packages/boto/s3/key.py @ 0:4f3585e2f14b draft default tip

"planemo upload commit 60cee0fc7c0cda8592644e1aad72851dec82c959"
author shellac
date Mon, 22 Mar 2021 18:12:50 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:4f3585e2f14b
1 # Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/
2 # Copyright (c) 2011, Nexenta Systems Inc.
3 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved
4 #
5 # Permission is hereby granted, free of charge, to any person obtaining a
6 # copy of this software and associated documentation files (the
7 # "Software"), to deal in the Software without restriction, including
8 # without limitation the rights to use, copy, modify, merge, publish, dis-
9 # tribute, sublicense, and/or sell copies of the Software, and to permit
10 # persons to whom the Software is furnished to do so, subject to the fol-
11 # lowing conditions:
12 #
13 # The above copyright notice and this permission notice shall be included
14 # in all copies or substantial portions of the Software.
15 #
16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 # IN THE SOFTWARE.
23 import email.utils
24 import errno
25 import hashlib
26 import mimetypes
27 import os
28 import re
29 import base64
30 import binascii
31 import math
32 from hashlib import md5
33 import boto.utils
34 from boto.compat import BytesIO, six, urllib, encodebytes
35
36 from boto.exception import BotoClientError
37 from boto.exception import StorageDataError
38 from boto.exception import PleaseRetryException
39 from boto.provider import Provider
40 from boto.s3.keyfile import KeyFile
41 from boto.s3.user import User
42 from boto import UserAgent
43 from boto.utils import compute_md5, compute_hash
44 from boto.utils import find_matching_headers
45 from boto.utils import merge_headers_by_name
46
47
48 class Key(object):
49 """
50 Represents a key (object) in an S3 bucket.
51
52 :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`.
53 :ivar name: The name of this Key object.
54 :ivar metadata: A dictionary containing user metadata that you
55 wish to store with the object or that has been retrieved from
56 an existing object.
57 :ivar cache_control: The value of the `Cache-Control` HTTP header.
58 :ivar content_type: The value of the `Content-Type` HTTP header.
59 :ivar content_encoding: The value of the `Content-Encoding` HTTP header.
60 :ivar content_disposition: The value of the `Content-Disposition` HTTP
61 header.
62 :ivar content_language: The value of the `Content-Language` HTTP header.
63 :ivar etag: The `etag` associated with this object.
64 :ivar last_modified: The string timestamp representing the last
65 time this object was modified in S3.
66 :ivar owner: The ID of the owner of this object.
67 :ivar storage_class: The storage class of the object. Currently, one of:
68 STANDARD | REDUCED_REDUNDANCY | GLACIER
69 :ivar md5: The MD5 hash of the contents of the object.
70 :ivar size: The size, in bytes, of the object.
71 :ivar version_id: The version ID of this object, if it is a versioned
72 object.
73 :ivar encrypted: Whether the object is encrypted while at rest on
74 the server.
75 """
76
77 DefaultContentType = 'application/octet-stream'
78
79 RestoreBody = """<?xml version="1.0" encoding="UTF-8"?>
80 <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01">
81 <Days>%s</Days>
82 </RestoreRequest>"""
83
84
85 BufferSize = boto.config.getint('Boto', 'key_buffer_size', 8192)
86
87 # The object metadata fields a user can set, other than custom metadata
88 # fields (i.e., those beginning with a provider-specific prefix like
89 # x-amz-meta).
90 base_user_settable_fields = set(["cache-control", "content-disposition",
91 "content-encoding", "content-language",
92 "content-md5", "content-type",
93 "x-robots-tag", "expires"])
94 _underscore_base_user_settable_fields = set()
95 for f in base_user_settable_fields:
96 _underscore_base_user_settable_fields.add(f.replace('-', '_'))
97 # Metadata fields, whether user-settable or not, other than custom
98 # metadata fields (i.e., those beginning with a provider specific prefix
99 # like x-amz-meta).
100 base_fields = (base_user_settable_fields |
101 set(["last-modified", "content-length", "date", "etag"]))
102
103
104
105 def __init__(self, bucket=None, name=None):
106 self.bucket = bucket
107 self.name = name
108 self.metadata = {}
109 self.cache_control = None
110 self.content_type = self.DefaultContentType
111 self.content_encoding = None
112 self.content_disposition = None
113 self.content_language = None
114 self.filename = None
115 self.etag = None
116 self.is_latest = False
117 self.last_modified = None
118 self.owner = None
119 self._storage_class = None
120 self.path = None
121 self.resp = None
122 self.mode = None
123 self.size = None
124 self.version_id = None
125 self.source_version_id = None
126 self.delete_marker = False
127 self.encrypted = None
128 # If the object is being restored, this attribute will be set to True.
129 # If the object is restored, it will be set to False. Otherwise this
130 # value will be None. If the restore is completed (ongoing_restore =
131 # False), the expiry_date will be populated with the expiry date of the
132 # restored object.
133 self.ongoing_restore = None
134 self.expiry_date = None
135 self.local_hashes = {}
136
137 def __repr__(self):
138 if self.bucket:
139 name = u'<Key: %s,%s>' % (self.bucket.name, self.name)
140 else:
141 name = u'<Key: None,%s>' % self.name
142
143 # Encode to bytes for Python 2 to prevent display decoding issues
144 if not isinstance(name, str):
145 name = name.encode('utf-8')
146
147 return name
148
149 def __iter__(self):
150 return self
151
152 @property
153 def provider(self):
154 provider = None
155 if self.bucket and self.bucket.connection:
156 provider = self.bucket.connection.provider
157 return provider
158
159 def _get_key(self):
160 return self.name
161
162 def _set_key(self, value):
163 self.name = value
164
165 key = property(_get_key, _set_key);
166
167 def _get_md5(self):
168 if 'md5' in self.local_hashes and self.local_hashes['md5']:
169 return binascii.b2a_hex(self.local_hashes['md5'])
170
171 def _set_md5(self, value):
172 if value:
173 self.local_hashes['md5'] = binascii.a2b_hex(value)
174 elif 'md5' in self.local_hashes:
175 self.local_hashes.pop('md5', None)
176
177 md5 = property(_get_md5, _set_md5);
178
179 def _get_base64md5(self):
180 if 'md5' in self.local_hashes and self.local_hashes['md5']:
181 md5 = self.local_hashes['md5']
182 if not isinstance(md5, bytes):
183 md5 = md5.encode('utf-8')
184 return binascii.b2a_base64(md5).decode('utf-8').rstrip('\n')
185
186 def _set_base64md5(self, value):
187 if value:
188 if not isinstance(value, six.string_types):
189 value = value.decode('utf-8')
190 self.local_hashes['md5'] = binascii.a2b_base64(value)
191 elif 'md5' in self.local_hashes:
192 del self.local_hashes['md5']
193
194 base64md5 = property(_get_base64md5, _set_base64md5);
195
196 def _get_storage_class(self):
197 if self._storage_class is None and self.bucket:
198 # Attempt to fetch storage class
199 list_items = list(self.bucket.list(self.name.encode('utf-8')))
200 if len(list_items) and getattr(list_items[0], '_storage_class',
201 None):
202 self._storage_class = list_items[0]._storage_class
203 else:
204 # Key is not yet saved? Just use default...
205 self._storage_class = 'STANDARD'
206
207 return self._storage_class
208
209 def _set_storage_class(self, value):
210 self._storage_class = value
211
212 storage_class = property(_get_storage_class, _set_storage_class)
213
214 def get_md5_from_hexdigest(self, md5_hexdigest):
215 """
216 A utility function to create the 2-tuple (md5hexdigest, base64md5)
217 from just having a precalculated md5_hexdigest.
218 """
219 digest = binascii.unhexlify(md5_hexdigest)
220 base64md5 = encodebytes(digest)
221 if base64md5[-1] == '\n':
222 base64md5 = base64md5[0:-1]
223 return (md5_hexdigest, base64md5)
224
225 def handle_encryption_headers(self, resp):
226 provider = self.bucket.connection.provider
227 if provider.server_side_encryption_header:
228 self.encrypted = resp.getheader(
229 provider.server_side_encryption_header, None)
230 else:
231 self.encrypted = None
232
233 def handle_storage_class_header(self, resp):
234 provider = self.bucket.connection.provider
235 if provider.storage_class_header:
236 self._storage_class = resp.getheader(
237 provider.storage_class_header, None)
238 if (self._storage_class is None and
239 provider.get_provider_name() == 'aws'):
240 # S3 docs for HEAD object requests say S3 will return this
241 # header for all objects except Standard storage class objects.
242 self._storage_class = 'STANDARD'
243
244
245 def handle_version_headers(self, resp, force=False):
246 provider = self.bucket.connection.provider
247 # If the Key object already has a version_id attribute value, it
248 # means that it represents an explicit version and the user is
249 # doing a get_contents_*(version_id=<foo>) to retrieve another
250 # version of the Key. In that case, we don't really want to
251 # overwrite the version_id in this Key object. Comprende?
252 if self.version_id is None or force:
253 self.version_id = resp.getheader(provider.version_id, None)
254 self.source_version_id = resp.getheader(provider.copy_source_version_id,
255 None)
256 if resp.getheader(provider.delete_marker, 'false') == 'true':
257 self.delete_marker = True
258 else:
259 self.delete_marker = False
260
261 def handle_restore_headers(self, response):
262 provider = self.bucket.connection.provider
263 header = response.getheader(provider.restore_header)
264 if header is None:
265 return
266 parts = header.split(',', 1)
267 for part in parts:
268 key, val = [i.strip() for i in part.split('=')]
269 val = val.replace('"', '')
270 if key == 'ongoing-request':
271 self.ongoing_restore = True if val.lower() == 'true' else False
272 elif key == 'expiry-date':
273 self.expiry_date = val
274
275 def handle_addl_headers(self, headers):
276 """
277 Used by Key subclasses to do additional, provider-specific
278 processing of response headers. No-op for this base class.
279 """
280 pass
281
282 def open_read(self, headers=None, query_args='',
283 override_num_retries=None, response_headers=None):
284 """
285 Open this key for reading
286
287 :type headers: dict
288 :param headers: Headers to pass in the web request
289
290 :type query_args: string
291 :param query_args: Arguments to pass in the query string
292 (ie, 'torrent')
293
294 :type override_num_retries: int
295 :param override_num_retries: If not None will override configured
296 num_retries parameter for underlying GET.
297
298 :type response_headers: dict
299 :param response_headers: A dictionary containing HTTP
300 headers/values that will override any headers associated
301 with the stored object in the response. See
302 http://goo.gl/EWOPb for details.
303 """
304 if self.resp is None:
305 self.mode = 'r'
306
307 provider = self.bucket.connection.provider
308 self.resp = self.bucket.connection.make_request(
309 'GET', self.bucket.name, self.name, headers,
310 query_args=query_args,
311 override_num_retries=override_num_retries)
312 if self.resp.status < 199 or self.resp.status > 299:
313 body = self.resp.read()
314 raise provider.storage_response_error(self.resp.status,
315 self.resp.reason, body)
316 response_headers = self.resp.msg
317 self.metadata = boto.utils.get_aws_metadata(response_headers,
318 provider)
319 for name, value in response_headers.items():
320 # To get correct size for Range GETs, use Content-Range
321 # header if one was returned. If not, use Content-Length
322 # header.
323 if (name.lower() == 'content-length' and
324 'Content-Range' not in response_headers):
325 self.size = int(value)
326 elif name.lower() == 'content-range':
327 end_range = re.sub('.*/(.*)', '\\1', value)
328 self.size = int(end_range)
329 elif name.lower() in Key.base_fields:
330 self.__dict__[name.lower().replace('-', '_')] = value
331 self.handle_version_headers(self.resp)
332 self.handle_encryption_headers(self.resp)
333 self.handle_restore_headers(self.resp)
334 self.handle_addl_headers(self.resp.getheaders())
335
336 def open_write(self, headers=None, override_num_retries=None):
337 """
338 Open this key for writing.
339 Not yet implemented
340
341 :type headers: dict
342 :param headers: Headers to pass in the write request
343
344 :type override_num_retries: int
345 :param override_num_retries: If not None will override configured
346 num_retries parameter for underlying PUT.
347 """
348 raise BotoClientError('Not Implemented')
349
350 def open(self, mode='r', headers=None, query_args=None,
351 override_num_retries=None):
352 if mode == 'r':
353 self.mode = 'r'
354 self.open_read(headers=headers, query_args=query_args,
355 override_num_retries=override_num_retries)
356 elif mode == 'w':
357 self.mode = 'w'
358 self.open_write(headers=headers,
359 override_num_retries=override_num_retries)
360 else:
361 raise BotoClientError('Invalid mode: %s' % mode)
362
363 closed = False
364
365 def close(self, fast=False):
366 """
367 Close this key.
368
369 :type fast: bool
370 :param fast: True if you want the connection to be closed without first
371 reading the content. This should only be used in cases where subsequent
372 calls don't need to return the content from the open HTTP connection.
373 Note: As explained at
374 http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse,
375 callers must read the whole response before sending a new request to the
376 server. Calling Key.close(fast=True) and making a subsequent request to
377 the server will work because boto will get an httplib exception and
378 close/reopen the connection.
379
380 """
381 if self.resp and not fast:
382 self.resp.read()
383 self.resp = None
384 self.mode = None
385 self.closed = True
386
387 def next(self):
388 """
389 By providing a next method, the key object supports use as an iterator.
390 For example, you can now say:
391
392 for bytes in key:
393 write bytes to a file or whatever
394
395 All of the HTTP connection stuff is handled for you.
396 """
397 self.open_read()
398 data = self.resp.read(self.BufferSize)
399 if not data:
400 self.close()
401 raise StopIteration
402 return data
403
404 # Python 3 iterator support
405 __next__ = next
406
407 def read(self, size=0):
408 self.open_read()
409 if size == 0:
410 data = self.resp.read()
411 else:
412 data = self.resp.read(size)
413 if not data:
414 self.close()
415 return data
416
417 def change_storage_class(self, new_storage_class, dst_bucket=None,
418 validate_dst_bucket=True):
419 """
420 Change the storage class of an existing key.
421 Depending on whether a different destination bucket is supplied
422 or not, this will either move the item within the bucket, preserving
423 all metadata and ACL info bucket changing the storage class or it
424 will copy the item to the provided destination bucket, also
425 preserving metadata and ACL info.
426
427 :type new_storage_class: string
428 :param new_storage_class: The new storage class for the Key.
429 Possible values are:
430 * STANDARD
431 * REDUCED_REDUNDANCY
432
433 :type dst_bucket: string
434 :param dst_bucket: The name of a destination bucket. If not
435 provided the current bucket of the key will be used.
436
437 :type validate_dst_bucket: bool
438 :param validate_dst_bucket: If True, will validate the dst_bucket
439 by using an extra list request.
440 """
441 bucket_name = dst_bucket or self.bucket.name
442 if new_storage_class == 'STANDARD':
443 return self.copy(bucket_name, self.name,
444 reduced_redundancy=False, preserve_acl=True,
445 validate_dst_bucket=validate_dst_bucket)
446 elif new_storage_class == 'REDUCED_REDUNDANCY':
447 return self.copy(bucket_name, self.name,
448 reduced_redundancy=True, preserve_acl=True,
449 validate_dst_bucket=validate_dst_bucket)
450 else:
451 raise BotoClientError('Invalid storage class: %s' %
452 new_storage_class)
453
454 def copy(self, dst_bucket, dst_key, metadata=None,
455 reduced_redundancy=False, preserve_acl=False,
456 encrypt_key=False, validate_dst_bucket=True):
457 """
458 Copy this Key to another bucket.
459
460 :type dst_bucket: string
461 :param dst_bucket: The name of the destination bucket
462
463 :type dst_key: string
464 :param dst_key: The name of the destination key
465
466 :type metadata: dict
467 :param metadata: Metadata to be associated with new key. If
468 metadata is supplied, it will replace the metadata of the
469 source key being copied. If no metadata is supplied, the
470 source key's metadata will be copied to the new key.
471
472 :type reduced_redundancy: bool
473 :param reduced_redundancy: If True, this will force the
474 storage class of the new Key to be REDUCED_REDUNDANCY
475 regardless of the storage class of the key being copied.
476 The Reduced Redundancy Storage (RRS) feature of S3,
477 provides lower redundancy at lower storage cost.
478
479 :type preserve_acl: bool
480 :param preserve_acl: If True, the ACL from the source key will
481 be copied to the destination key. If False, the
482 destination key will have the default ACL. Note that
483 preserving the ACL in the new key object will require two
484 additional API calls to S3, one to retrieve the current
485 ACL and one to set that ACL on the new object. If you
486 don't care about the ACL, a value of False will be
487 significantly more efficient.
488
489 :type encrypt_key: bool
490 :param encrypt_key: If True, the new copy of the object will
491 be encrypted on the server-side by S3 and will be stored
492 in an encrypted form while at rest in S3.
493
494 :type validate_dst_bucket: bool
495 :param validate_dst_bucket: If True, will validate the dst_bucket
496 by using an extra list request.
497
498 :rtype: :class:`boto.s3.key.Key` or subclass
499 :returns: An instance of the newly created key object
500 """
501 dst_bucket = self.bucket.connection.lookup(dst_bucket,
502 validate_dst_bucket)
503 if reduced_redundancy:
504 storage_class = 'REDUCED_REDUNDANCY'
505 else:
506 storage_class = self.storage_class
507 return dst_bucket.copy_key(dst_key, self.bucket.name,
508 self.name, metadata,
509 storage_class=storage_class,
510 preserve_acl=preserve_acl,
511 encrypt_key=encrypt_key,
512 src_version_id=self.version_id)
513
514 def startElement(self, name, attrs, connection):
515 if name == 'Owner':
516 self.owner = User(self)
517 return self.owner
518 else:
519 return None
520
521 def endElement(self, name, value, connection):
522 if name == 'Key':
523 self.name = value
524 elif name == 'ETag':
525 self.etag = value
526 elif name == 'IsLatest':
527 if value == 'true':
528 self.is_latest = True
529 else:
530 self.is_latest = False
531 elif name == 'LastModified':
532 self.last_modified = value
533 elif name == 'Size':
534 self.size = int(value)
535 elif name == 'StorageClass':
536 self.storage_class = value
537 elif name == 'Owner':
538 pass
539 elif name == 'VersionId':
540 self.version_id = value
541 else:
542 setattr(self, name, value)
543
544 def exists(self, headers=None):
545 """
546 Returns True if the key exists
547
548 :rtype: bool
549 :return: Whether the key exists on S3
550 """
551 return bool(self.bucket.lookup(self.name, headers=headers))
552
553 def delete(self, headers=None):
554 """
555 Delete this key from S3
556 """
557 return self.bucket.delete_key(self.name, version_id=self.version_id,
558 headers=headers)
559
560 def get_metadata(self, name):
561 return self.metadata.get(name)
562
563 def set_metadata(self, name, value):
564 # Ensure that metadata that is vital to signing is in the correct
565 # case. Applies to ``Content-Type`` & ``Content-MD5``.
566 if name.lower() == 'content-type':
567 self.metadata['Content-Type'] = value
568 elif name.lower() == 'content-md5':
569 self.metadata['Content-MD5'] = value
570 else:
571 self.metadata[name] = value
572 if name.lower() in Key.base_user_settable_fields:
573 self.__dict__[name.lower().replace('-', '_')] = value
574
575 def update_metadata(self, d):
576 self.metadata.update(d)
577
578 # convenience methods for setting/getting ACL
579 def set_acl(self, acl_str, headers=None):
580 if self.bucket is not None:
581 self.bucket.set_acl(acl_str, self.name, headers=headers)
582
583 def get_acl(self, headers=None):
584 if self.bucket is not None:
585 return self.bucket.get_acl(self.name, headers=headers)
586
587 def get_xml_acl(self, headers=None):
588 if self.bucket is not None:
589 return self.bucket.get_xml_acl(self.name, headers=headers)
590
591 def set_xml_acl(self, acl_str, headers=None):
592 if self.bucket is not None:
593 return self.bucket.set_xml_acl(acl_str, self.name, headers=headers)
594
595 def set_canned_acl(self, acl_str, headers=None):
596 return self.bucket.set_canned_acl(acl_str, self.name, headers)
597
598 def get_redirect(self):
599 """Return the redirect location configured for this key.
600
601 If no redirect is configured (via set_redirect), then None
602 will be returned.
603
604 """
605 response = self.bucket.connection.make_request(
606 'HEAD', self.bucket.name, self.name)
607 if response.status == 200:
608 return response.getheader('x-amz-website-redirect-location')
609 else:
610 raise self.provider.storage_response_error(
611 response.status, response.reason, response.read())
612
613 def set_redirect(self, redirect_location, headers=None):
614 """Configure this key to redirect to another location.
615
616 When the bucket associated with this key is accessed from the website
617 endpoint, a 301 redirect will be issued to the specified
618 `redirect_location`.
619
620 :type redirect_location: string
621 :param redirect_location: The location to redirect.
622
623 """
624 if headers is None:
625 headers = {}
626 else:
627 headers = headers.copy()
628
629 headers['x-amz-website-redirect-location'] = redirect_location
630 response = self.bucket.connection.make_request('PUT', self.bucket.name,
631 self.name, headers)
632 if response.status == 200:
633 return True
634 else:
635 raise self.provider.storage_response_error(
636 response.status, response.reason, response.read())
637
638 def make_public(self, headers=None):
639 return self.bucket.set_canned_acl('public-read', self.name, headers)
640
641 def generate_url(self, expires_in, method='GET', headers=None,
642 query_auth=True, force_http=False, response_headers=None,
643 expires_in_absolute=False, version_id=None,
644 policy=None, reduced_redundancy=False, encrypt_key=False):
645 """
646 Generate a URL to access this key.
647
648 :type expires_in: int
649 :param expires_in: How long the url is valid for, in seconds.
650
651 :type method: string
652 :param method: The method to use for retrieving the file
653 (default is GET).
654
655 :type headers: dict
656 :param headers: Any headers to pass along in the request.
657
658 :type query_auth: bool
659 :param query_auth: If True, signs the request in the URL.
660
661 :type force_http: bool
662 :param force_http: If True, http will be used instead of https.
663
664 :type response_headers: dict
665 :param response_headers: A dictionary containing HTTP
666 headers/values that will override any headers associated
667 with the stored object in the response. See
668 http://goo.gl/EWOPb for details.
669
670 :type expires_in_absolute: bool
671 :param expires_in_absolute:
672
673 :type version_id: string
674 :param version_id: The version_id of the object to GET. If specified
675 this overrides any value in the key.
676
677 :type policy: :class:`boto.s3.acl.CannedACLStrings`
678 :param policy: A canned ACL policy that will be applied to the
679 new key in S3.
680
681 :type reduced_redundancy: bool
682 :param reduced_redundancy: If True, this will set the storage
683 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
684 Redundancy Storage (RRS) feature of S3, provides lower
685 redundancy at lower storage cost.
686
687 :type encrypt_key: bool
688 :param encrypt_key: If True, the new copy of the object will
689 be encrypted on the server-side by S3 and will be stored
690 in an encrypted form while at rest in S3.
691
692 :rtype: string
693 :return: The URL to access the key
694 """
695 provider = self.bucket.connection.provider
696 version_id = version_id or self.version_id
697 if headers is None:
698 headers = {}
699 else:
700 headers = headers.copy()
701
702 # add headers accordingly (usually PUT case)
703 if policy:
704 headers[provider.acl_header] = policy
705 if reduced_redundancy:
706 self.storage_class = 'REDUCED_REDUNDANCY'
707 if provider.storage_class_header:
708 headers[provider.storage_class_header] = self.storage_class
709 if encrypt_key:
710 headers[provider.server_side_encryption_header] = 'AES256'
711 headers = boto.utils.merge_meta(headers, self.metadata, provider)
712
713 return self.bucket.connection.generate_url(expires_in, method,
714 self.bucket.name, self.name,
715 headers, query_auth,
716 force_http,
717 response_headers,
718 expires_in_absolute,
719 version_id)
720
721 def send_file(self, fp, headers=None, cb=None, num_cb=10,
722 query_args=None, chunked_transfer=False, size=None):
723 """
724 Upload a file to a key into a bucket on S3.
725
726 :type fp: file
727 :param fp: The file pointer to upload. The file pointer must
728 point at the offset from which you wish to upload.
729 ie. if uploading the full file, it should point at the
730 start of the file. Normally when a file is opened for
731 reading, the fp will point at the first byte. See the
732 bytes parameter below for more info.
733
734 :type headers: dict
735 :param headers: The headers to pass along with the PUT request
736
737 :type num_cb: int
738 :param num_cb: (optional) If a callback is specified with the
739 cb parameter this parameter determines the granularity of
740 the callback by defining the maximum number of times the
741 callback will be called during the file
742 transfer. Providing a negative integer will cause your
743 callback to be called with each buffer read.
744
745 :type query_args: string
746 :param query_args: (optional) Arguments to pass in the query string.
747
748 :type chunked_transfer: boolean
749 :param chunked_transfer: (optional) If true, we use chunked
750 Transfer-Encoding.
751
752 :type size: int
753 :param size: (optional) The Maximum number of bytes to read
754 from the file pointer (fp). This is useful when uploading
755 a file in multiple parts where you are splitting the file
756 up into different ranges to be uploaded. If not specified,
757 the default behaviour is to read all bytes from the file
758 pointer. Less bytes may be available.
759 """
760 self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
761 query_args=query_args,
762 chunked_transfer=chunked_transfer, size=size)
763
764 def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10,
765 query_args=None, chunked_transfer=False, size=None,
766 hash_algs=None):
767 provider = self.bucket.connection.provider
768 try:
769 spos = fp.tell()
770 except IOError:
771 spos = None
772 self.read_from_stream = False
773
774 # If hash_algs is unset and the MD5 hasn't already been computed,
775 # default to an MD5 hash_alg to hash the data on-the-fly.
776 if hash_algs is None and not self.md5:
777 hash_algs = {'md5': md5}
778 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
779
780 def sender(http_conn, method, path, data, headers):
781 # This function is called repeatedly for temporary retries
782 # so we must be sure the file pointer is pointing at the
783 # start of the data.
784 if spos is not None and spos != fp.tell():
785 fp.seek(spos)
786 elif spos is None and self.read_from_stream:
787 # if seek is not supported, and we've read from this
788 # stream already, then we need to abort retries to
789 # avoid setting bad data.
790 raise provider.storage_data_error(
791 'Cannot retry failed request. fp does not support seeking.')
792
793 # If the caller explicitly specified host header, tell putrequest
794 # not to add a second host header. Similarly for accept-encoding.
795 skips = {}
796 if boto.utils.find_matching_headers('host', headers):
797 skips['skip_host'] = 1
798 if boto.utils.find_matching_headers('accept-encoding', headers):
799 skips['skip_accept_encoding'] = 1
800 http_conn.putrequest(method, path, **skips)
801 for key in headers:
802 http_conn.putheader(key, headers[key])
803 http_conn.endheaders()
804
805 save_debug = self.bucket.connection.debug
806 self.bucket.connection.debug = 0
807 # If the debuglevel < 4 we don't want to show connection
808 # payload, so turn off HTTP connection-level debug output (to
809 # be restored below).
810 # Use the getattr approach to allow this to work in AppEngine.
811 if getattr(http_conn, 'debuglevel', 0) < 4:
812 http_conn.set_debuglevel(0)
813
814 data_len = 0
815 if cb:
816 if size:
817 cb_size = size
818 elif self.size:
819 cb_size = self.size
820 else:
821 cb_size = 0
822 if chunked_transfer and cb_size == 0:
823 # For chunked Transfer, we call the cb for every 1MB
824 # of data transferred, except when we know size.
825 cb_count = (1024 * 1024) / self.BufferSize
826 elif num_cb > 1:
827 cb_count = int(
828 math.ceil(cb_size / self.BufferSize / (num_cb - 1.0)))
829 elif num_cb < 0:
830 cb_count = -1
831 else:
832 cb_count = 0
833 i = 0
834 cb(data_len, cb_size)
835
836 bytes_togo = size
837 if bytes_togo and bytes_togo < self.BufferSize:
838 chunk = fp.read(bytes_togo)
839 else:
840 chunk = fp.read(self.BufferSize)
841
842 if not isinstance(chunk, bytes):
843 chunk = chunk.encode('utf-8')
844
845 if spos is None:
846 # read at least something from a non-seekable fp.
847 self.read_from_stream = True
848 while chunk:
849 chunk_len = len(chunk)
850 data_len += chunk_len
851 if chunked_transfer:
852 http_conn.send('%x;\r\n' % chunk_len)
853 http_conn.send(chunk)
854 http_conn.send('\r\n')
855 else:
856 http_conn.send(chunk)
857 for alg in digesters:
858 digesters[alg].update(chunk)
859 if bytes_togo:
860 bytes_togo -= chunk_len
861 if bytes_togo <= 0:
862 break
863 if cb:
864 i += 1
865 if i == cb_count or cb_count == -1:
866 cb(data_len, cb_size)
867 i = 0
868 if bytes_togo and bytes_togo < self.BufferSize:
869 chunk = fp.read(bytes_togo)
870 else:
871 chunk = fp.read(self.BufferSize)
872
873 if not isinstance(chunk, bytes):
874 chunk = chunk.encode('utf-8')
875
876 self.size = data_len
877
878 for alg in digesters:
879 self.local_hashes[alg] = digesters[alg].digest()
880
881 if chunked_transfer:
882 http_conn.send('0\r\n')
883 # http_conn.send("Content-MD5: %s\r\n" % self.base64md5)
884 http_conn.send('\r\n')
885
886 if cb and (cb_count <= 1 or i > 0) and data_len > 0:
887 cb(data_len, cb_size)
888
889 http_conn.set_debuglevel(save_debug)
890 self.bucket.connection.debug = save_debug
891 response = http_conn.getresponse()
892 body = response.read()
893
894 if not self.should_retry(response, chunked_transfer):
895 raise provider.storage_response_error(
896 response.status, response.reason, body)
897
898 return response
899
900 if not headers:
901 headers = {}
902 else:
903 headers = headers.copy()
904 # Overwrite user-supplied user-agent.
905 for header in find_matching_headers('User-Agent', headers):
906 del headers[header]
907 headers['User-Agent'] = UserAgent
908 # If storage_class is None, then a user has not explicitly requested
909 # a storage class, so we can assume STANDARD here
910 if self._storage_class not in [None, 'STANDARD']:
911 headers[provider.storage_class_header] = self.storage_class
912 if find_matching_headers('Content-Encoding', headers):
913 self.content_encoding = merge_headers_by_name(
914 'Content-Encoding', headers)
915 if find_matching_headers('Content-Language', headers):
916 self.content_language = merge_headers_by_name(
917 'Content-Language', headers)
918 content_type_headers = find_matching_headers('Content-Type', headers)
919 if content_type_headers:
920 # Some use cases need to suppress sending of the Content-Type
921 # header and depend on the receiving server to set the content
922 # type. This can be achieved by setting headers['Content-Type']
923 # to None when calling this method.
924 if (len(content_type_headers) == 1 and
925 headers[content_type_headers[0]] is None):
926 # Delete null Content-Type value to skip sending that header.
927 del headers[content_type_headers[0]]
928 else:
929 self.content_type = merge_headers_by_name(
930 'Content-Type', headers)
931 elif self.path:
932 self.content_type = mimetypes.guess_type(self.path)[0]
933 if self.content_type is None:
934 self.content_type = self.DefaultContentType
935 headers['Content-Type'] = self.content_type
936 else:
937 headers['Content-Type'] = self.content_type
938 if self.base64md5:
939 headers['Content-MD5'] = self.base64md5
940 if chunked_transfer:
941 headers['Transfer-Encoding'] = 'chunked'
942 #if not self.base64md5:
943 # headers['Trailer'] = "Content-MD5"
944 else:
945 headers['Content-Length'] = str(self.size)
946 # This is terrible. We need a SHA256 of the body for SigV4, but to do
947 # the chunked ``sender`` behavior above, the ``fp`` isn't available to
948 # the auth mechanism (because closures). Detect if it's SigV4 & embelish
949 # while we can before the auth calculations occur.
950 if 'hmac-v4-s3' in self.bucket.connection._required_auth_capability():
951 kwargs = {'fp': fp, 'hash_algorithm': hashlib.sha256}
952 if size is not None:
953 kwargs['size'] = size
954 headers['_sha256'] = compute_hash(**kwargs)[0]
955 headers['Expect'] = '100-Continue'
956 headers = boto.utils.merge_meta(headers, self.metadata, provider)
957 resp = self.bucket.connection.make_request(
958 'PUT',
959 self.bucket.name,
960 self.name,
961 headers,
962 sender=sender,
963 query_args=query_args
964 )
965 self.handle_version_headers(resp, force=True)
966 self.handle_addl_headers(resp.getheaders())
967
968 def should_retry(self, response, chunked_transfer=False):
969 provider = self.bucket.connection.provider
970
971 if not chunked_transfer:
972 if response.status in [500, 503]:
973 # 500 & 503 can be plain retries.
974 return True
975
976 if response.getheader('location'):
977 # If there's a redirect, plain retry.
978 return True
979
980 if 200 <= response.status <= 299:
981 self.etag = response.getheader('etag')
982 md5 = self.md5
983 if isinstance(md5, bytes):
984 md5 = md5.decode('utf-8')
985
986 # If you use customer-provided encryption keys, the ETag value that
987 # Amazon S3 returns in the response will not be the MD5 of the
988 # object.
989 amz_server_side_encryption_customer_algorithm = response.getheader(
990 'x-amz-server-side-encryption-customer-algorithm', None)
991 # The same is applicable for KMS-encrypted objects in gs buckets.
992 goog_customer_managed_encryption = response.getheader(
993 'x-goog-encryption-kms-key-name', None)
994 if (amz_server_side_encryption_customer_algorithm is None and
995 goog_customer_managed_encryption is None):
996 if self.etag != '"%s"' % md5:
997 raise provider.storage_data_error(
998 'ETag from S3 did not match computed MD5. '
999 '%s vs. %s' % (self.etag, self.md5))
1000
1001 return True
1002
1003 if response.status == 400:
1004 # The 400 must be trapped so the retry handler can check to
1005 # see if it was a timeout.
1006 # If ``RequestTimeout`` is present, we'll retry. Otherwise, bomb
1007 # out.
1008 body = response.read()
1009 err = provider.storage_response_error(
1010 response.status,
1011 response.reason,
1012 body
1013 )
1014
1015 if err.error_code in ['RequestTimeout']:
1016 raise PleaseRetryException(
1017 "Saw %s, retrying" % err.error_code,
1018 response=response
1019 )
1020
1021 return False
1022
1023 def compute_md5(self, fp, size=None):
1024 """
1025 :type fp: file
1026 :param fp: File pointer to the file to MD5 hash. The file
1027 pointer will be reset to the same position before the
1028 method returns.
1029
1030 :type size: int
1031 :param size: (optional) The Maximum number of bytes to read
1032 from the file pointer (fp). This is useful when uploading
1033 a file in multiple parts where the file is being split
1034 in place into different parts. Less bytes may be available.
1035 """
1036 hex_digest, b64_digest, data_size = compute_md5(fp, size=size)
1037 # Returned values are MD5 hash, base64 encoded MD5 hash, and data size.
1038 # The internal implementation of compute_md5() needs to return the
1039 # data size but we don't want to return that value to the external
1040 # caller because it changes the class interface (i.e. it might
1041 # break some code) so we consume the third tuple value here and
1042 # return the remainder of the tuple to the caller, thereby preserving
1043 # the existing interface.
1044 self.size = data_size
1045 return (hex_digest, b64_digest)
1046
1047 def set_contents_from_stream(self, fp, headers=None, replace=True,
1048 cb=None, num_cb=10, policy=None,
1049 reduced_redundancy=False, query_args=None,
1050 size=None):
1051 """
1052 Store an object using the name of the Key object as the key in
1053 cloud and the contents of the data stream pointed to by 'fp' as
1054 the contents.
1055
1056 The stream object is not seekable and total size is not known.
1057 This has the implication that we can't specify the
1058 Content-Size and Content-MD5 in the header. So for huge
1059 uploads, the delay in calculating MD5 is avoided but with a
1060 penalty of inability to verify the integrity of the uploaded
1061 data.
1062
1063 :type fp: file
1064 :param fp: the file whose contents are to be uploaded
1065
1066 :type headers: dict
1067 :param headers: additional HTTP headers to be sent with the
1068 PUT request.
1069
1070 :type replace: bool
1071 :param replace: If this parameter is False, the method will first check
1072 to see if an object exists in the bucket with the same key. If it
1073 does, it won't overwrite it. The default value is True which will
1074 overwrite the object.
1075
1076 :type cb: function
1077 :param cb: a callback function that will be called to report
1078 progress on the upload. The callback should accept two integer
1079 parameters, the first representing the number of bytes that have
1080 been successfully transmitted to GS and the second representing the
1081 total number of bytes that need to be transmitted.
1082
1083 :type num_cb: int
1084 :param num_cb: (optional) If a callback is specified with the
1085 cb parameter, this parameter determines the granularity of
1086 the callback by defining the maximum number of times the
1087 callback will be called during the file transfer.
1088
1089 :type policy: :class:`boto.gs.acl.CannedACLStrings`
1090 :param policy: A canned ACL policy that will be applied to the new key
1091 in GS.
1092
1093 :type reduced_redundancy: bool
1094 :param reduced_redundancy: If True, this will set the storage
1095 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1096 Redundancy Storage (RRS) feature of S3, provides lower
1097 redundancy at lower storage cost.
1098
1099 :type size: int
1100 :param size: (optional) The Maximum number of bytes to read from
1101 the file pointer (fp). This is useful when uploading a
1102 file in multiple parts where you are splitting the file up
1103 into different ranges to be uploaded. If not specified,
1104 the default behaviour is to read all bytes from the file
1105 pointer. Less bytes may be available.
1106 """
1107
1108 provider = self.bucket.connection.provider
1109 if not provider.supports_chunked_transfer():
1110 raise BotoClientError('%s does not support chunked transfer'
1111 % provider.get_provider_name())
1112
1113 # Name of the Object should be specified explicitly for Streams.
1114 if not self.name or self.name == '':
1115 raise BotoClientError('Cannot determine the destination '
1116 'object name for the given stream')
1117
1118 if headers is None:
1119 headers = {}
1120 if policy:
1121 headers[provider.acl_header] = policy
1122
1123 if reduced_redundancy:
1124 self.storage_class = 'REDUCED_REDUNDANCY'
1125 if provider.storage_class_header:
1126 headers[provider.storage_class_header] = self.storage_class
1127
1128 if self.bucket is not None:
1129 if not replace:
1130 if self.bucket.lookup(self.name):
1131 return
1132 self.send_file(fp, headers, cb, num_cb, query_args,
1133 chunked_transfer=True, size=size)
1134
1135 def set_contents_from_file(self, fp, headers=None, replace=True,
1136 cb=None, num_cb=10, policy=None, md5=None,
1137 reduced_redundancy=False, query_args=None,
1138 encrypt_key=False, size=None, rewind=False):
1139 """
1140 Store an object in S3 using the name of the Key object as the
1141 key in S3 and the contents of the file pointed to by 'fp' as the
1142 contents. The data is read from 'fp' from its current position until
1143 'size' bytes have been read or EOF.
1144
1145 :type fp: file
1146 :param fp: the file whose contents to upload
1147
1148 :type headers: dict
1149 :param headers: Additional HTTP headers that will be sent with
1150 the PUT request.
1151
1152 :type replace: bool
1153 :param replace: If this parameter is False, the method will
1154 first check to see if an object exists in the bucket with
1155 the same key. If it does, it won't overwrite it. The
1156 default value is True which will overwrite the object.
1157
1158 :type cb: function
1159 :param cb: a callback function that will be called to report
1160 progress on the upload. The callback should accept two
1161 integer parameters, the first representing the number of
1162 bytes that have been successfully transmitted to S3 and
1163 the second representing the size of the to be transmitted
1164 object.
1165
1166 :type num_cb: int
1167 :param num_cb: (optional) If a callback is specified with the
1168 cb parameter this parameter determines the granularity of
1169 the callback by defining the maximum number of times the
1170 callback will be called during the file transfer.
1171
1172 :type policy: :class:`boto.s3.acl.CannedACLStrings`
1173 :param policy: A canned ACL policy that will be applied to the
1174 new key in S3.
1175
1176 :type md5: A tuple containing the hexdigest version of the MD5
1177 checksum of the file as the first element and the
1178 Base64-encoded version of the plain checksum as the second
1179 element. This is the same format returned by the
1180 compute_md5 method.
1181 :param md5: If you need to compute the MD5 for any reason
1182 prior to upload, it's silly to have to do it twice so this
1183 param, if present, will be used as the MD5 values of the
1184 file. Otherwise, the checksum will be computed.
1185
1186 :type reduced_redundancy: bool
1187 :param reduced_redundancy: If True, this will set the storage
1188 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1189 Redundancy Storage (RRS) feature of S3, provides lower
1190 redundancy at lower storage cost.
1191
1192 :type encrypt_key: bool
1193 :param encrypt_key: If True, the new copy of the object will
1194 be encrypted on the server-side by S3 and will be stored
1195 in an encrypted form while at rest in S3.
1196
1197 :type size: int
1198 :param size: (optional) The Maximum number of bytes to read
1199 from the file pointer (fp). This is useful when uploading
1200 a file in multiple parts where you are splitting the file
1201 up into different ranges to be uploaded. If not specified,
1202 the default behaviour is to read all bytes from the file
1203 pointer. Less bytes may be available.
1204
1205 :type rewind: bool
1206 :param rewind: (optional) If True, the file pointer (fp) will
1207 be rewound to the start before any bytes are read from
1208 it. The default behaviour is False which reads from the
1209 current position of the file pointer (fp).
1210
1211 :rtype: int
1212 :return: The number of bytes written to the key.
1213 """
1214 provider = self.bucket.connection.provider
1215 headers = headers or {}
1216 if policy:
1217 headers[provider.acl_header] = policy
1218 if encrypt_key:
1219 headers[provider.server_side_encryption_header] = 'AES256'
1220
1221 if rewind:
1222 # caller requests reading from beginning of fp.
1223 fp.seek(0, os.SEEK_SET)
1224 else:
1225 # The following seek/tell/seek logic is intended
1226 # to detect applications using the older interface to
1227 # set_contents_from_file(), which automatically rewound the
1228 # file each time the Key was reused. This changed with commit
1229 # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads
1230 # split into multiple parts and uploaded in parallel, and at
1231 # the time of that commit this check was added because otherwise
1232 # older programs would get a success status and upload an empty
1233 # object. Unfortuantely, it's very inefficient for fp's implemented
1234 # by KeyFile (used, for example, by gsutil when copying between
1235 # providers). So, we skip the check for the KeyFile case.
1236 # TODO: At some point consider removing this seek/tell/seek
1237 # logic, after enough time has passed that it's unlikely any
1238 # programs remain that assume the older auto-rewind interface.
1239 if not isinstance(fp, KeyFile):
1240 spos = fp.tell()
1241 fp.seek(0, os.SEEK_END)
1242 if fp.tell() == spos:
1243 fp.seek(0, os.SEEK_SET)
1244 if fp.tell() != spos:
1245 # Raise an exception as this is likely a programming
1246 # error whereby there is data before the fp but nothing
1247 # after it.
1248 fp.seek(spos)
1249 raise AttributeError('fp is at EOF. Use rewind option '
1250 'or seek() to data start.')
1251 # seek back to the correct position.
1252 fp.seek(spos)
1253
1254 if reduced_redundancy:
1255 self.storage_class = 'REDUCED_REDUNDANCY'
1256 if provider.storage_class_header:
1257 headers[provider.storage_class_header] = self.storage_class
1258 # TODO - What if provider doesn't support reduced reduncancy?
1259 # What if different providers provide different classes?
1260 if hasattr(fp, 'name'):
1261 self.path = fp.name
1262 if self.bucket is not None:
1263 if not md5 and provider.supports_chunked_transfer():
1264 # defer md5 calculation to on the fly and
1265 # we don't know anything about size yet.
1266 chunked_transfer = True
1267 self.size = None
1268 else:
1269 chunked_transfer = False
1270 if isinstance(fp, KeyFile):
1271 # Avoid EOF seek for KeyFile case as it's very inefficient.
1272 key = fp.getkey()
1273 size = key.size - fp.tell()
1274 self.size = size
1275 # At present both GCS and S3 use MD5 for the etag for
1276 # non-multipart-uploaded objects. If the etag is 32 hex
1277 # chars use it as an MD5, to avoid having to read the file
1278 # twice while transferring.
1279 if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)):
1280 etag = key.etag.strip('"')
1281 md5 = (etag, base64.b64encode(binascii.unhexlify(etag)))
1282 if not md5:
1283 # compute_md5() and also set self.size to actual
1284 # size of the bytes read computing the md5.
1285 md5 = self.compute_md5(fp, size)
1286 # adjust size if required
1287 size = self.size
1288 elif size:
1289 self.size = size
1290 else:
1291 # If md5 is provided, still need to size so
1292 # calculate based on bytes to end of content
1293 spos = fp.tell()
1294 fp.seek(0, os.SEEK_END)
1295 self.size = fp.tell() - spos
1296 fp.seek(spos)
1297 size = self.size
1298 self.md5 = md5[0]
1299 self.base64md5 = md5[1]
1300
1301 if self.name is None:
1302 self.name = self.md5
1303 if not replace:
1304 if self.bucket.lookup(self.name):
1305 return
1306
1307 self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb,
1308 query_args=query_args,
1309 chunked_transfer=chunked_transfer, size=size)
1310 # return number of bytes written.
1311 return self.size
1312
1313 def set_contents_from_filename(self, filename, headers=None, replace=True,
1314 cb=None, num_cb=10, policy=None, md5=None,
1315 reduced_redundancy=False,
1316 encrypt_key=False):
1317 """
1318 Store an object in S3 using the name of the Key object as the
1319 key in S3 and the contents of the file named by 'filename'.
1320 See set_contents_from_file method for details about the
1321 parameters.
1322
1323 :type filename: string
1324 :param filename: The name of the file that you want to put onto S3
1325
1326 :type headers: dict
1327 :param headers: Additional headers to pass along with the
1328 request to AWS.
1329
1330 :type replace: bool
1331 :param replace: If True, replaces the contents of the file
1332 if it already exists.
1333
1334 :type cb: function
1335 :param cb: a callback function that will be called to report
1336 progress on the upload. The callback should accept two
1337 integer parameters, the first representing the number of
1338 bytes that have been successfully transmitted to S3 and
1339 the second representing the size of the to be transmitted
1340 object.
1341
1342 :type cb: int
1343 :param num_cb: (optional) If a callback is specified with the
1344 cb parameter this parameter determines the granularity of
1345 the callback by defining the maximum number of times the
1346 callback will be called during the file transfer.
1347
1348 :type policy: :class:`boto.s3.acl.CannedACLStrings`
1349 :param policy: A canned ACL policy that will be applied to the
1350 new key in S3.
1351
1352 :type md5: A tuple containing the hexdigest version of the MD5
1353 checksum of the file as the first element and the
1354 Base64-encoded version of the plain checksum as the second
1355 element. This is the same format returned by the
1356 compute_md5 method.
1357 :param md5: If you need to compute the MD5 for any reason
1358 prior to upload, it's silly to have to do it twice so this
1359 param, if present, will be used as the MD5 values of the
1360 file. Otherwise, the checksum will be computed.
1361
1362 :type reduced_redundancy: bool
1363 :param reduced_redundancy: If True, this will set the storage
1364 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1365 Redundancy Storage (RRS) feature of S3, provides lower
1366 redundancy at lower storage cost. :type encrypt_key: bool
1367 :param encrypt_key: If True, the new copy of the object
1368 will be encrypted on the server-side by S3 and will be
1369 stored in an encrypted form while at rest in S3.
1370
1371 :rtype: int
1372 :return: The number of bytes written to the key.
1373 """
1374 with open(filename, 'rb') as fp:
1375 return self.set_contents_from_file(fp, headers, replace, cb,
1376 num_cb, policy, md5,
1377 reduced_redundancy,
1378 encrypt_key=encrypt_key)
1379
1380 def set_contents_from_string(self, string_data, headers=None, replace=True,
1381 cb=None, num_cb=10, policy=None, md5=None,
1382 reduced_redundancy=False,
1383 encrypt_key=False):
1384 """
1385 Store an object in S3 using the name of the Key object as the
1386 key in S3 and the string 's' as the contents.
1387 See set_contents_from_file method for details about the
1388 parameters.
1389
1390 :type headers: dict
1391 :param headers: Additional headers to pass along with the
1392 request to AWS.
1393
1394 :type replace: bool
1395 :param replace: If True, replaces the contents of the file if
1396 it already exists.
1397
1398 :type cb: function
1399 :param cb: a callback function that will be called to report
1400 progress on the upload. The callback should accept two
1401 integer parameters, the first representing the number of
1402 bytes that have been successfully transmitted to S3 and
1403 the second representing the size of the to be transmitted
1404 object.
1405
1406 :type num_cb: int
1407 :param num_cb: (optional) If a callback is specified with the
1408 num_cb parameter this parameter determines the granularity of
1409 the callback by defining the maximum number of times the
1410 callback will be called during the file transfer.
1411
1412 :type policy: :class:`boto.s3.acl.CannedACLStrings`
1413 :param policy: A canned ACL policy that will be applied to the
1414 new key in S3.
1415
1416 :type md5: A tuple containing the hexdigest version of the MD5
1417 checksum of the file as the first element and the
1418 Base64-encoded version of the plain checksum as the second
1419 element. This is the same format returned by the
1420 compute_md5 method.
1421 :param md5: If you need to compute the MD5 for any reason
1422 prior to upload, it's silly to have to do it twice so this
1423 param, if present, will be used as the MD5 values of the
1424 file. Otherwise, the checksum will be computed.
1425
1426 :type reduced_redundancy: bool
1427 :param reduced_redundancy: If True, this will set the storage
1428 class of the new Key to be REDUCED_REDUNDANCY. The Reduced
1429 Redundancy Storage (RRS) feature of S3, provides lower
1430 redundancy at lower storage cost.
1431
1432 :type encrypt_key: bool
1433 :param encrypt_key: If True, the new copy of the object will
1434 be encrypted on the server-side by S3 and will be stored
1435 in an encrypted form while at rest in S3.
1436 """
1437 if not isinstance(string_data, bytes):
1438 string_data = string_data.encode("utf-8")
1439 fp = BytesIO(string_data)
1440 r = self.set_contents_from_file(fp, headers, replace, cb, num_cb,
1441 policy, md5, reduced_redundancy,
1442 encrypt_key=encrypt_key)
1443 fp.close()
1444 return r
1445
1446 def get_file(self, fp, headers=None, cb=None, num_cb=10,
1447 torrent=False, version_id=None, override_num_retries=None,
1448 response_headers=None):
1449 """
1450 Retrieves a file from an S3 Key
1451
1452 :type fp: file
1453 :param fp: File pointer to put the data into
1454
1455 :type headers: string
1456 :param: headers to send when retrieving the files
1457
1458 :type cb: function
1459 :param cb: a callback function that will be called to report
1460 progress on the upload. The callback should accept two
1461 integer parameters, the first representing the number of
1462 bytes that have been successfully transmitted to S3 and
1463 the second representing the size of the to be transmitted
1464 object.
1465
1466 :type cb: int
1467 :param num_cb: (optional) If a callback is specified with the
1468 cb parameter this parameter determines the granularity of
1469 the callback by defining the maximum number of times the
1470 callback will be called during the file transfer.
1471
1472 :type torrent: bool
1473 :param torrent: Flag for whether to get a torrent for the file
1474
1475 :type override_num_retries: int
1476 :param override_num_retries: If not None will override configured
1477 num_retries parameter for underlying GET.
1478
1479 :type response_headers: dict
1480 :param response_headers: A dictionary containing HTTP
1481 headers/values that will override any headers associated
1482 with the stored object in the response. See
1483 http://goo.gl/EWOPb for details.
1484
1485 :type version_id: str
1486 :param version_id: The ID of a particular version of the object.
1487 If this parameter is not supplied but the Key object has
1488 a ``version_id`` attribute, that value will be used when
1489 retrieving the object. You can set the Key object's
1490 ``version_id`` attribute to None to always grab the latest
1491 version from a version-enabled bucket.
1492 """
1493 self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb,
1494 torrent=torrent, version_id=version_id,
1495 override_num_retries=override_num_retries,
1496 response_headers=response_headers,
1497 hash_algs=None,
1498 query_args=None)
1499
1500 def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10,
1501 torrent=False, version_id=None, override_num_retries=None,
1502 response_headers=None, hash_algs=None, query_args=None):
1503 if headers is None:
1504 headers = {}
1505 save_debug = self.bucket.connection.debug
1506 if self.bucket.connection.debug == 1:
1507 self.bucket.connection.debug = 0
1508
1509 query_args = query_args or []
1510 if torrent:
1511 query_args.append('torrent')
1512
1513 if hash_algs is None and not torrent:
1514 hash_algs = {'md5': md5}
1515 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {})
1516
1517 # If a version_id is passed in, use that. If not, check to see
1518 # if the Key object has an explicit version_id and, if so, use that.
1519 # Otherwise, don't pass a version_id query param.
1520 if version_id is None:
1521 version_id = self.version_id
1522 if version_id:
1523 query_args.append('versionId=%s' % version_id)
1524 if response_headers:
1525 for key in response_headers:
1526 query_args.append('%s=%s' % (
1527 key, urllib.parse.quote(response_headers[key])))
1528 query_args = '&'.join(query_args)
1529 self.open('r', headers, query_args=query_args,
1530 override_num_retries=override_num_retries)
1531
1532 data_len = 0
1533 if cb:
1534 if self.size is None:
1535 cb_size = 0
1536 else:
1537 cb_size = self.size
1538 if self.size is None and num_cb != -1:
1539 # If size is not available due to chunked transfer for example,
1540 # we'll call the cb for every 1MB of data transferred.
1541 cb_count = (1024 * 1024) / self.BufferSize
1542 elif num_cb > 1:
1543 cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0)))
1544 elif num_cb < 0:
1545 cb_count = -1
1546 else:
1547 cb_count = 0
1548 i = 0
1549 cb(data_len, cb_size)
1550 try:
1551 for bytes in self:
1552 fp.write(bytes)
1553 data_len += len(bytes)
1554 for alg in digesters:
1555 digesters[alg].update(bytes)
1556 if cb:
1557 if cb_size > 0 and data_len >= cb_size:
1558 break
1559 i += 1
1560 if i == cb_count or cb_count == -1:
1561 cb(data_len, cb_size)
1562 i = 0
1563 except IOError as e:
1564 if e.errno == errno.ENOSPC:
1565 raise StorageDataError('Out of space for destination file '
1566 '%s' % fp.name)
1567 raise
1568 if cb and (cb_count <= 1 or i > 0) and data_len > 0:
1569 cb(data_len, cb_size)
1570 for alg in digesters:
1571 self.local_hashes[alg] = digesters[alg].digest()
1572 if self.size is None and not torrent and "Range" not in headers:
1573 self.size = data_len
1574 self.close()
1575 self.bucket.connection.debug = save_debug
1576
1577 def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10):
1578 """
1579 Get a torrent file (see to get_file)
1580
1581 :type fp: file
1582 :param fp: The file pointer of where to put the torrent
1583
1584 :type headers: dict
1585 :param headers: Headers to be passed
1586
1587 :type cb: function
1588 :param cb: a callback function that will be called to report
1589 progress on the upload. The callback should accept two
1590 integer parameters, the first representing the number of
1591 bytes that have been successfully transmitted to S3 and
1592 the second representing the size of the to be transmitted
1593 object.
1594
1595 :type cb: int
1596 :param num_cb: (optional) If a callback is specified with the
1597 cb parameter this parameter determines the granularity of
1598 the callback by defining the maximum number of times the
1599 callback will be called during the file transfer.
1600
1601 """
1602 return self.get_file(fp, headers, cb, num_cb, torrent=True)
1603
1604 def get_contents_to_file(self, fp, headers=None,
1605 cb=None, num_cb=10,
1606 torrent=False,
1607 version_id=None,
1608 res_download_handler=None,
1609 response_headers=None):
1610 """
1611 Retrieve an object from S3 using the name of the Key object as the
1612 key in S3. Write the contents of the object to the file pointed
1613 to by 'fp'.
1614
1615 :type fp: File -like object
1616 :param fp:
1617
1618 :type headers: dict
1619 :param headers: additional HTTP headers that will be sent with
1620 the GET request.
1621
1622 :type cb: function
1623 :param cb: a callback function that will be called to report
1624 progress on the upload. The callback should accept two
1625 integer parameters, the first representing the number of
1626 bytes that have been successfully transmitted to S3 and
1627 the second representing the size of the to be transmitted
1628 object.
1629
1630 :type cb: int
1631 :param num_cb: (optional) If a callback is specified with the
1632 cb parameter this parameter determines the granularity of
1633 the callback by defining the maximum number of times the
1634 callback will be called during the file transfer.
1635
1636 :type torrent: bool
1637 :param torrent: If True, returns the contents of a torrent
1638 file as a string.
1639
1640 :type res_upload_handler: ResumableDownloadHandler
1641 :param res_download_handler: If provided, this handler will
1642 perform the download.
1643
1644 :type response_headers: dict
1645 :param response_headers: A dictionary containing HTTP
1646 headers/values that will override any headers associated
1647 with the stored object in the response. See
1648 http://goo.gl/EWOPb for details.
1649
1650 :type version_id: str
1651 :param version_id: The ID of a particular version of the object.
1652 If this parameter is not supplied but the Key object has
1653 a ``version_id`` attribute, that value will be used when
1654 retrieving the object. You can set the Key object's
1655 ``version_id`` attribute to None to always grab the latest
1656 version from a version-enabled bucket.
1657 """
1658 if self.bucket is not None:
1659 if res_download_handler:
1660 res_download_handler.get_file(self, fp, headers, cb, num_cb,
1661 torrent=torrent,
1662 version_id=version_id)
1663 else:
1664 self.get_file(fp, headers, cb, num_cb, torrent=torrent,
1665 version_id=version_id,
1666 response_headers=response_headers)
1667
1668 def get_contents_to_filename(self, filename, headers=None,
1669 cb=None, num_cb=10,
1670 torrent=False,
1671 version_id=None,
1672 res_download_handler=None,
1673 response_headers=None):
1674 """
1675 Retrieve an object from S3 using the name of the Key object as the
1676 key in S3. Store contents of the object to a file named by 'filename'.
1677 See get_contents_to_file method for details about the
1678 parameters.
1679
1680 :type filename: string
1681 :param filename: The filename of where to put the file contents
1682
1683 :type headers: dict
1684 :param headers: Any additional headers to send in the request
1685
1686 :type cb: function
1687 :param cb: a callback function that will be called to report
1688 progress on the upload. The callback should accept two
1689 integer parameters, the first representing the number of
1690 bytes that have been successfully transmitted to S3 and
1691 the second representing the size of the to be transmitted
1692 object.
1693
1694 :type num_cb: int
1695 :param num_cb: (optional) If a callback is specified with the
1696 cb parameter this parameter determines the granularity of
1697 the callback by defining the maximum number of times the
1698 callback will be called during the file transfer.
1699
1700 :type torrent: bool
1701 :param torrent: If True, returns the contents of a torrent file
1702 as a string.
1703
1704 :type res_upload_handler: ResumableDownloadHandler
1705 :param res_download_handler: If provided, this handler will
1706 perform the download.
1707
1708 :type response_headers: dict
1709 :param response_headers: A dictionary containing HTTP
1710 headers/values that will override any headers associated
1711 with the stored object in the response. See
1712 http://goo.gl/EWOPb for details.
1713
1714 :type version_id: str
1715 :param version_id: The ID of a particular version of the object.
1716 If this parameter is not supplied but the Key object has
1717 a ``version_id`` attribute, that value will be used when
1718 retrieving the object. You can set the Key object's
1719 ``version_id`` attribute to None to always grab the latest
1720 version from a version-enabled bucket.
1721 """
1722 try:
1723 with open(filename, 'wb') as fp:
1724 self.get_contents_to_file(fp, headers, cb, num_cb,
1725 torrent=torrent,
1726 version_id=version_id,
1727 res_download_handler=res_download_handler,
1728 response_headers=response_headers)
1729 except Exception:
1730 os.remove(filename)
1731 raise
1732 # if last_modified date was sent from s3, try to set file's timestamp
1733 if self.last_modified is not None:
1734 try:
1735 modified_tuple = email.utils.parsedate_tz(self.last_modified)
1736 modified_stamp = int(email.utils.mktime_tz(modified_tuple))
1737 os.utime(fp.name, (modified_stamp, modified_stamp))
1738 except Exception:
1739 pass
1740
1741 def get_contents_as_string(self, headers=None,
1742 cb=None, num_cb=10,
1743 torrent=False,
1744 version_id=None,
1745 response_headers=None, encoding=None):
1746 """
1747 Retrieve an object from S3 using the name of the Key object as the
1748 key in S3. Return the contents of the object as a string.
1749 See get_contents_to_file method for details about the
1750 parameters.
1751
1752 :type headers: dict
1753 :param headers: Any additional headers to send in the request
1754
1755 :type cb: function
1756 :param cb: a callback function that will be called to report
1757 progress on the upload. The callback should accept two
1758 integer parameters, the first representing the number of
1759 bytes that have been successfully transmitted to S3 and
1760 the second representing the size of the to be transmitted
1761 object.
1762
1763 :type cb: int
1764 :param num_cb: (optional) If a callback is specified with the
1765 cb parameter this parameter determines the granularity of
1766 the callback by defining the maximum number of times the
1767 callback will be called during the file transfer.
1768
1769 :type torrent: bool
1770 :param torrent: If True, returns the contents of a torrent file
1771 as a string.
1772
1773 :type response_headers: dict
1774 :param response_headers: A dictionary containing HTTP
1775 headers/values that will override any headers associated
1776 with the stored object in the response. See
1777 http://goo.gl/EWOPb for details.
1778
1779 :type version_id: str
1780 :param version_id: The ID of a particular version of the object.
1781 If this parameter is not supplied but the Key object has
1782 a ``version_id`` attribute, that value will be used when
1783 retrieving the object. You can set the Key object's
1784 ``version_id`` attribute to None to always grab the latest
1785 version from a version-enabled bucket.
1786
1787 :type encoding: str
1788 :param encoding: The text encoding to use, such as ``utf-8``
1789 or ``iso-8859-1``. If set, then a string will be returned.
1790 Defaults to ``None`` and returns bytes.
1791
1792 :rtype: bytes or str
1793 :returns: The contents of the file as bytes or a string
1794 """
1795 fp = BytesIO()
1796 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent,
1797 version_id=version_id,
1798 response_headers=response_headers)
1799 value = fp.getvalue()
1800
1801 if encoding is not None:
1802 value = value.decode(encoding)
1803
1804 return value
1805
1806 def add_email_grant(self, permission, email_address, headers=None):
1807 """
1808 Convenience method that provides a quick way to add an email grant
1809 to a key. This method retrieves the current ACL, creates a new
1810 grant based on the parameters passed in, adds that grant to the ACL
1811 and then PUT's the new ACL back to S3.
1812
1813 :type permission: string
1814 :param permission: The permission being granted. Should be one of:
1815 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
1816
1817 :type email_address: string
1818 :param email_address: The email address associated with the AWS
1819 account your are granting the permission to.
1820
1821 :type recursive: boolean
1822 :param recursive: A boolean value to controls whether the
1823 command will apply the grant to all keys within the bucket
1824 or not. The default value is False. By passing a True
1825 value, the call will iterate through all keys in the
1826 bucket and apply the same grant to each key. CAUTION: If
1827 you have a lot of keys, this could take a long time!
1828 """
1829 policy = self.get_acl(headers=headers)
1830 policy.acl.add_email_grant(permission, email_address)
1831 self.set_acl(policy, headers=headers)
1832
1833 def add_user_grant(self, permission, user_id, headers=None,
1834 display_name=None):
1835 """
1836 Convenience method that provides a quick way to add a canonical
1837 user grant to a key. This method retrieves the current ACL,
1838 creates a new grant based on the parameters passed in, adds that
1839 grant to the ACL and then PUT's the new ACL back to S3.
1840
1841 :type permission: string
1842 :param permission: The permission being granted. Should be one of:
1843 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL).
1844
1845 :type user_id: string
1846 :param user_id: The canonical user id associated with the AWS
1847 account your are granting the permission to.
1848
1849 :type display_name: string
1850 :param display_name: An option string containing the user's
1851 Display Name. Only required on Walrus.
1852 """
1853 policy = self.get_acl(headers=headers)
1854 policy.acl.add_user_grant(permission, user_id,
1855 display_name=display_name)
1856 self.set_acl(policy, headers=headers)
1857
1858 def _normalize_metadata(self, metadata):
1859 if type(metadata) == set:
1860 norm_metadata = set()
1861 for k in metadata:
1862 norm_metadata.add(k.lower())
1863 else:
1864 norm_metadata = {}
1865 for k in metadata:
1866 norm_metadata[k.lower()] = metadata[k]
1867 return norm_metadata
1868
1869 def _get_remote_metadata(self, headers=None):
1870 """
1871 Extracts metadata from existing URI into a dict, so we can
1872 overwrite/delete from it to form the new set of metadata to apply to a
1873 key.
1874 """
1875 metadata = {}
1876 for underscore_name in self._underscore_base_user_settable_fields:
1877 if hasattr(self, underscore_name):
1878 value = getattr(self, underscore_name)
1879 if value:
1880 # Generate HTTP field name corresponding to "_" named field.
1881 field_name = underscore_name.replace('_', '-')
1882 metadata[field_name.lower()] = value
1883 # self.metadata contains custom metadata, which are all user-settable.
1884 prefix = self.provider.metadata_prefix
1885 for underscore_name in self.metadata:
1886 field_name = underscore_name.replace('_', '-')
1887 metadata['%s%s' % (prefix, field_name.lower())] = (
1888 self.metadata[underscore_name])
1889 return metadata
1890
1891 def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl,
1892 headers=None):
1893 metadata_plus = self._normalize_metadata(metadata_plus)
1894 metadata_minus = self._normalize_metadata(metadata_minus)
1895 metadata = self._get_remote_metadata()
1896 metadata.update(metadata_plus)
1897 for h in metadata_minus:
1898 if h in metadata:
1899 del metadata[h]
1900 src_bucket = self.bucket
1901 # Boto prepends the meta prefix when adding headers, so strip prefix in
1902 # metadata before sending back in to copy_key() call.
1903 rewritten_metadata = {}
1904 for h in metadata:
1905 if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')):
1906 rewritten_h = (h.replace('x-goog-meta-', '')
1907 .replace('x-amz-meta-', ''))
1908 else:
1909 rewritten_h = h
1910 rewritten_metadata[rewritten_h] = metadata[h]
1911 metadata = rewritten_metadata
1912 src_bucket.copy_key(self.name, self.bucket.name, self.name,
1913 metadata=metadata, preserve_acl=preserve_acl,
1914 headers=headers)
1915
1916 def restore(self, days, headers=None):
1917 """Restore an object from an archive.
1918
1919 :type days: int
1920 :param days: The lifetime of the restored object (must
1921 be at least 1 day). If the object is already restored
1922 then this parameter can be used to readjust the lifetime
1923 of the restored object. In this case, the days
1924 param is with respect to the initial time of the request.
1925 If the object has not been restored, this param is with
1926 respect to the completion time of the request.
1927
1928 """
1929 response = self.bucket.connection.make_request(
1930 'POST', self.bucket.name, self.name,
1931 data=self.RestoreBody % days,
1932 headers=headers, query_args='restore')
1933 if response.status not in (200, 202):
1934 provider = self.bucket.connection.provider
1935 raise provider.storage_response_error(response.status,
1936 response.reason,
1937 response.read())