comparison planemo/lib/python3.7/site-packages/boto/s3/key.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:18:57 -0400 |
| 1 # Copyright (c) 2006-2012 Mitch Garnaat http://garnaat.org/ | |
| 2 # Copyright (c) 2011, Nexenta Systems Inc. | |
| 3 # Copyright (c) 2012 Amazon.com, Inc. or its affiliates. All Rights Reserved | |
| 4 # | |
| 5 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 6 # copy of this software and associated documentation files (the | |
| 7 # "Software"), to deal in the Software without restriction, including | |
| 8 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
| 9 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
| 10 # persons to whom the Software is furnished to do so, subject to the fol- | |
| 11 # lowing conditions: | |
| 12 # | |
| 13 # The above copyright notice and this permission notice shall be included | |
| 14 # in all copies or substantial portions of the Software. | |
| 15 # | |
| 16 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
| 17 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
| 18 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
| 19 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
| 20 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 21 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
| 22 # IN THE SOFTWARE. | |
| 23 import email.utils | |
| 24 import errno | |
| 25 import hashlib | |
| 26 import mimetypes | |
| 27 import os | |
| 28 import re | |
| 29 import base64 | |
| 30 import binascii | |
| 31 import math | |
| 32 from hashlib import md5 | |
| 33 import boto.utils | |
| 34 from boto.compat import BytesIO, six, urllib, encodebytes | |
| 35 | |
| 36 from boto.exception import BotoClientError | |
| 37 from boto.exception import StorageDataError | |
| 38 from boto.exception import PleaseRetryException | |
| 39 from boto.provider import Provider | |
| 40 from boto.s3.keyfile import KeyFile | |
| 41 from boto.s3.user import User | |
| 42 from boto import UserAgent | |
| 43 from boto.utils import compute_md5, compute_hash | |
| 44 from boto.utils import find_matching_headers | |
| 45 from boto.utils import merge_headers_by_name | |
| 46 | |
| 47 | |
| 48 class Key(object): | |
| 49 """ | |
| 50 Represents a key (object) in an S3 bucket. | |
| 51 | |
| 52 :ivar bucket: The parent :class:`boto.s3.bucket.Bucket`. | |
| 53 :ivar name: The name of this Key object. | |
| 54 :ivar metadata: A dictionary containing user metadata that you | |
| 55 wish to store with the object or that has been retrieved from | |
| 56 an existing object. | |
| 57 :ivar cache_control: The value of the `Cache-Control` HTTP header. | |
| 58 :ivar content_type: The value of the `Content-Type` HTTP header. | |
| 59 :ivar content_encoding: The value of the `Content-Encoding` HTTP header. | |
| 60 :ivar content_disposition: The value of the `Content-Disposition` HTTP | |
| 61 header. | |
| 62 :ivar content_language: The value of the `Content-Language` HTTP header. | |
| 63 :ivar etag: The `etag` associated with this object. | |
| 64 :ivar last_modified: The string timestamp representing the last | |
| 65 time this object was modified in S3. | |
| 66 :ivar owner: The ID of the owner of this object. | |
| 67 :ivar storage_class: The storage class of the object. Currently, one of: | |
| 68 STANDARD | REDUCED_REDUNDANCY | GLACIER | |
| 69 :ivar md5: The MD5 hash of the contents of the object. | |
| 70 :ivar size: The size, in bytes, of the object. | |
| 71 :ivar version_id: The version ID of this object, if it is a versioned | |
| 72 object. | |
| 73 :ivar encrypted: Whether the object is encrypted while at rest on | |
| 74 the server. | |
| 75 """ | |
| 76 | |
| 77 DefaultContentType = 'application/octet-stream' | |
| 78 | |
| 79 RestoreBody = """<?xml version="1.0" encoding="UTF-8"?> | |
| 80 <RestoreRequest xmlns="http://s3.amazonaws.com/doc/2006-03-01"> | |
| 81 <Days>%s</Days> | |
| 82 </RestoreRequest>""" | |
| 83 | |
| 84 | |
| 85 BufferSize = boto.config.getint('Boto', 'key_buffer_size', 8192) | |
| 86 | |
| 87 # The object metadata fields a user can set, other than custom metadata | |
| 88 # fields (i.e., those beginning with a provider-specific prefix like | |
| 89 # x-amz-meta). | |
| 90 base_user_settable_fields = set(["cache-control", "content-disposition", | |
| 91 "content-encoding", "content-language", | |
| 92 "content-md5", "content-type", | |
| 93 "x-robots-tag", "expires"]) | |
| 94 _underscore_base_user_settable_fields = set() | |
| 95 for f in base_user_settable_fields: | |
| 96 _underscore_base_user_settable_fields.add(f.replace('-', '_')) | |
| 97 # Metadata fields, whether user-settable or not, other than custom | |
| 98 # metadata fields (i.e., those beginning with a provider specific prefix | |
| 99 # like x-amz-meta). | |
| 100 base_fields = (base_user_settable_fields | | |
| 101 set(["last-modified", "content-length", "date", "etag"])) | |
| 102 | |
| 103 | |
| 104 | |
| 105 def __init__(self, bucket=None, name=None): | |
| 106 self.bucket = bucket | |
| 107 self.name = name | |
| 108 self.metadata = {} | |
| 109 self.cache_control = None | |
| 110 self.content_type = self.DefaultContentType | |
| 111 self.content_encoding = None | |
| 112 self.content_disposition = None | |
| 113 self.content_language = None | |
| 114 self.filename = None | |
| 115 self.etag = None | |
| 116 self.is_latest = False | |
| 117 self.last_modified = None | |
| 118 self.owner = None | |
| 119 self._storage_class = None | |
| 120 self.path = None | |
| 121 self.resp = None | |
| 122 self.mode = None | |
| 123 self.size = None | |
| 124 self.version_id = None | |
| 125 self.source_version_id = None | |
| 126 self.delete_marker = False | |
| 127 self.encrypted = None | |
| 128 # If the object is being restored, this attribute will be set to True. | |
| 129 # If the object is restored, it will be set to False. Otherwise this | |
| 130 # value will be None. If the restore is completed (ongoing_restore = | |
| 131 # False), the expiry_date will be populated with the expiry date of the | |
| 132 # restored object. | |
| 133 self.ongoing_restore = None | |
| 134 self.expiry_date = None | |
| 135 self.local_hashes = {} | |
| 136 | |
| 137 def __repr__(self): | |
| 138 if self.bucket: | |
| 139 name = u'<Key: %s,%s>' % (self.bucket.name, self.name) | |
| 140 else: | |
| 141 name = u'<Key: None,%s>' % self.name | |
| 142 | |
| 143 # Encode to bytes for Python 2 to prevent display decoding issues | |
| 144 if not isinstance(name, str): | |
| 145 name = name.encode('utf-8') | |
| 146 | |
| 147 return name | |
| 148 | |
| 149 def __iter__(self): | |
| 150 return self | |
| 151 | |
| 152 @property | |
| 153 def provider(self): | |
| 154 provider = None | |
| 155 if self.bucket and self.bucket.connection: | |
| 156 provider = self.bucket.connection.provider | |
| 157 return provider | |
| 158 | |
| 159 def _get_key(self): | |
| 160 return self.name | |
| 161 | |
| 162 def _set_key(self, value): | |
| 163 self.name = value | |
| 164 | |
| 165 key = property(_get_key, _set_key) | |
| 166 | |
| 167 def _get_md5(self): | |
| 168 if 'md5' in self.local_hashes and self.local_hashes['md5']: | |
| 169 return binascii.b2a_hex(self.local_hashes['md5']) | |
| 170 | |
| 171 def _set_md5(self, value): | |
| 172 if value: | |
| 173 self.local_hashes['md5'] = binascii.a2b_hex(value) | |
| 174 elif 'md5' in self.local_hashes: | |
| 175 self.local_hashes.pop('md5', None) | |
| 176 | |
| 177 md5 = property(_get_md5, _set_md5) | |
| 178 | |
| 179 def _get_base64md5(self): | |
| 180 if 'md5' in self.local_hashes and self.local_hashes['md5']: | |
| 181 md5 = self.local_hashes['md5'] | |
| 182 if not isinstance(md5, bytes): | |
| 183 md5 = md5.encode('utf-8') | |
| 184 return binascii.b2a_base64(md5).decode('utf-8').rstrip('\n') | |
| 185 | |
| 186 def _set_base64md5(self, value): | |
| 187 if value: | |
| 188 if not isinstance(value, six.string_types): | |
| 189 value = value.decode('utf-8') | |
| 190 self.local_hashes['md5'] = binascii.a2b_base64(value) | |
| 191 elif 'md5' in self.local_hashes: | |
| 192 del self.local_hashes['md5'] | |
| 193 | |
| 194 base64md5 = property(_get_base64md5, _set_base64md5) | |
| 195 | |
| 196 def _get_storage_class(self): | |
| 197 if self._storage_class is None and self.bucket: | |
| 198 # Attempt to fetch storage class | |
| 199 list_items = list(self.bucket.list(self.name.encode('utf-8'))) | |
| 200 if len(list_items) and getattr(list_items[0], '_storage_class', | |
| 201 None): | |
| 202 self._storage_class = list_items[0]._storage_class | |
| 203 else: | |
| 204 # Key is not yet saved? Just use default... | |
| 205 self._storage_class = 'STANDARD' | |
| 206 | |
| 207 return self._storage_class | |
| 208 | |
| 209 def _set_storage_class(self, value): | |
| 210 self._storage_class = value | |
| 211 | |
| 212 storage_class = property(_get_storage_class, _set_storage_class) | |
| 213 | |
| 214 def get_md5_from_hexdigest(self, md5_hexdigest): | |
| 215 """ | |
| 216 A utility function to create the 2-tuple (md5hexdigest, base64md5) | |
| 217 from just having a precalculated md5_hexdigest. | |
| 218 """ | |
| 219 digest = binascii.unhexlify(md5_hexdigest) | |
| 220 base64md5 = encodebytes(digest) | |
| 221 if base64md5[-1] == '\n': | |
| 222 base64md5 = base64md5[0:-1] | |
| 223 return (md5_hexdigest, base64md5) | |
| 224 | |
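The tuple returned here matches the ``md5`` argument accepted by ``set_contents_from_file``, so a digest computed elsewhere can be reused instead of hashing the file a second time. A minimal sketch, assuming a bucket obtained through the wider boto API (``boto.connect_s3``/``get_bucket``, not defined in this file); the bucket and file names are placeholders.

```python
import hashlib

import boto
from boto.s3.key import Key

# Hex digest known ahead of time (for example, recorded in a manifest).
with open('report.csv', 'rb') as fp:
    hex_digest = hashlib.md5(fp.read()).hexdigest()

bucket = boto.connect_s3().get_bucket('mybucket')   # assumed bucket name
key = Key(bucket, 'reports/report.csv')

md5_tuple = key.get_md5_from_hexdigest(hex_digest)  # (hexdigest, base64md5)
with open('report.csv', 'rb') as fp:
    key.set_contents_from_file(fp, md5=md5_tuple)
```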
| 225 def handle_encryption_headers(self, resp): | |
| 226 provider = self.bucket.connection.provider | |
| 227 if provider.server_side_encryption_header: | |
| 228 self.encrypted = resp.getheader( | |
| 229 provider.server_side_encryption_header, None) | |
| 230 else: | |
| 231 self.encrypted = None | |
| 232 | |
| 233 def handle_storage_class_header(self, resp): | |
| 234 provider = self.bucket.connection.provider | |
| 235 if provider.storage_class_header: | |
| 236 self._storage_class = resp.getheader( | |
| 237 provider.storage_class_header, None) | |
| 238 if (self._storage_class is None and | |
| 239 provider.get_provider_name() == 'aws'): | |
| 240 # S3 docs for HEAD object requests say S3 will return this | |
| 241 # header for all objects except Standard storage class objects. | |
| 242 self._storage_class = 'STANDARD' | |
| 243 | |
| 244 | |
| 245 def handle_version_headers(self, resp, force=False): | |
| 246 provider = self.bucket.connection.provider | |
| 247 # If the Key object already has a version_id attribute value, it | |
| 248 # means that it represents an explicit version and the user is | |
| 249 # doing a get_contents_*(version_id=<foo>) to retrieve another | |
| 250 # version of the Key. In that case, we don't really want to | |
| 251 # overwrite the version_id in this Key object. Comprende? | |
| 252 if self.version_id is None or force: | |
| 253 self.version_id = resp.getheader(provider.version_id, None) | |
| 254 self.source_version_id = resp.getheader(provider.copy_source_version_id, | |
| 255 None) | |
| 256 if resp.getheader(provider.delete_marker, 'false') == 'true': | |
| 257 self.delete_marker = True | |
| 258 else: | |
| 259 self.delete_marker = False | |
| 260 | |
| 261 def handle_restore_headers(self, response): | |
| 262 provider = self.bucket.connection.provider | |
| 263 header = response.getheader(provider.restore_header) | |
| 264 if header is None: | |
| 265 return | |
| 266 parts = header.split(',', 1) | |
| 267 for part in parts: | |
| 268 key, val = [i.strip() for i in part.split('=')] | |
| 269 val = val.replace('"', '') | |
| 270 if key == 'ongoing-request': | |
| 271 self.ongoing_restore = True if val.lower() == 'true' else False | |
| 272 elif key == 'expiry-date': | |
| 273 self.expiry_date = val | |
| 274 | |
| 275 def handle_addl_headers(self, headers): | |
| 276 """ | |
| 277 Used by Key subclasses to do additional, provider-specific | |
| 278 processing of response headers. No-op for this base class. | |
| 279 """ | |
| 280 pass | |
| 281 | |
| 282 def open_read(self, headers=None, query_args='', | |
| 283 override_num_retries=None, response_headers=None): | |
| 284 """ | |
| 285 Open this key for reading | |
| 286 | |
| 287 :type headers: dict | |
| 288 :param headers: Headers to pass in the web request | |
| 289 | |
| 290 :type query_args: string | |
| 291 :param query_args: Arguments to pass in the query string | |
| 292 (ie, 'torrent') | |
| 293 | |
| 294 :type override_num_retries: int | |
| 295 :param override_num_retries: If not None will override configured | |
| 296 num_retries parameter for underlying GET. | |
| 297 | |
| 298 :type response_headers: dict | |
| 299 :param response_headers: A dictionary containing HTTP | |
| 300 headers/values that will override any headers associated | |
| 301 with the stored object in the response. See | |
| 302 http://goo.gl/EWOPb for details. | |
| 303 """ | |
| 304 if self.resp is None: | |
| 305 self.mode = 'r' | |
| 306 | |
| 307 provider = self.bucket.connection.provider | |
| 308 self.resp = self.bucket.connection.make_request( | |
| 309 'GET', self.bucket.name, self.name, headers, | |
| 310 query_args=query_args, | |
| 311 override_num_retries=override_num_retries) | |
| 312 if self.resp.status < 199 or self.resp.status > 299: | |
| 313 body = self.resp.read() | |
| 314 raise provider.storage_response_error(self.resp.status, | |
| 315 self.resp.reason, body) | |
| 316 response_headers = self.resp.msg | |
| 317 self.metadata = boto.utils.get_aws_metadata(response_headers, | |
| 318 provider) | |
| 319 for name, value in response_headers.items(): | |
| 320 # To get correct size for Range GETs, use Content-Range | |
| 321 # header if one was returned. If not, use Content-Length | |
| 322 # header. | |
| 323 if (name.lower() == 'content-length' and | |
| 324 'Content-Range' not in response_headers): | |
| 325 self.size = int(value) | |
| 326 elif name.lower() == 'content-range': | |
| 327 end_range = re.sub('.*/(.*)', '\\1', value) | |
| 328 self.size = int(end_range) | |
| 329 elif name.lower() in Key.base_fields: | |
| 330 self.__dict__[name.lower().replace('-', '_')] = value | |
| 331 self.handle_version_headers(self.resp) | |
| 332 self.handle_encryption_headers(self.resp) | |
| 333 self.handle_restore_headers(self.resp) | |
| 334 self.handle_addl_headers(self.resp.getheaders()) | |
| 335 | |
| 336 def open_write(self, headers=None, override_num_retries=None): | |
| 337 """ | |
| 338 Open this key for writing. | |
| 339 Not yet implemented | |
| 340 | |
| 341 :type headers: dict | |
| 342 :param headers: Headers to pass in the write request | |
| 343 | |
| 344 :type override_num_retries: int | |
| 345 :param override_num_retries: If not None will override configured | |
| 346 num_retries parameter for underlying PUT. | |
| 347 """ | |
| 348 raise BotoClientError('Not Implemented') | |
| 349 | |
| 350 def open(self, mode='r', headers=None, query_args=None, | |
| 351 override_num_retries=None): | |
| 352 if mode == 'r': | |
| 353 self.mode = 'r' | |
| 354 self.open_read(headers=headers, query_args=query_args, | |
| 355 override_num_retries=override_num_retries) | |
| 356 elif mode == 'w': | |
| 357 self.mode = 'w' | |
| 358 self.open_write(headers=headers, | |
| 359 override_num_retries=override_num_retries) | |
| 360 else: | |
| 361 raise BotoClientError('Invalid mode: %s' % mode) | |
| 362 | |
| 363 closed = False | |
| 364 | |
| 365 def close(self, fast=False): | |
| 366 """ | |
| 367 Close this key. | |
| 368 | |
| 369 :type fast: bool | |
| 370 :param fast: True if you want the connection to be closed without first | |
| 371 reading the content. This should only be used in cases where subsequent | |
| 372 calls don't need to return the content from the open HTTP connection. | |
| 373 Note: As explained at | |
| 374 http://docs.python.org/2/library/httplib.html#httplib.HTTPConnection.getresponse, | |
| 375 callers must read the whole response before sending a new request to the | |
| 376 server. Calling Key.close(fast=True) and making a subsequent request to | |
| 377 the server will work because boto will get an httplib exception and | |
| 378 close/reopen the connection. | |
| 379 | |
| 380 """ | |
| 381 if self.resp and not fast: | |
| 382 self.resp.read() | |
| 383 self.resp = None | |
| 384 self.mode = None | |
| 385 self.closed = True | |
| 386 | |
| 387 def next(self): | |
| 388 """ | |
| 389 By providing a next method, the key object supports use as an iterator. | |
| 390 For example, you can now say: | |
| 391 | |
| 392 for bytes in key: | |
| 393 write bytes to a file or whatever | |
| 394 | |
| 395 All of the HTTP connection stuff is handled for you. | |
| 396 """ | |
| 397 self.open_read() | |
| 398 data = self.resp.read(self.BufferSize) | |
| 399 if not data: | |
| 400 self.close() | |
| 401 raise StopIteration | |
| 402 return data | |
| 403 | |
| 404 # Python 3 iterator support | |
| 405 __next__ = next | |
| 406 | |
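Because ``next``/``__next__`` pulls ``BufferSize`` bytes per call from the open GET, a ``Key`` can be consumed as an iterator to stream a large object to disk without holding it in memory. A minimal sketch; the connection, bucket, and key names are assumptions.

```python
import boto

bucket = boto.connect_s3().get_bucket('mybucket')    # assumed
key = bucket.get_key('logs/2020-07-31.log.gz')       # assumed to exist

with open('2020-07-31.log.gz', 'wb') as out:
    for chunk in key:        # each chunk is at most Key.BufferSize bytes
        out.write(chunk)
```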
| 407 def read(self, size=0): | |
| 408 self.open_read() | |
| 409 if size == 0: | |
| 410 data = self.resp.read() | |
| 411 else: | |
| 412 data = self.resp.read(size) | |
| 413 if not data: | |
| 414 self.close() | |
| 415 return data | |
| 416 | |
| 417 def change_storage_class(self, new_storage_class, dst_bucket=None, | |
| 418 validate_dst_bucket=True): | |
| 419 """ | |
| 420 Change the storage class of an existing key. | |
| 421 Depending on whether a different destination bucket is supplied | |
| 422 or not, this will either move the item within the bucket, preserving | |
| 423 all metadata and ACL info while changing the storage class, or it | |
| 424 will copy the item to the provided destination bucket, also | |
| 425 preserving metadata and ACL info. | |
| 426 | |
| 427 :type new_storage_class: string | |
| 428 :param new_storage_class: The new storage class for the Key. | |
| 429 Possible values are: | |
| 430 * STANDARD | |
| 431 * REDUCED_REDUNDANCY | |
| 432 | |
| 433 :type dst_bucket: string | |
| 434 :param dst_bucket: The name of a destination bucket. If not | |
| 435 provided the current bucket of the key will be used. | |
| 436 | |
| 437 :type validate_dst_bucket: bool | |
| 438 :param validate_dst_bucket: If True, will validate the dst_bucket | |
| 439 by using an extra list request. | |
| 440 """ | |
| 441 bucket_name = dst_bucket or self.bucket.name | |
| 442 if new_storage_class == 'STANDARD': | |
| 443 return self.copy(bucket_name, self.name, | |
| 444 reduced_redundancy=False, preserve_acl=True, | |
| 445 validate_dst_bucket=validate_dst_bucket) | |
| 446 elif new_storage_class == 'REDUCED_REDUNDANCY': | |
| 447 return self.copy(bucket_name, self.name, | |
| 448 reduced_redundancy=True, preserve_acl=True, | |
| 449 validate_dst_bucket=validate_dst_bucket) | |
| 450 else: | |
| 451 raise BotoClientError('Invalid storage class: %s' % | |
| 452 new_storage_class) | |
| 453 | |
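``change_storage_class`` is implemented as an in-place copy, so metadata survives and, because ``preserve_acl=True`` is passed, the ACL does as well. A short sketch with placeholder names:

```python
import boto

bucket = boto.connect_s3().get_bucket('mybucket')    # assumed
key = bucket.get_key('archive/old-data.tar')         # assumed to exist

# Anything other than STANDARD or REDUCED_REDUNDANCY raises BotoClientError.
key.change_storage_class('REDUCED_REDUNDANCY')
```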
| 454 def copy(self, dst_bucket, dst_key, metadata=None, | |
| 455 reduced_redundancy=False, preserve_acl=False, | |
| 456 encrypt_key=False, validate_dst_bucket=True): | |
| 457 """ | |
| 458 Copy this Key to another bucket. | |
| 459 | |
| 460 :type dst_bucket: string | |
| 461 :param dst_bucket: The name of the destination bucket | |
| 462 | |
| 463 :type dst_key: string | |
| 464 :param dst_key: The name of the destination key | |
| 465 | |
| 466 :type metadata: dict | |
| 467 :param metadata: Metadata to be associated with new key. If | |
| 468 metadata is supplied, it will replace the metadata of the | |
| 469 source key being copied. If no metadata is supplied, the | |
| 470 source key's metadata will be copied to the new key. | |
| 471 | |
| 472 :type reduced_redundancy: bool | |
| 473 :param reduced_redundancy: If True, this will force the | |
| 474 storage class of the new Key to be REDUCED_REDUNDANCY | |
| 475 regardless of the storage class of the key being copied. | |
| 476 The Reduced Redundancy Storage (RRS) feature of S3 | |
| 477 provides lower redundancy at lower storage cost. | |
| 478 | |
| 479 :type preserve_acl: bool | |
| 480 :param preserve_acl: If True, the ACL from the source key will | |
| 481 be copied to the destination key. If False, the | |
| 482 destination key will have the default ACL. Note that | |
| 483 preserving the ACL in the new key object will require two | |
| 484 additional API calls to S3, one to retrieve the current | |
| 485 ACL and one to set that ACL on the new object. If you | |
| 486 don't care about the ACL, a value of False will be | |
| 487 significantly more efficient. | |
| 488 | |
| 489 :type encrypt_key: bool | |
| 490 :param encrypt_key: If True, the new copy of the object will | |
| 491 be encrypted on the server-side by S3 and will be stored | |
| 492 in an encrypted form while at rest in S3. | |
| 493 | |
| 494 :type validate_dst_bucket: bool | |
| 495 :param validate_dst_bucket: If True, will validate the dst_bucket | |
| 496 by using an extra list request. | |
| 497 | |
| 498 :rtype: :class:`boto.s3.key.Key` or subclass | |
| 499 :returns: An instance of the newly created key object | |
| 500 """ | |
| 501 dst_bucket = self.bucket.connection.lookup(dst_bucket, | |
| 502 validate_dst_bucket) | |
| 503 if reduced_redundancy: | |
| 504 storage_class = 'REDUCED_REDUNDANCY' | |
| 505 else: | |
| 506 storage_class = self.storage_class | |
| 507 return dst_bucket.copy_key(dst_key, self.bucket.name, | |
| 508 self.name, metadata, | |
| 509 storage_class=storage_class, | |
| 510 preserve_acl=preserve_acl, | |
| 511 encrypt_key=encrypt_key, | |
| 512 src_version_id=self.version_id) | |
| 513 | |
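A sketch of a server-side copy into another bucket, keeping the source ACL and forcing the RRS storage class; all names are hypothetical. As the docstring above notes, ``preserve_acl=True`` costs two extra API calls.

```python
import boto

conn = boto.connect_s3()
src = conn.get_bucket('source-bucket').get_key('data/input.csv')   # assumed

new_key = src.copy('backup-bucket', 'data/input.csv',
                   preserve_acl=True, reduced_redundancy=True)
print(new_key.name)
```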
| 514 def startElement(self, name, attrs, connection): | |
| 515 if name == 'Owner': | |
| 516 self.owner = User(self) | |
| 517 return self.owner | |
| 518 else: | |
| 519 return None | |
| 520 | |
| 521 def endElement(self, name, value, connection): | |
| 522 if name == 'Key': | |
| 523 self.name = value | |
| 524 elif name == 'ETag': | |
| 525 self.etag = value | |
| 526 elif name == 'IsLatest': | |
| 527 if value == 'true': | |
| 528 self.is_latest = True | |
| 529 else: | |
| 530 self.is_latest = False | |
| 531 elif name == 'LastModified': | |
| 532 self.last_modified = value | |
| 533 elif name == 'Size': | |
| 534 self.size = int(value) | |
| 535 elif name == 'StorageClass': | |
| 536 self.storage_class = value | |
| 537 elif name == 'Owner': | |
| 538 pass | |
| 539 elif name == 'VersionId': | |
| 540 self.version_id = value | |
| 541 else: | |
| 542 setattr(self, name, value) | |
| 543 | |
| 544 def exists(self, headers=None): | |
| 545 """ | |
| 546 Returns True if the key exists | |
| 547 | |
| 548 :rtype: bool | |
| 549 :return: Whether the key exists on S3 | |
| 550 """ | |
| 551 return bool(self.bucket.lookup(self.name, headers=headers)) | |
| 552 | |
| 553 def delete(self, headers=None): | |
| 554 """ | |
| 555 Delete this key from S3 | |
| 556 """ | |
| 557 return self.bucket.delete_key(self.name, version_id=self.version_id, | |
| 558 headers=headers) | |
| 559 | |
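``exists`` is a lookup against the bucket and ``delete`` removes the key (the specific version when ``version_id`` is set). A minimal sketch with assumed names:

```python
import boto
from boto.s3.key import Key

bucket = boto.connect_s3().get_bucket('mybucket')   # assumed
key = Key(bucket, 'tmp/scratch.bin')

if key.exists():
    key.delete()    # deletes key.version_id if one has been set
```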
| 560 def get_metadata(self, name): | |
| 561 return self.metadata.get(name) | |
| 562 | |
| 563 def set_metadata(self, name, value): | |
| 564 # Ensure that metadata that is vital to signing is in the correct | |
| 565 # case. Applies to ``Content-Type`` & ``Content-MD5``. | |
| 566 if name.lower() == 'content-type': | |
| 567 self.metadata['Content-Type'] = value | |
| 568 elif name.lower() == 'content-md5': | |
| 569 self.metadata['Content-MD5'] = value | |
| 570 else: | |
| 571 self.metadata[name] = value | |
| 572 if name.lower() in Key.base_user_settable_fields: | |
| 573 self.__dict__[name.lower().replace('-', '_')] = value | |
| 574 | |
| 575 def update_metadata(self, d): | |
| 576 self.metadata.update(d) | |
| 577 | |
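``set_metadata`` normalizes the casing of headers that matter for signing (``Content-Type``, ``Content-MD5``) and mirrors known base fields onto the instance attributes; custom keys are merged into the request headers at upload time via ``boto.utils.merge_meta``, which for S3 adds the ``x-amz-meta-`` prefix to non-standard names. A sketch with placeholder names:

```python
import boto
from boto.s3.key import Key

bucket = boto.connect_s3().get_bucket('mybucket')      # assumed
key = Key(bucket, 'site/index.html')

key.set_metadata('Content-Type', 'text/html')          # also sets key.content_type
key.set_metadata('owner', 'web-team')                  # custom user metadata
key.update_metadata({'cache-control': 'max-age=300'})
key.set_contents_from_filename('build/index.html')     # metadata sent with the PUT
```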
| 578 # convenience methods for setting/getting ACL | |
| 579 def set_acl(self, acl_str, headers=None): | |
| 580 if self.bucket is not None: | |
| 581 self.bucket.set_acl(acl_str, self.name, headers=headers) | |
| 582 | |
| 583 def get_acl(self, headers=None): | |
| 584 if self.bucket is not None: | |
| 585 return self.bucket.get_acl(self.name, headers=headers) | |
| 586 | |
| 587 def get_xml_acl(self, headers=None): | |
| 588 if self.bucket is not None: | |
| 589 return self.bucket.get_xml_acl(self.name, headers=headers) | |
| 590 | |
| 591 def set_xml_acl(self, acl_str, headers=None): | |
| 592 if self.bucket is not None: | |
| 593 return self.bucket.set_xml_acl(acl_str, self.name, headers=headers) | |
| 594 | |
| 595 def set_canned_acl(self, acl_str, headers=None): | |
| 596 return self.bucket.set_canned_acl(acl_str, self.name, headers) | |
| 597 | |
| 598 def get_redirect(self): | |
| 599 """Return the redirect location configured for this key. | |
| 600 | |
| 601 If no redirect is configured (via set_redirect), then None | |
| 602 will be returned. | |
| 603 | |
| 604 """ | |
| 605 response = self.bucket.connection.make_request( | |
| 606 'HEAD', self.bucket.name, self.name) | |
| 607 if response.status == 200: | |
| 608 return response.getheader('x-amz-website-redirect-location') | |
| 609 else: | |
| 610 raise self.provider.storage_response_error( | |
| 611 response.status, response.reason, response.read()) | |
| 612 | |
| 613 def set_redirect(self, redirect_location, headers=None): | |
| 614 """Configure this key to redirect to another location. | |
| 615 | |
| 616 When the bucket associated with this key is accessed from the website | |
| 617 endpoint, a 301 redirect will be issued to the specified | |
| 618 `redirect_location`. | |
| 619 | |
| 620 :type redirect_location: string | |
| 621 :param redirect_location: The location to redirect. | |
| 622 | |
| 623 """ | |
| 624 if headers is None: | |
| 625 headers = {} | |
| 626 else: | |
| 627 headers = headers.copy() | |
| 628 | |
| 629 headers['x-amz-website-redirect-location'] = redirect_location | |
| 630 response = self.bucket.connection.make_request('PUT', self.bucket.name, | |
| 631 self.name, headers) | |
| 632 if response.status == 200: | |
| 633 return True | |
| 634 else: | |
| 635 raise self.provider.storage_response_error( | |
| 636 response.status, response.reason, response.read()) | |
| 637 | |
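``set_redirect`` issues a PUT that stores the ``x-amz-website-redirect-location`` header on the object, and ``get_redirect`` reads it back with a HEAD request; the redirect only takes effect when the bucket is served through the S3 website endpoint. A small sketch; the bucket name and target are placeholders.

```python
import boto
from boto.s3.key import Key

bucket = boto.connect_s3().get_bucket('www.example.com')   # assumed website bucket
key = Key(bucket, 'old-page.html')

key.set_redirect('/new-page.html')     # returns True on success
print(key.get_redirect())              # -> '/new-page.html'
```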
| 638 def make_public(self, headers=None): | |
| 639 return self.bucket.set_canned_acl('public-read', self.name, headers) | |
| 640 | |
| 641 def generate_url(self, expires_in, method='GET', headers=None, | |
| 642 query_auth=True, force_http=False, response_headers=None, | |
| 643 expires_in_absolute=False, version_id=None, | |
| 644 policy=None, reduced_redundancy=False, encrypt_key=False): | |
| 645 """ | |
| 646 Generate a URL to access this key. | |
| 647 | |
| 648 :type expires_in: int | |
| 649 :param expires_in: How long the url is valid for, in seconds. | |
| 650 | |
| 651 :type method: string | |
| 652 :param method: The method to use for retrieving the file | |
| 653 (default is GET). | |
| 654 | |
| 655 :type headers: dict | |
| 656 :param headers: Any headers to pass along in the request. | |
| 657 | |
| 658 :type query_auth: bool | |
| 659 :param query_auth: If True, signs the request in the URL. | |
| 660 | |
| 661 :type force_http: bool | |
| 662 :param force_http: If True, http will be used instead of https. | |
| 663 | |
| 664 :type response_headers: dict | |
| 665 :param response_headers: A dictionary containing HTTP | |
| 666 headers/values that will override any headers associated | |
| 667 with the stored object in the response. See | |
| 668 http://goo.gl/EWOPb for details. | |
| 669 | |
| 670 :type expires_in_absolute: bool | |
| 671 :param expires_in_absolute: If True, expires_in is treated as an absolute timestamp (seconds since the epoch) rather than an offset from now. | |
| 672 | |
| 673 :type version_id: string | |
| 674 :param version_id: The version_id of the object to GET. If specified | |
| 675 this overrides any value in the key. | |
| 676 | |
| 677 :type policy: :class:`boto.s3.acl.CannedACLStrings` | |
| 678 :param policy: A canned ACL policy that will be applied to the | |
| 679 new key in S3. | |
| 680 | |
| 681 :type reduced_redundancy: bool | |
| 682 :param reduced_redundancy: If True, this will set the storage | |
| 683 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
| 684 Redundancy Storage (RRS) feature of S3 provides lower | |
| 685 redundancy at lower storage cost. | |
| 686 | |
| 687 :type encrypt_key: bool | |
| 688 :param encrypt_key: If True, the new copy of the object will | |
| 689 be encrypted on the server-side by S3 and will be stored | |
| 690 in an encrypted form while at rest in S3. | |
| 691 | |
| 692 :rtype: string | |
| 693 :return: The URL to access the key | |
| 694 """ | |
| 695 provider = self.bucket.connection.provider | |
| 696 version_id = version_id or self.version_id | |
| 697 if headers is None: | |
| 698 headers = {} | |
| 699 else: | |
| 700 headers = headers.copy() | |
| 701 | |
| 702 # add headers accordingly (usually PUT case) | |
| 703 if policy: | |
| 704 headers[provider.acl_header] = policy | |
| 705 if reduced_redundancy: | |
| 706 self.storage_class = 'REDUCED_REDUNDANCY' | |
| 707 if provider.storage_class_header: | |
| 708 headers[provider.storage_class_header] = self.storage_class | |
| 709 if encrypt_key: | |
| 710 headers[provider.server_side_encryption_header] = 'AES256' | |
| 711 headers = boto.utils.merge_meta(headers, self.metadata, provider) | |
| 712 | |
| 713 return self.bucket.connection.generate_url(expires_in, method, | |
| 714 self.bucket.name, self.name, | |
| 715 headers, query_auth, | |
| 716 force_http, | |
| 717 response_headers, | |
| 718 expires_in_absolute, | |
| 719 version_id) | |
| 720 | |
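A sketch of producing a pre-signed URL for a private object, and an unsigned URL for a public one; connection setup and names are assumptions, and the exact URL shape depends on the configured auth mechanism.

```python
import boto

bucket = boto.connect_s3().get_bucket('mybucket')    # assumed
key = bucket.get_key('private/report.pdf')           # assumed to exist

# Signed GET URL, valid for one hour; anyone holding it can fetch the object.
signed_url = key.generate_url(3600, method='GET', query_auth=True)

# Plain, unsigned HTTP URL (only useful if the object is publicly readable).
public_url = key.generate_url(0, query_auth=False, force_http=True)
```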
| 721 def send_file(self, fp, headers=None, cb=None, num_cb=10, | |
| 722 query_args=None, chunked_transfer=False, size=None): | |
| 723 """ | |
| 724 Upload a file to a key into a bucket on S3. | |
| 725 | |
| 726 :type fp: file | |
| 727 :param fp: The file pointer to upload. The file pointer must | |
| 728 point at the offset from which you wish to upload. | |
| 729 ie. if uploading the full file, it should point at the | |
| 730 start of the file. Normally when a file is opened for | |
| 731 reading, the fp will point at the first byte. See the | |
| 732 size parameter below for more info. | |
| 733 | |
| 734 :type headers: dict | |
| 735 :param headers: The headers to pass along with the PUT request | |
| 736 | |
| 737 :type num_cb: int | |
| 738 :param num_cb: (optional) If a callback is specified with the | |
| 739 cb parameter this parameter determines the granularity of | |
| 740 the callback by defining the maximum number of times the | |
| 741 callback will be called during the file | |
| 742 transfer. Providing a negative integer will cause your | |
| 743 callback to be called with each buffer read. | |
| 744 | |
| 745 :type query_args: string | |
| 746 :param query_args: (optional) Arguments to pass in the query string. | |
| 747 | |
| 748 :type chunked_transfer: boolean | |
| 749 :param chunked_transfer: (optional) If true, we use chunked | |
| 750 Transfer-Encoding. | |
| 751 | |
| 752 :type size: int | |
| 753 :param size: (optional) The Maximum number of bytes to read | |
| 754 from the file pointer (fp). This is useful when uploading | |
| 755 a file in multiple parts where you are splitting the file | |
| 756 up into different ranges to be uploaded. If not specified, | |
| 757 the default behaviour is to read all bytes from the file | |
| 758 pointer. Fewer bytes may be available. | |
| 759 """ | |
| 760 self._send_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, | |
| 761 query_args=query_args, | |
| 762 chunked_transfer=chunked_transfer, size=size) | |
| 763 | |
| 764 def _send_file_internal(self, fp, headers=None, cb=None, num_cb=10, | |
| 765 query_args=None, chunked_transfer=False, size=None, | |
| 766 hash_algs=None): | |
| 767 provider = self.bucket.connection.provider | |
| 768 try: | |
| 769 spos = fp.tell() | |
| 770 except IOError: | |
| 771 spos = None | |
| 772 self.read_from_stream = False | |
| 773 | |
| 774 # If hash_algs is unset and the MD5 hasn't already been computed, | |
| 775 # default to an MD5 hash_alg to hash the data on-the-fly. | |
| 776 if hash_algs is None and not self.md5: | |
| 777 hash_algs = {'md5': md5} | |
| 778 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {}) | |
| 779 | |
| 780 def sender(http_conn, method, path, data, headers): | |
| 781 # This function is called repeatedly for temporary retries | |
| 782 # so we must be sure the file pointer is pointing at the | |
| 783 # start of the data. | |
| 784 if spos is not None and spos != fp.tell(): | |
| 785 fp.seek(spos) | |
| 786 elif spos is None and self.read_from_stream: | |
| 787 # if seek is not supported, and we've read from this | |
| 788 # stream already, then we need to abort retries to | |
| 789 # avoid setting bad data. | |
| 790 raise provider.storage_data_error( | |
| 791 'Cannot retry failed request. fp does not support seeking.') | |
| 792 | |
| 793 # If the caller explicitly specified host header, tell putrequest | |
| 794 # not to add a second host header. Similarly for accept-encoding. | |
| 795 skips = {} | |
| 796 if boto.utils.find_matching_headers('host', headers): | |
| 797 skips['skip_host'] = 1 | |
| 798 if boto.utils.find_matching_headers('accept-encoding', headers): | |
| 799 skips['skip_accept_encoding'] = 1 | |
| 800 http_conn.putrequest(method, path, **skips) | |
| 801 for key in headers: | |
| 802 http_conn.putheader(key, headers[key]) | |
| 803 http_conn.endheaders() | |
| 804 | |
| 805 save_debug = self.bucket.connection.debug | |
| 806 self.bucket.connection.debug = 0 | |
| 807 # If the debuglevel < 4 we don't want to show connection | |
| 808 # payload, so turn off HTTP connection-level debug output (to | |
| 809 # be restored below). | |
| 810 # Use the getattr approach to allow this to work in AppEngine. | |
| 811 if getattr(http_conn, 'debuglevel', 0) < 4: | |
| 812 http_conn.set_debuglevel(0) | |
| 813 | |
| 814 data_len = 0 | |
| 815 if cb: | |
| 816 if size: | |
| 817 cb_size = size | |
| 818 elif self.size: | |
| 819 cb_size = self.size | |
| 820 else: | |
| 821 cb_size = 0 | |
| 822 if chunked_transfer and cb_size == 0: | |
| 823 # For chunked Transfer, we call the cb for every 1MB | |
| 824 # of data transferred, except when we know size. | |
| 825 cb_count = (1024 * 1024) / self.BufferSize | |
| 826 elif num_cb > 1: | |
| 827 cb_count = int( | |
| 828 math.ceil(cb_size / self.BufferSize / (num_cb - 1.0))) | |
| 829 elif num_cb < 0: | |
| 830 cb_count = -1 | |
| 831 else: | |
| 832 cb_count = 0 | |
| 833 i = 0 | |
| 834 cb(data_len, cb_size) | |
| 835 | |
| 836 bytes_togo = size | |
| 837 if bytes_togo and bytes_togo < self.BufferSize: | |
| 838 chunk = fp.read(bytes_togo) | |
| 839 else: | |
| 840 chunk = fp.read(self.BufferSize) | |
| 841 | |
| 842 if not isinstance(chunk, bytes): | |
| 843 chunk = chunk.encode('utf-8') | |
| 844 | |
| 845 if spos is None: | |
| 846 # read at least something from a non-seekable fp. | |
| 847 self.read_from_stream = True | |
| 848 while chunk: | |
| 849 chunk_len = len(chunk) | |
| 850 data_len += chunk_len | |
| 851 if chunked_transfer: | |
| 852 http_conn.send('%x;\r\n' % chunk_len) | |
| 853 http_conn.send(chunk) | |
| 854 http_conn.send('\r\n') | |
| 855 else: | |
| 856 http_conn.send(chunk) | |
| 857 for alg in digesters: | |
| 858 digesters[alg].update(chunk) | |
| 859 if bytes_togo: | |
| 860 bytes_togo -= chunk_len | |
| 861 if bytes_togo <= 0: | |
| 862 break | |
| 863 if cb: | |
| 864 i += 1 | |
| 865 if i == cb_count or cb_count == -1: | |
| 866 cb(data_len, cb_size) | |
| 867 i = 0 | |
| 868 if bytes_togo and bytes_togo < self.BufferSize: | |
| 869 chunk = fp.read(bytes_togo) | |
| 870 else: | |
| 871 chunk = fp.read(self.BufferSize) | |
| 872 | |
| 873 if not isinstance(chunk, bytes): | |
| 874 chunk = chunk.encode('utf-8') | |
| 875 | |
| 876 self.size = data_len | |
| 877 | |
| 878 for alg in digesters: | |
| 879 self.local_hashes[alg] = digesters[alg].digest() | |
| 880 | |
| 881 if chunked_transfer: | |
| 882 http_conn.send('0\r\n') | |
| 883 # http_conn.send("Content-MD5: %s\r\n" % self.base64md5) | |
| 884 http_conn.send('\r\n') | |
| 885 | |
| 886 if cb and (cb_count <= 1 or i > 0) and data_len > 0: | |
| 887 cb(data_len, cb_size) | |
| 888 | |
| 889 http_conn.set_debuglevel(save_debug) | |
| 890 self.bucket.connection.debug = save_debug | |
| 891 response = http_conn.getresponse() | |
| 892 body = response.read() | |
| 893 | |
| 894 if not self.should_retry(response, chunked_transfer): | |
| 895 raise provider.storage_response_error( | |
| 896 response.status, response.reason, body) | |
| 897 | |
| 898 return response | |
| 899 | |
| 900 if not headers: | |
| 901 headers = {} | |
| 902 else: | |
| 903 headers = headers.copy() | |
| 904 # Overwrite user-supplied user-agent. | |
| 905 for header in find_matching_headers('User-Agent', headers): | |
| 906 del headers[header] | |
| 907 headers['User-Agent'] = UserAgent | |
| 908 # If storage_class is None, then a user has not explicitly requested | |
| 909 # a storage class, so we can assume STANDARD here | |
| 910 if self._storage_class not in [None, 'STANDARD']: | |
| 911 headers[provider.storage_class_header] = self.storage_class | |
| 912 if find_matching_headers('Content-Encoding', headers): | |
| 913 self.content_encoding = merge_headers_by_name( | |
| 914 'Content-Encoding', headers) | |
| 915 if find_matching_headers('Content-Language', headers): | |
| 916 self.content_language = merge_headers_by_name( | |
| 917 'Content-Language', headers) | |
| 918 content_type_headers = find_matching_headers('Content-Type', headers) | |
| 919 if content_type_headers: | |
| 920 # Some use cases need to suppress sending of the Content-Type | |
| 921 # header and depend on the receiving server to set the content | |
| 922 # type. This can be achieved by setting headers['Content-Type'] | |
| 923 # to None when calling this method. | |
| 924 if (len(content_type_headers) == 1 and | |
| 925 headers[content_type_headers[0]] is None): | |
| 926 # Delete null Content-Type value to skip sending that header. | |
| 927 del headers[content_type_headers[0]] | |
| 928 else: | |
| 929 self.content_type = merge_headers_by_name( | |
| 930 'Content-Type', headers) | |
| 931 elif self.path: | |
| 932 self.content_type = mimetypes.guess_type(self.path)[0] | |
| 933 if self.content_type is None: | |
| 934 self.content_type = self.DefaultContentType | |
| 935 headers['Content-Type'] = self.content_type | |
| 936 else: | |
| 937 headers['Content-Type'] = self.content_type | |
| 938 if self.base64md5: | |
| 939 headers['Content-MD5'] = self.base64md5 | |
| 940 if chunked_transfer: | |
| 941 headers['Transfer-Encoding'] = 'chunked' | |
| 942 #if not self.base64md5: | |
| 943 # headers['Trailer'] = "Content-MD5" | |
| 944 else: | |
| 945 headers['Content-Length'] = str(self.size) | |
| 946 # This is terrible. We need a SHA256 of the body for SigV4, but to do | |
| 947 # the chunked ``sender`` behavior above, the ``fp`` isn't available to | |
| 948 # the auth mechanism (because closures). Detect if it's SigV4 & embellish | |
| 949 # while we can before the auth calculations occur. | |
| 950 if 'hmac-v4-s3' in self.bucket.connection._required_auth_capability(): | |
| 951 kwargs = {'fp': fp, 'hash_algorithm': hashlib.sha256} | |
| 952 if size is not None: | |
| 953 kwargs['size'] = size | |
| 954 headers['_sha256'] = compute_hash(**kwargs)[0] | |
| 955 headers['Expect'] = '100-Continue' | |
| 956 headers = boto.utils.merge_meta(headers, self.metadata, provider) | |
| 957 resp = self.bucket.connection.make_request( | |
| 958 'PUT', | |
| 959 self.bucket.name, | |
| 960 self.name, | |
| 961 headers, | |
| 962 sender=sender, | |
| 963 query_args=query_args | |
| 964 ) | |
| 965 self.handle_version_headers(resp, force=True) | |
| 966 self.handle_addl_headers(resp.getheaders()) | |
| 967 | |
| 968 def should_retry(self, response, chunked_transfer=False): | |
| 969 provider = self.bucket.connection.provider | |
| 970 | |
| 971 if not chunked_transfer: | |
| 972 if response.status in [500, 503]: | |
| 973 # 500 & 503 can be plain retries. | |
| 974 return True | |
| 975 | |
| 976 if response.getheader('location'): | |
| 977 # If there's a redirect, plain retry. | |
| 978 return True | |
| 979 | |
| 980 if 200 <= response.status <= 299: | |
| 981 self.etag = response.getheader('etag') | |
| 982 md5 = self.md5 | |
| 983 if isinstance(md5, bytes): | |
| 984 md5 = md5.decode('utf-8') | |
| 985 | |
| 986 # If you use customer-provided encryption keys, the ETag value that | |
| 987 # Amazon S3 returns in the response will not be the MD5 of the | |
| 988 # object. | |
| 989 amz_server_side_encryption_customer_algorithm = response.getheader( | |
| 990 'x-amz-server-side-encryption-customer-algorithm', None) | |
| 991 # The same is applicable for KMS-encrypted objects in gs buckets. | |
| 992 goog_customer_managed_encryption = response.getheader( | |
| 993 'x-goog-encryption-kms-key-name', None) | |
| 994 if (amz_server_side_encryption_customer_algorithm is None and | |
| 995 goog_customer_managed_encryption is None): | |
| 996 if self.etag != '"%s"' % md5: | |
| 997 raise provider.storage_data_error( | |
| 998 'ETag from S3 did not match computed MD5. ' | |
| 999 '%s vs. %s' % (self.etag, self.md5)) | |
| 1000 | |
| 1001 return True | |
| 1002 | |
| 1003 if response.status == 400: | |
| 1004 # The 400 must be trapped so the retry handler can check to | |
| 1005 # see if it was a timeout. | |
| 1006 # If ``RequestTimeout`` is present, we'll retry. Otherwise, bomb | |
| 1007 # out. | |
| 1008 body = response.read() | |
| 1009 err = provider.storage_response_error( | |
| 1010 response.status, | |
| 1011 response.reason, | |
| 1012 body | |
| 1013 ) | |
| 1014 | |
| 1015 if err.error_code in ['RequestTimeout']: | |
| 1016 raise PleaseRetryException( | |
| 1017 "Saw %s, retrying" % err.error_code, | |
| 1018 response=response | |
| 1019 ) | |
| 1020 | |
| 1021 return False | |
| 1022 | |
| 1023 def compute_md5(self, fp, size=None): | |
| 1024 """ | |
| 1025 :type fp: file | |
| 1026 :param fp: File pointer to the file to MD5 hash. The file | |
| 1027 pointer will be reset to the same position before the | |
| 1028 method returns. | |
| 1029 | |
| 1030 :type size: int | |
| 1031 :param size: (optional) The Maximum number of bytes to read | |
| 1032 from the file pointer (fp). This is useful when uploading | |
| 1033 a file in multiple parts where the file is being split | |
| 1034 in place into different parts. Fewer bytes may be available. | |
| 1035 """ | |
| 1036 hex_digest, b64_digest, data_size = compute_md5(fp, size=size) | |
| 1037 # Returned values are MD5 hash, base64 encoded MD5 hash, and data size. | |
| 1038 # The internal implementation of compute_md5() needs to return the | |
| 1039 # data size but we don't want to return that value to the external | |
| 1040 # caller because it changes the class interface (i.e. it might | |
| 1041 # break some code) so we consume the third tuple value here and | |
| 1042 # return the remainder of the tuple to the caller, thereby preserving | |
| 1043 # the existing interface. | |
| 1044 self.size = data_size | |
| 1045 return (hex_digest, b64_digest) | |
| 1046 | |
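``compute_md5`` hashes the file once, restores the file pointer, and records the byte count in ``self.size``, so the returned tuple can be handed straight to ``set_contents_from_file``. A short sketch; names are placeholders.

```python
import boto
from boto.s3.key import Key

bucket = boto.connect_s3().get_bucket('mybucket')    # assumed
key = Key(bucket, 'videos/clip.mp4')

with open('clip.mp4', 'rb') as fp:
    md5_tuple = key.compute_md5(fp)          # fp position is restored
    key.set_contents_from_file(fp, md5=md5_tuple)
```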
| 1047 def set_contents_from_stream(self, fp, headers=None, replace=True, | |
| 1048 cb=None, num_cb=10, policy=None, | |
| 1049 reduced_redundancy=False, query_args=None, | |
| 1050 size=None): | |
| 1051 """ | |
| 1052 Store an object using the name of the Key object as the key in | |
| 1053 cloud and the contents of the data stream pointed to by 'fp' as | |
| 1054 the contents. | |
| 1055 | |
| 1056 The stream object is not seekable and total size is not known. | |
| 1057 This has the implication that we can't specify the | |
| 1058 Content-Size and Content-MD5 in the header. So for huge | |
| 1059 uploads, the delay in calculating MD5 is avoided but with a | |
| 1060 penalty of inability to verify the integrity of the uploaded | |
| 1061 data. | |
| 1062 | |
| 1063 :type fp: file | |
| 1064 :param fp: the file whose contents are to be uploaded | |
| 1065 | |
| 1066 :type headers: dict | |
| 1067 :param headers: additional HTTP headers to be sent with the | |
| 1068 PUT request. | |
| 1069 | |
| 1070 :type replace: bool | |
| 1071 :param replace: If this parameter is False, the method will first check | |
| 1072 to see if an object exists in the bucket with the same key. If it | |
| 1073 does, it won't overwrite it. The default value is True which will | |
| 1074 overwrite the object. | |
| 1075 | |
| 1076 :type cb: function | |
| 1077 :param cb: a callback function that will be called to report | |
| 1078 progress on the upload. The callback should accept two integer | |
| 1079 parameters, the first representing the number of bytes that have | |
| 1080 been successfully transmitted to GS and the second representing the | |
| 1081 total number of bytes that need to be transmitted. | |
| 1082 | |
| 1083 :type num_cb: int | |
| 1084 :param num_cb: (optional) If a callback is specified with the | |
| 1085 cb parameter, this parameter determines the granularity of | |
| 1086 the callback by defining the maximum number of times the | |
| 1087 callback will be called during the file transfer. | |
| 1088 | |
| 1089 :type policy: :class:`boto.gs.acl.CannedACLStrings` | |
| 1090 :param policy: A canned ACL policy that will be applied to the new key | |
| 1091 in GS. | |
| 1092 | |
| 1093 :type reduced_redundancy: bool | |
| 1094 :param reduced_redundancy: If True, this will set the storage | |
| 1095 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
| 1096 Redundancy Storage (RRS) feature of S3 provides lower | |
| 1097 redundancy at lower storage cost. | |
| 1098 | |
| 1099 :type size: int | |
| 1100 :param size: (optional) The Maximum number of bytes to read from | |
| 1101 the file pointer (fp). This is useful when uploading a | |
| 1102 file in multiple parts where you are splitting the file up | |
| 1103 into different ranges to be uploaded. If not specified, | |
| 1104 the default behaviour is to read all bytes from the file | |
| 1105 pointer. Fewer bytes may be available. | |
| 1106 """ | |
| 1107 | |
| 1108 provider = self.bucket.connection.provider | |
| 1109 if not provider.supports_chunked_transfer(): | |
| 1110 raise BotoClientError('%s does not support chunked transfer' | |
| 1111 % provider.get_provider_name()) | |
| 1112 | |
| 1113 # Name of the Object should be specified explicitly for Streams. | |
| 1114 if not self.name or self.name == '': | |
| 1115 raise BotoClientError('Cannot determine the destination ' | |
| 1116 'object name for the given stream') | |
| 1117 | |
| 1118 if headers is None: | |
| 1119 headers = {} | |
| 1120 if policy: | |
| 1121 headers[provider.acl_header] = policy | |
| 1122 | |
| 1123 if reduced_redundancy: | |
| 1124 self.storage_class = 'REDUCED_REDUNDANCY' | |
| 1125 if provider.storage_class_header: | |
| 1126 headers[provider.storage_class_header] = self.storage_class | |
| 1127 | |
| 1128 if self.bucket is not None: | |
| 1129 if not replace: | |
| 1130 if self.bucket.lookup(self.name): | |
| 1131 return | |
| 1132 self.send_file(fp, headers, cb, num_cb, query_args, | |
| 1133 chunked_transfer=True, size=size) | |
| 1134 | |
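Streaming uploads need a provider with chunked transfer support; in boto that is the Google Cloud Storage provider rather than plain S3, so the sketch below uses a GS connection. The command, bucket, and object names are all assumptions.

```python
import subprocess

import boto

# GS supports chunked Transfer-Encoding; a provider without it raises BotoClientError.
bucket = boto.connect_gs().get_bucket('my-gs-bucket')   # assumed
key = bucket.new_key('dumps/logs.tar.gz')               # name must be set explicitly

proc = subprocess.Popen(['tar', 'czf', '-', '/var/log/myapp'],
                        stdout=subprocess.PIPE)
key.set_contents_from_stream(proc.stdout)               # size and MD5 unknown up front
proc.wait()
```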
| 1135 def set_contents_from_file(self, fp, headers=None, replace=True, | |
| 1136 cb=None, num_cb=10, policy=None, md5=None, | |
| 1137 reduced_redundancy=False, query_args=None, | |
| 1138 encrypt_key=False, size=None, rewind=False): | |
| 1139 """ | |
| 1140 Store an object in S3 using the name of the Key object as the | |
| 1141 key in S3 and the contents of the file pointed to by 'fp' as the | |
| 1142 contents. The data is read from 'fp' from its current position until | |
| 1143 'size' bytes have been read or EOF. | |
| 1144 | |
| 1145 :type fp: file | |
| 1146 :param fp: the file whose contents to upload | |
| 1147 | |
| 1148 :type headers: dict | |
| 1149 :param headers: Additional HTTP headers that will be sent with | |
| 1150 the PUT request. | |
| 1151 | |
| 1152 :type replace: bool | |
| 1153 :param replace: If this parameter is False, the method will | |
| 1154 first check to see if an object exists in the bucket with | |
| 1155 the same key. If it does, it won't overwrite it. The | |
| 1156 default value is True which will overwrite the object. | |
| 1157 | |
| 1158 :type cb: function | |
| 1159 :param cb: a callback function that will be called to report | |
| 1160 progress on the upload. The callback should accept two | |
| 1161 integer parameters, the first representing the number of | |
| 1162 bytes that have been successfully transmitted to S3 and | |
| 1163 the second representing the size of the to be transmitted | |
| 1164 object. | |
| 1165 | |
| 1166 :type num_cb: int | |
| 1167 :param num_cb: (optional) If a callback is specified with the | |
| 1168 cb parameter this parameter determines the granularity of | |
| 1169 the callback by defining the maximum number of times the | |
| 1170 callback will be called during the file transfer. | |
| 1171 | |
| 1172 :type policy: :class:`boto.s3.acl.CannedACLStrings` | |
| 1173 :param policy: A canned ACL policy that will be applied to the | |
| 1174 new key in S3. | |
| 1175 | |
| 1176 :type md5: A tuple containing the hexdigest version of the MD5 | |
| 1177 checksum of the file as the first element and the | |
| 1178 Base64-encoded version of the plain checksum as the second | |
| 1179 element. This is the same format returned by the | |
| 1180 compute_md5 method. | |
| 1181 :param md5: If you need to compute the MD5 for any reason | |
| 1182 prior to upload, it's silly to have to do it twice so this | |
| 1183 param, if present, will be used as the MD5 values of the | |
| 1184 file. Otherwise, the checksum will be computed. | |
| 1185 | |
| 1186 :type reduced_redundancy: bool | |
| 1187 :param reduced_redundancy: If True, this will set the storage | |
| 1188 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
| 1189 Redundancy Storage (RRS) feature of S3 provides lower | |
| 1190 redundancy at lower storage cost. | |
| 1191 | |
| 1192 :type encrypt_key: bool | |
| 1193 :param encrypt_key: If True, the new copy of the object will | |
| 1194 be encrypted on the server-side by S3 and will be stored | |
| 1195 in an encrypted form while at rest in S3. | |
| 1196 | |
| 1197 :type size: int | |
| 1198 :param size: (optional) The Maximum number of bytes to read | |
| 1199 from the file pointer (fp). This is useful when uploading | |
| 1200 a file in multiple parts where you are splitting the file | |
| 1201 up into different ranges to be uploaded. If not specified, | |
| 1202 the default behaviour is to read all bytes from the file | |
| 1203 pointer. Fewer bytes may be available. | |
| 1204 | |
| 1205 :type rewind: bool | |
| 1206 :param rewind: (optional) If True, the file pointer (fp) will | |
| 1207 be rewound to the start before any bytes are read from | |
| 1208 it. The default behaviour is False which reads from the | |
| 1209 current position of the file pointer (fp). | |
| 1210 | |
| 1211 :rtype: int | |
| 1212 :return: The number of bytes written to the key. | |
| 1213 """ | |
| 1214 provider = self.bucket.connection.provider | |
| 1215 headers = headers or {} | |
| 1216 if policy: | |
| 1217 headers[provider.acl_header] = policy | |
| 1218 if encrypt_key: | |
| 1219 headers[provider.server_side_encryption_header] = 'AES256' | |
| 1220 | |
| 1221 if rewind: | |
| 1222 # caller requests reading from beginning of fp. | |
| 1223 fp.seek(0, os.SEEK_SET) | |
| 1224 else: | |
| 1225 # The following seek/tell/seek logic is intended | |
| 1226 # to detect applications using the older interface to | |
| 1227 # set_contents_from_file(), which automatically rewound the | |
| 1228 # file each time the Key was reused. This changed with commit | |
| 1229 # 14ee2d03f4665fe20d19a85286f78d39d924237e, to support uploads | |
| 1230 # split into multiple parts and uploaded in parallel, and at | |
| 1231 # the time of that commit this check was added because otherwise | |
| 1232 # older programs would get a success status and upload an empty | |
| 1233 # object. Unfortunately, it's very inefficient for fp's implemented | |
| 1234 # by KeyFile (used, for example, by gsutil when copying between | |
| 1235 # providers). So, we skip the check for the KeyFile case. | |
| 1236 # TODO: At some point consider removing this seek/tell/seek | |
| 1237 # logic, after enough time has passed that it's unlikely any | |
| 1238 # programs remain that assume the older auto-rewind interface. | |
| 1239 if not isinstance(fp, KeyFile): | |
| 1240 spos = fp.tell() | |
| 1241 fp.seek(0, os.SEEK_END) | |
| 1242 if fp.tell() == spos: | |
| 1243 fp.seek(0, os.SEEK_SET) | |
| 1244 if fp.tell() != spos: | |
| 1245 # Raise an exception as this is likely a programming | |
| 1246 # error whereby there is data before the fp but nothing | |
| 1247 # after it. | |
| 1248 fp.seek(spos) | |
| 1249 raise AttributeError('fp is at EOF. Use rewind option ' | |
| 1250 'or seek() to data start.') | |
| 1251 # seek back to the correct position. | |
| 1252 fp.seek(spos) | |
| 1253 | |
| 1254 if reduced_redundancy: | |
| 1255 self.storage_class = 'REDUCED_REDUNDANCY' | |
| 1256 if provider.storage_class_header: | |
| 1257 headers[provider.storage_class_header] = self.storage_class | |
| 1258 # TODO - What if provider doesn't support reduced redundancy? | |
| 1259 # What if different providers provide different classes? | |
| 1260 if hasattr(fp, 'name'): | |
| 1261 self.path = fp.name | |
| 1262 if self.bucket is not None: | |
| 1263 if not md5 and provider.supports_chunked_transfer(): | |
| 1264 # defer md5 calculation to on the fly and | |
| 1265 # we don't know anything about size yet. | |
| 1266 chunked_transfer = True | |
| 1267 self.size = None | |
| 1268 else: | |
| 1269 chunked_transfer = False | |
| 1270 if isinstance(fp, KeyFile): | |
| 1271 # Avoid EOF seek for KeyFile case as it's very inefficient. | |
| 1272 key = fp.getkey() | |
| 1273 size = key.size - fp.tell() | |
| 1274 self.size = size | |
| 1275 # At present both GCS and S3 use MD5 for the etag for | |
| 1276 # non-multipart-uploaded objects. If the etag is 32 hex | |
| 1277 # chars use it as an MD5, to avoid having to read the file | |
| 1278 # twice while transferring. | |
| 1279 if (re.match('^"[a-fA-F0-9]{32}"$', key.etag)): | |
| 1280 etag = key.etag.strip('"') | |
| 1281 md5 = (etag, base64.b64encode(binascii.unhexlify(etag))) | |
| 1282 if not md5: | |
| 1283 # compute_md5() and also set self.size to actual | |
| 1284 # size of the bytes read computing the md5. | |
| 1285 md5 = self.compute_md5(fp, size) | |
| 1286 # adjust size if required | |
| 1287 size = self.size | |
| 1288 elif size: | |
| 1289 self.size = size | |
| 1290 else: | |
| 1291 # If md5 is provided, we still need the size, so | |
| 1292 # calculate it based on the bytes to the end of content. | |
| 1293 spos = fp.tell() | |
| 1294 fp.seek(0, os.SEEK_END) | |
| 1295 self.size = fp.tell() - spos | |
| 1296 fp.seek(spos) | |
| 1297 size = self.size | |
| 1298 self.md5 = md5[0] | |
| 1299 self.base64md5 = md5[1] | |
| 1300 | |
| 1301 if self.name is None: | |
| 1302 self.name = self.md5 | |
| 1303 if not replace: | |
| 1304 if self.bucket.lookup(self.name): | |
| 1305 return | |
| 1306 | |
| 1307 self.send_file(fp, headers=headers, cb=cb, num_cb=num_cb, | |
| 1308 query_args=query_args, | |
| 1309 chunked_transfer=chunked_transfer, size=size) | |
| 1310 # return number of bytes written. | |
| 1311 return self.size | |
| 1312 | |
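A sketch of the common upload path: open the file, rewind, and report progress through the ``cb``/``num_cb`` hooks; bucket and file names are placeholders.

```python
import boto
from boto.s3.key import Key

def progress(sent, total):
    print('%d of %d bytes transferred' % (sent, total))

bucket = boto.connect_s3().get_bucket('mybucket')    # assumed
key = Key(bucket, 'backups/2020-07-31.tar.gz')

with open('2020-07-31.tar.gz', 'rb') as fp:
    written = key.set_contents_from_file(fp, rewind=True, cb=progress,
                                         num_cb=20, replace=True)
print('wrote %d bytes' % written)
```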
| 1313 def set_contents_from_filename(self, filename, headers=None, replace=True, | |
| 1314 cb=None, num_cb=10, policy=None, md5=None, | |
| 1315 reduced_redundancy=False, | |
| 1316 encrypt_key=False): | |
| 1317 """ | |
| 1318 Store an object in S3 using the name of the Key object as the | |
| 1319 key in S3 and the contents of the file named by 'filename'. | |
| 1320 See set_contents_from_file method for details about the | |
| 1321 parameters. | |
| 1322 | |
| 1323 :type filename: string | |
| 1324 :param filename: The name of the file that you want to put onto S3 | |
| 1325 | |
| 1326 :type headers: dict | |
| 1327 :param headers: Additional headers to pass along with the | |
| 1328 request to AWS. | |
| 1329 | |
| 1330 :type replace: bool | |
| 1331 :param replace: If True, replaces the contents of the file | |
| 1332 if it already exists. | |
| 1333 | |
| 1334 :type cb: function | |
| 1335 :param cb: a callback function that will be called to report | |
| 1336 progress on the upload. The callback should accept two | |
| 1337 integer parameters, the first representing the number of | |
| 1338 bytes that have been successfully transmitted to S3 and | |
| 1339 the second representing the total size of the object to | |
| 1340 be transmitted. | |
| 1341 | |
| 1342 :type num_cb: int | |
| 1343 :param num_cb: (optional) If a callback is specified with the | |
| 1344 cb parameter this parameter determines the granularity of | |
| 1345 the callback by defining the maximum number of times the | |
| 1346 callback will be called during the file transfer. | |
| 1347 | |
| 1348 :type policy: :class:`boto.s3.acl.CannedACLStrings` | |
| 1349 :param policy: A canned ACL policy that will be applied to the | |
| 1350 new key in S3. | |
| 1351 | |
| 1352 :type md5: tuple | |
| 1353 :param md5: A tuple containing the hexdigest version of the MD5 | |
| 1354 checksum of the file as the first element and the | |
| 1355 Base64-encoded version of the plain checksum as the second | |
| 1356 element. This is the same format returned by the | |
| 1357 compute_md5 method. If you need to compute the MD5 for any | |
| 1358 reason prior to upload, supply it here so it is not | |
| 1359 computed twice; if present, it will be used as the MD5 | |
| 1360 values of the file. Otherwise, the checksum will be computed. | |
| 1361 | |
| 1362 :type reduced_redundancy: bool | |
| 1363 :param reduced_redundancy: If True, this will set the storage | |
| 1364 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
| 1365 Redundancy Storage (RRS) feature of S3 provides lower | |
| 1366 redundancy at lower storage cost. | |
| | |
| 1367 :type encrypt_key: bool | |
| 1368 :param encrypt_key: If True, the new copy of the object | |
| 1369 will be encrypted on the server-side by S3 and will be | |
| 1370 stored in an encrypted form while at rest in S3. | |
| | |
| 1371 :rtype: int | |
| 1372 :return: The number of bytes written to the key. | |
| 1373 """ | |
| 1374 with open(filename, 'rb') as fp: | |
| 1375 return self.set_contents_from_file(fp, headers, replace, cb, | |
| 1376 num_cb, policy, md5, | |
| 1377 reduced_redundancy, | |
| 1378 encrypt_key=encrypt_key) | |
| 1379 | |
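| # Usage sketch (illustrative comment only; bucket and file names are | |
| # hypothetical): upload a local file with a progress callback and | |
| # Reduced Redundancy Storage enabled. | |
| # | |
| #   def progress(transmitted, total): | |
| #       print('%d of %s bytes sent' % (transmitted, total)) | |
| # | |
| #   key = bucket.new_key('backups/2020-07-31.tar.gz') | |
| #   nbytes = key.set_contents_from_filename('/tmp/2020-07-31.tar.gz', | |
| #                                           cb=progress, num_cb=20, | |
| #                                           reduced_redundancy=True) | |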
| 1380 def set_contents_from_string(self, string_data, headers=None, replace=True, | |
| 1381 cb=None, num_cb=10, policy=None, md5=None, | |
| 1382 reduced_redundancy=False, | |
| 1383 encrypt_key=False): | |
| 1384 """ | |
| 1385 Store an object in S3 using the name of the Key object as the | |
| 1386 key in S3 and the string 'string_data' as the contents. | |
| 1387 See set_contents_from_file method for details about the | |
| 1388 parameters. | |
| 1389 | |
| 1390 :type headers: dict | |
| 1391 :param headers: Additional headers to pass along with the | |
| 1392 request to AWS. | |
| 1393 | |
| 1394 :type replace: bool | |
| 1395 :param replace: If True, replaces the contents of the key if | |
| 1396 it already exists. | |
| 1397 | |
| 1398 :type cb: function | |
| 1399 :param cb: a callback function that will be called to report | |
| 1400 progress on the upload. The callback should accept two | |
| 1401 integer parameters, the first representing the number of | |
| 1402 bytes that have been successfully transmitted to S3 and | |
| 1403 the second representing the total size of the object to | |
| 1404 be transmitted. | |
| 1405 | |
| 1406 :type num_cb: int | |
| 1407 :param num_cb: (optional) If a callback is specified with the | |
| 1408 cb parameter, this parameter determines the granularity of | |
| 1409 the callback by defining the maximum number of times the | |
| 1410 callback will be called during the file transfer. | |
| 1411 | |
| 1412 :type policy: :class:`boto.s3.acl.CannedACLStrings` | |
| 1413 :param policy: A canned ACL policy that will be applied to the | |
| 1414 new key in S3. | |
| 1415 | |
| 1416 :type md5: tuple | |
| 1417 :param md5: A tuple containing the hexdigest version of the MD5 | |
| 1418 checksum of the file as the first element and the | |
| 1419 Base64-encoded version of the plain checksum as the second | |
| 1420 element. This is the same format returned by the | |
| 1421 compute_md5 method. If you need to compute the MD5 for any | |
| 1422 reason prior to upload, supply it here so it is not | |
| 1423 computed twice; if present, it will be used as the MD5 | |
| 1424 values of the file. Otherwise, the checksum will be computed. | |
| 1425 | |
| 1426 :type reduced_redundancy: bool | |
| 1427 :param reduced_redundancy: If True, this will set the storage | |
| 1428 class of the new Key to be REDUCED_REDUNDANCY. The Reduced | |
| 1429 Redundancy Storage (RRS) feature of S3 provides lower | |
| 1430 redundancy at lower storage cost. | |
| 1431 | |
| 1432 :type encrypt_key: bool | |
| 1433 :param encrypt_key: If True, the new copy of the object will | |
| 1434 be encrypted on the server-side by S3 and will be stored | |
| 1435 in an encrypted form while at rest in S3. | |
| 1436 """ | |
| 1437 if not isinstance(string_data, bytes): | |
| 1438 string_data = string_data.encode("utf-8") | |
| 1439 fp = BytesIO(string_data) | |
| 1440 r = self.set_contents_from_file(fp, headers, replace, cb, num_cb, | |
| 1441 policy, md5, reduced_redundancy, | |
| 1442 encrypt_key=encrypt_key) | |
| 1443 fp.close() | |
| 1444 return r | |
| 1445 | |
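| # Usage sketch (illustrative comment only; names are hypothetical): text | |
| # is encoded to UTF-8 bytes before upload, and replace=False skips the | |
| # upload when the key already exists. | |
| # | |
| #   key = bucket.new_key('notes/hello.txt') | |
| #   key.set_contents_from_string(u'hello world', replace=False) | |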
| 1446 def get_file(self, fp, headers=None, cb=None, num_cb=10, | |
| 1447 torrent=False, version_id=None, override_num_retries=None, | |
| 1448 response_headers=None): | |
| 1449 """ | |
| 1450 Retrieves a file from an S3 Key | |
| 1451 | |
| 1452 :type fp: file | |
| 1453 :param fp: File pointer to put the data into | |
| 1454 | |
| 1455 :type headers: dict | |
| 1456 :param headers: headers to send when retrieving the file | |
| 1457 | |
| 1458 :type cb: function | |
| 1459 :param cb: a callback function that will be called to report | |
| 1460 progress on the download. The callback should accept two | |
| 1461 integer parameters, the first representing the number of | |
| 1462 bytes that have been successfully transmitted from S3 and | |
| 1463 the second representing the total size of the object to | |
| 1464 be transmitted. | |
| 1465 | |
| 1466 :type num_cb: int | |
| 1467 :param num_cb: (optional) If a callback is specified with the | |
| 1468 cb parameter this parameter determines the granularity of | |
| 1469 the callback by defining the maximum number of times the | |
| 1470 callback will be called during the file transfer. | |
| 1471 | |
| 1472 :type torrent: bool | |
| 1473 :param torrent: Flag for whether to get a torrent for the file | |
| 1474 | |
| 1475 :type override_num_retries: int | |
| 1476 :param override_num_retries: If not None will override configured | |
| 1477 num_retries parameter for underlying GET. | |
| 1478 | |
| 1479 :type response_headers: dict | |
| 1480 :param response_headers: A dictionary containing HTTP | |
| 1481 headers/values that will override any headers associated | |
| 1482 with the stored object in the response. See | |
| 1483 http://goo.gl/EWOPb for details. | |
| 1484 | |
| 1485 :type version_id: str | |
| 1486 :param version_id: The ID of a particular version of the object. | |
| 1487 If this parameter is not supplied but the Key object has | |
| 1488 a ``version_id`` attribute, that value will be used when | |
| 1489 retrieving the object. You can set the Key object's | |
| 1490 ``version_id`` attribute to None to always grab the latest | |
| 1491 version from a version-enabled bucket. | |
| 1492 """ | |
| 1493 self._get_file_internal(fp, headers=headers, cb=cb, num_cb=num_cb, | |
| 1494 torrent=torrent, version_id=version_id, | |
| 1495 override_num_retries=override_num_retries, | |
| 1496 response_headers=response_headers, | |
| 1497 hash_algs=None, | |
| 1498 query_args=None) | |
| 1499 | |
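| # Usage sketch (illustrative comment only; names are hypothetical): | |
| # response_headers maps to S3's response-* query parameters, overriding | |
| # headers returned with the GET, e.g. to force a Content-Type. | |
| # | |
| #   with open('/tmp/report.csv', 'wb') as fp: | |
| #       key.get_file(fp, response_headers={ | |
| #           'response-content-type': 'text/csv'}) | |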
| 1500 def _get_file_internal(self, fp, headers=None, cb=None, num_cb=10, | |
| 1501 torrent=False, version_id=None, override_num_retries=None, | |
| 1502 response_headers=None, hash_algs=None, query_args=None): | |
| 1503 if headers is None: | |
| 1504 headers = {} | |
| 1505 save_debug = self.bucket.connection.debug | |
| 1506 if self.bucket.connection.debug == 1: | |
| 1507 self.bucket.connection.debug = 0 | |
| 1508 | |
| 1509 query_args = query_args or [] | |
| 1510 if torrent: | |
| 1511 query_args.append('torrent') | |
| 1512 | |
| 1513 if hash_algs is None and not torrent: | |
| 1514 hash_algs = {'md5': md5} | |
| 1515 digesters = dict((alg, hash_algs[alg]()) for alg in hash_algs or {}) | |
| 1516 | |
| 1517 # If a version_id is passed in, use that. If not, check to see | |
| 1518 # if the Key object has an explicit version_id and, if so, use that. | |
| 1519 # Otherwise, don't pass a version_id query param. | |
| 1520 if version_id is None: | |
| 1521 version_id = self.version_id | |
| 1522 if version_id: | |
| 1523 query_args.append('versionId=%s' % version_id) | |
| 1524 if response_headers: | |
| 1525 for key in response_headers: | |
| 1526 query_args.append('%s=%s' % ( | |
| 1527 key, urllib.parse.quote(response_headers[key]))) | |
| 1528 query_args = '&'.join(query_args) | |
| 1529 self.open('r', headers, query_args=query_args, | |
| 1530 override_num_retries=override_num_retries) | |
| 1531 | |
| 1532 data_len = 0 | |
| 1533 if cb: | |
| 1534 if self.size is None: | |
| 1535 cb_size = 0 | |
| 1536 else: | |
| 1537 cb_size = self.size | |
| 1538 if self.size is None and num_cb != -1: | |
| 1539 # If size is not available due to chunked transfer for example, | |
| 1540 # we'll call the cb for every 1MB of data transferred. | |
| 1541 cb_count = (1024 * 1024) / self.BufferSize | |
| 1542 elif num_cb > 1: | |
| 1543 cb_count = int(math.ceil(cb_size/self.BufferSize/(num_cb-1.0))) | |
| 1544 elif num_cb < 0: | |
| 1545 cb_count = -1 | |
| 1546 else: | |
| 1547 cb_count = 0 | |
| 1548 i = 0 | |
| 1549 cb(data_len, cb_size) | |
| 1550 try: | |
| 1551 for chunk in self: | |
| 1552 fp.write(chunk) | |
| 1553 data_len += len(chunk) | |
| 1554 for alg in digesters: | |
| 1555 digesters[alg].update(chunk) | |
| 1556 if cb: | |
| 1557 if cb_size > 0 and data_len >= cb_size: | |
| 1558 break | |
| 1559 i += 1 | |
| 1560 if i == cb_count or cb_count == -1: | |
| 1561 cb(data_len, cb_size) | |
| 1562 i = 0 | |
| 1563 except IOError as e: | |
| 1564 if e.errno == errno.ENOSPC: | |
| 1565 raise StorageDataError('Out of space for destination file ' | |
| 1566 '%s' % fp.name) | |
| 1567 raise | |
| 1568 if cb and (cb_count <= 1 or i > 0) and data_len > 0: | |
| 1569 cb(data_len, cb_size) | |
| 1570 for alg in digesters: | |
| 1571 self.local_hashes[alg] = digesters[alg].digest() | |
| 1572 if self.size is None and not torrent and "Range" not in headers: | |
| 1573 self.size = data_len | |
| 1574 self.close() | |
| 1575 self.bucket.connection.debug = save_debug | |
| 1576 | |
| 1577 def get_torrent_file(self, fp, headers=None, cb=None, num_cb=10): | |
| 1578 """ | |
| 1579 Get a torrent file (see get_file) | |
| 1580 | |
| 1581 :type fp: file | |
| 1582 :param fp: The file pointer of where to put the torrent | |
| 1583 | |
| 1584 :type headers: dict | |
| 1585 :param headers: Headers to be passed | |
| 1586 | |
| 1587 :type cb: function | |
| 1588 :param cb: a callback function that will be called to report | |
| 1589 progress on the download. The callback should accept two | |
| 1590 integer parameters, the first representing the number of | |
| 1591 bytes that have been successfully transmitted from S3 and | |
| 1592 the second representing the total size of the object to | |
| 1593 be transmitted. | |
| 1594 | |
| 1595 :type num_cb: int | |
| 1596 :param num_cb: (optional) If a callback is specified with the | |
| 1597 cb parameter this parameter determines the granularity of | |
| 1598 the callback by defining the maximum number of times the | |
| 1599 callback will be called during the file transfer. | |
| 1600 | |
| 1601 """ | |
| 1602 return self.get_file(fp, headers, cb, num_cb, torrent=True) | |
| 1603 | |
| 1604 def get_contents_to_file(self, fp, headers=None, | |
| 1605 cb=None, num_cb=10, | |
| 1606 torrent=False, | |
| 1607 version_id=None, | |
| 1608 res_download_handler=None, | |
| 1609 response_headers=None): | |
| 1610 """ | |
| 1611 Retrieve an object from S3 using the name of the Key object as the | |
| 1612 key in S3. Write the contents of the object to the file pointed | |
| 1613 to by 'fp'. | |
| 1614 | |
| 1615 :type fp: file-like object | |
| 1616 :param fp: the file object to which the key's contents will be written | |
| 1617 | |
| 1618 :type headers: dict | |
| 1619 :param headers: additional HTTP headers that will be sent with | |
| 1620 the GET request. | |
| 1621 | |
| 1622 :type cb: function | |
| 1623 :param cb: a callback function that will be called to report | |
| 1624 progress on the download. The callback should accept two | |
| 1625 integer parameters, the first representing the number of | |
| 1626 bytes that have been successfully transmitted from S3 and | |
| 1627 the second representing the total size of the object to | |
| 1628 be transmitted. | |
| 1629 | |
| 1630 :type num_cb: int | |
| 1631 :param num_cb: (optional) If a callback is specified with the | |
| 1632 cb parameter this parameter determines the granularity of | |
| 1633 the callback by defining the maximum number of times the | |
| 1634 callback will be called during the file transfer. | |
| 1635 | |
| 1636 :type torrent: bool | |
| 1637 :param torrent: If True, retrieves the torrent file for the | |
| 1638 object instead of the object's contents. | |
| 1639 | |
| 1640 :type res_download_handler: ResumableDownloadHandler | |
| 1641 :param res_download_handler: If provided, this handler will | |
| 1642 perform the download. | |
| 1643 | |
| 1644 :type response_headers: dict | |
| 1645 :param response_headers: A dictionary containing HTTP | |
| 1646 headers/values that will override any headers associated | |
| 1647 with the stored object in the response. See | |
| 1648 http://goo.gl/EWOPb for details. | |
| 1649 | |
| 1650 :type version_id: str | |
| 1651 :param version_id: The ID of a particular version of the object. | |
| 1652 If this parameter is not supplied but the Key object has | |
| 1653 a ``version_id`` attribute, that value will be used when | |
| 1654 retrieving the object. You can set the Key object's | |
| 1655 ``version_id`` attribute to None to always grab the latest | |
| 1656 version from a version-enabled bucket. | |
| 1657 """ | |
| 1658 if self.bucket is not None: | |
| 1659 if res_download_handler: | |
| 1660 res_download_handler.get_file(self, fp, headers, cb, num_cb, | |
| 1661 torrent=torrent, | |
| 1662 version_id=version_id) | |
| 1663 else: | |
| 1664 self.get_file(fp, headers, cb, num_cb, torrent=torrent, | |
| 1665 version_id=version_id, | |
| 1666 response_headers=response_headers) | |
| 1667 | |
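| # Usage sketch (illustrative comment only; names and the version id are | |
| # hypothetical): download into an open file object with a progress | |
| # callback, optionally pinning a specific version. | |
| # | |
| #   def progress(transmitted, total): | |
| #       print('%d of %s bytes received' % (transmitted, total)) | |
| # | |
| #   with open('/tmp/report.csv', 'wb') as fp: | |
| #       key.get_contents_to_file(fp, cb=progress, num_cb=10, | |
| #                                version_id='example-version-id') | |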
| 1668 def get_contents_to_filename(self, filename, headers=None, | |
| 1669 cb=None, num_cb=10, | |
| 1670 torrent=False, | |
| 1671 version_id=None, | |
| 1672 res_download_handler=None, | |
| 1673 response_headers=None): | |
| 1674 """ | |
| 1675 Retrieve an object from S3 using the name of the Key object as the | |
| 1676 key in S3. Store contents of the object to a file named by 'filename'. | |
| 1677 See get_contents_to_file method for details about the | |
| 1678 parameters. | |
| 1679 | |
| 1680 :type filename: string | |
| 1681 :param filename: The filename of where to put the file contents | |
| 1682 | |
| 1683 :type headers: dict | |
| 1684 :param headers: Any additional headers to send in the request | |
| 1685 | |
| 1686 :type cb: function | |
| 1687 :param cb: a callback function that will be called to report | |
| 1688 progress on the download. The callback should accept two | |
| 1689 integer parameters, the first representing the number of | |
| 1690 bytes that have been successfully transmitted from S3 and | |
| 1691 the second representing the total size of the object to | |
| 1692 be transmitted. | |
| 1693 | |
| 1694 :type num_cb: int | |
| 1695 :param num_cb: (optional) If a callback is specified with the | |
| 1696 cb parameter this parameter determines the granularity of | |
| 1697 the callback by defining the maximum number of times the | |
| 1698 callback will be called during the file transfer. | |
| 1699 | |
| 1700 :type torrent: bool | |
| 1701 :param torrent: If True, retrieves the torrent file for the | |
| 1702 object instead of the object's contents. | |
| 1703 | |
| 1704 :type res_download_handler: ResumableDownloadHandler | |
| 1705 :param res_download_handler: If provided, this handler will | |
| 1706 perform the download. | |
| 1707 | |
| 1708 :type response_headers: dict | |
| 1709 :param response_headers: A dictionary containing HTTP | |
| 1710 headers/values that will override any headers associated | |
| 1711 with the stored object in the response. See | |
| 1712 http://goo.gl/EWOPb for details. | |
| 1713 | |
| 1714 :type version_id: str | |
| 1715 :param version_id: The ID of a particular version of the object. | |
| 1716 If this parameter is not supplied but the Key object has | |
| 1717 a ``version_id`` attribute, that value will be used when | |
| 1718 retrieving the object. You can set the Key object's | |
| 1719 ``version_id`` attribute to None to always grab the latest | |
| 1720 version from a version-enabled bucket. | |
| 1721 """ | |
| 1722 try: | |
| 1723 with open(filename, 'wb') as fp: | |
| 1724 self.get_contents_to_file(fp, headers, cb, num_cb, | |
| 1725 torrent=torrent, | |
| 1726 version_id=version_id, | |
| 1727 res_download_handler=res_download_handler, | |
| 1728 response_headers=response_headers) | |
| 1729 except Exception: | |
| 1730 os.remove(filename) | |
| 1731 raise | |
| 1732 # if last_modified date was sent from s3, try to set file's timestamp | |
| 1733 if self.last_modified is not None: | |
| 1734 try: | |
| 1735 modified_tuple = email.utils.parsedate_tz(self.last_modified) | |
| 1736 modified_stamp = int(email.utils.mktime_tz(modified_tuple)) | |
| 1737 os.utime(fp.name, (modified_stamp, modified_stamp)) | |
| 1738 except Exception: | |
| 1739 pass | |
| 1740 | |
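| # Usage sketch (illustrative comment only; the path is hypothetical): the | |
| # destination file is removed again if the download fails, and its mtime | |
| # is set from the key's Last-Modified value when available. | |
| # | |
| #   key.get_contents_to_filename('/tmp/report.csv') | |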
| 1741 def get_contents_as_string(self, headers=None, | |
| 1742 cb=None, num_cb=10, | |
| 1743 torrent=False, | |
| 1744 version_id=None, | |
| 1745 response_headers=None, encoding=None): | |
| 1746 """ | |
| 1747 Retrieve an object from S3 using the name of the Key object as the | |
| 1748 key in S3. Return the contents of the object as a string. | |
| 1749 See get_contents_to_file method for details about the | |
| 1750 parameters. | |
| 1751 | |
| 1752 :type headers: dict | |
| 1753 :param headers: Any additional headers to send in the request | |
| 1754 | |
| 1755 :type cb: function | |
| 1756 :param cb: a callback function that will be called to report | |
| 1757 progress on the download. The callback should accept two | |
| 1758 integer parameters, the first representing the number of | |
| 1759 bytes that have been successfully transmitted from S3 and | |
| 1760 the second representing the total size of the object to | |
| 1761 be transmitted. | |
| 1762 | |
| 1763 :type num_cb: int | |
| 1764 :param num_cb: (optional) If a callback is specified with the | |
| 1765 cb parameter this parameter determines the granularity of | |
| 1766 the callback by defining the maximum number of times the | |
| 1767 callback will be called during the file transfer. | |
| 1768 | |
| 1769 :type torrent: bool | |
| 1770 :param torrent: If True, returns the contents of a torrent file | |
| 1771 as a string. | |
| 1772 | |
| 1773 :type response_headers: dict | |
| 1774 :param response_headers: A dictionary containing HTTP | |
| 1775 headers/values that will override any headers associated | |
| 1776 with the stored object in the response. See | |
| 1777 http://goo.gl/EWOPb for details. | |
| 1778 | |
| 1779 :type version_id: str | |
| 1780 :param version_id: The ID of a particular version of the object. | |
| 1781 If this parameter is not supplied but the Key object has | |
| 1782 a ``version_id`` attribute, that value will be used when | |
| 1783 retrieving the object. You can set the Key object's | |
| 1784 ``version_id`` attribute to None to always grab the latest | |
| 1785 version from a version-enabled bucket. | |
| 1786 | |
| 1787 :type encoding: str | |
| 1788 :param encoding: The text encoding to use, such as ``utf-8`` | |
| 1789 or ``iso-8859-1``. If set, then a string will be returned. | |
| 1790 Defaults to ``None`` and returns bytes. | |
| 1791 | |
| 1792 :rtype: bytes or str | |
| 1793 :returns: The contents of the file as bytes or a string | |
| 1794 """ | |
| 1795 fp = BytesIO() | |
| 1796 self.get_contents_to_file(fp, headers, cb, num_cb, torrent=torrent, | |
| 1797 version_id=version_id, | |
| 1798 response_headers=response_headers) | |
| 1799 value = fp.getvalue() | |
| 1800 | |
| 1801 if encoding is not None: | |
| 1802 value = value.decode(encoding) | |
| 1803 | |
| 1804 return value | |
| 1805 | |
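| # Usage sketch (illustrative comment only): bytes are returned by | |
| # default; passing encoding decodes them to a text string. | |
| # | |
| #   raw = key.get_contents_as_string()                   # bytes | |
| #   text = key.get_contents_as_string(encoding='utf-8')  # str | |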
| 1806 def add_email_grant(self, permission, email_address, headers=None): | |
| 1807 """ | |
| 1808 Convenience method that provides a quick way to add an email grant | |
| 1809 to a key. This method retrieves the current ACL, creates a new | |
| 1810 grant based on the parameters passed in, adds that grant to the ACL | |
| 1811 and then PUT's the new ACL back to S3. | |
| 1812 | |
| 1813 :type permission: string | |
| 1814 :param permission: The permission being granted. Should be one of: | |
| 1815 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). | |
| 1816 | |
| 1817 :type email_address: string | |
| 1818 :param email_address: The email address associated with the AWS | |
| 1819 account you are granting the permission to. | |
| 1820 | |
| 1821 :type headers: dict | |
| 1822 :param headers: Additional headers to pass along with the | |
| 1823 request to AWS. | |
| 1828 """ | |
| 1829 policy = self.get_acl(headers=headers) | |
| 1830 policy.acl.add_email_grant(permission, email_address) | |
| 1831 self.set_acl(policy, headers=headers) | |
| 1832 | |
| 1833 def add_user_grant(self, permission, user_id, headers=None, | |
| 1834 display_name=None): | |
| 1835 """ | |
| 1836 Convenience method that provides a quick way to add a canonical | |
| 1837 user grant to a key. This method retrieves the current ACL, | |
| 1838 creates a new grant based on the parameters passed in, adds that | |
| 1839 grant to the ACL and then PUT's the new ACL back to S3. | |
| 1840 | |
| 1841 :type permission: string | |
| 1842 :param permission: The permission being granted. Should be one of: | |
| 1843 (READ, WRITE, READ_ACP, WRITE_ACP, FULL_CONTROL). | |
| 1844 | |
| 1845 :type user_id: string | |
| 1846 :param user_id: The canonical user id associated with the AWS | |
| 1847 account you are granting the permission to. | |
| 1848 | |
| 1849 :type display_name: string | |
| 1850 :param display_name: An optional string containing the user's | |
| 1851 Display Name. Only required on Walrus. | |
| 1852 """ | |
| 1853 policy = self.get_acl(headers=headers) | |
| 1854 policy.acl.add_user_grant(permission, user_id, | |
| 1855 display_name=display_name) | |
| 1856 self.set_acl(policy, headers=headers) | |
| 1857 | |
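| # Usage sketch (illustrative comment only; the address and id below are | |
| # hypothetical): both helpers fetch the current ACL, append one grant and | |
| # PUT the ACL back, so each call is a full round trip. | |
| # | |
| #   key.add_email_grant('READ', 'someone@example.com') | |
| #   key.add_user_grant('FULL_CONTROL', 'example-canonical-user-id') | |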
| 1858 def _normalize_metadata(self, metadata): | |
| 1859 if isinstance(metadata, set): | |
| 1860 norm_metadata = set() | |
| 1861 for k in metadata: | |
| 1862 norm_metadata.add(k.lower()) | |
| 1863 else: | |
| 1864 norm_metadata = {} | |
| 1865 for k in metadata: | |
| 1866 norm_metadata[k.lower()] = metadata[k] | |
| 1867 return norm_metadata | |
| 1868 | |
| 1869 def _get_remote_metadata(self, headers=None): | |
| 1870 """ | |
| 1871 Extracts metadata from existing URI into a dict, so we can | |
| 1872 overwrite/delete from it to form the new set of metadata to apply to a | |
| 1873 key. | |
| 1874 """ | |
| 1875 metadata = {} | |
| 1876 for underscore_name in self._underscore_base_user_settable_fields: | |
| 1877 if hasattr(self, underscore_name): | |
| 1878 value = getattr(self, underscore_name) | |
| 1879 if value: | |
| 1880 # Generate HTTP field name corresponding to "_" named field. | |
| 1881 field_name = underscore_name.replace('_', '-') | |
| 1882 metadata[field_name.lower()] = value | |
| 1883 # self.metadata contains custom metadata, which are all user-settable. | |
| 1884 prefix = self.provider.metadata_prefix | |
| 1885 for underscore_name in self.metadata: | |
| 1886 field_name = underscore_name.replace('_', '-') | |
| 1887 metadata['%s%s' % (prefix, field_name.lower())] = ( | |
| 1888 self.metadata[underscore_name]) | |
| 1889 return metadata | |
| 1890 | |
| 1891 def set_remote_metadata(self, metadata_plus, metadata_minus, preserve_acl, | |
| 1892 headers=None): | |
| 1893 metadata_plus = self._normalize_metadata(metadata_plus) | |
| 1894 metadata_minus = self._normalize_metadata(metadata_minus) | |
| 1895 metadata = self._get_remote_metadata() | |
| 1896 metadata.update(metadata_plus) | |
| 1897 for h in metadata_minus: | |
| 1898 if h in metadata: | |
| 1899 del metadata[h] | |
| 1900 src_bucket = self.bucket | |
| 1901 # Boto prepends the meta prefix when adding headers, so strip the | |
| 1902 # prefix from metadata before sending it back in the copy_key() call. | |
| 1903 rewritten_metadata = {} | |
| 1904 for h in metadata: | |
| 1905 if (h.startswith('x-goog-meta-') or h.startswith('x-amz-meta-')): | |
| 1906 rewritten_h = (h.replace('x-goog-meta-', '') | |
| 1907 .replace('x-amz-meta-', '')) | |
| 1908 else: | |
| 1909 rewritten_h = h | |
| 1910 rewritten_metadata[rewritten_h] = metadata[h] | |
| 1911 metadata = rewritten_metadata | |
| 1912 src_bucket.copy_key(self.name, self.bucket.name, self.name, | |
| 1913 metadata=metadata, preserve_acl=preserve_acl, | |
| 1914 headers=headers) | |
| 1915 | |
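| # Usage sketch (illustrative comment only; the header names are just | |
| # examples): metadata is rewritten by copying the key onto itself, | |
| # merging in metadata_plus and dropping metadata_minus. | |
| # | |
| #   key.set_remote_metadata({'content-type': 'text/plain'}, | |
| #                           {'x-amz-meta-obsolete-field'}, | |
| #                           preserve_acl=True) | |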
| 1916 def restore(self, days, headers=None): | |
| 1917 """Restore an object from an archive. | |
| 1918 | |
| 1919 :type days: int | |
| 1920 :param days: The lifetime of the restored object (must | |
| 1921 be at least 1 day). If the object is already restored | |
| 1922 then this parameter can be used to readjust the lifetime | |
| 1923 of the restored object. In this case, the days | |
| 1924 param is with respect to the initial time of the request. | |
| 1925 If the object has not been restored, this param is with | |
| 1926 respect to the completion time of the request. | |
| 1927 | |
| 1928 """ | |
| 1929 response = self.bucket.connection.make_request( | |
| 1930 'POST', self.bucket.name, self.name, | |
| 1931 data=self.RestoreBody % days, | |
| 1932 headers=headers, query_args='restore') | |
| 1933 if response.status not in (200, 202): | |
| 1934 provider = self.bucket.connection.provider | |
| 1935 raise provider.storage_response_error(response.status, | |
| 1936 response.reason, | |
| 1937 response.read()) |
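| # Usage sketch (illustrative comment only): request that an archived | |
| # (e.g. Glacier) object be restored and kept available for 7 days; the | |
| # restore itself completes asynchronously on the service side. | |
| # | |
| #   key.restore(days=7) | |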
