Mercurial > repos > guerler > springsuite
comparison planemo/lib/python3.7/site-packages/boto/cloudsearch2/document.py @ 0:d30785e31577 draft
"planemo upload commit 6eee67778febed82ddd413c3ca40b3183a3898f1"
| author | guerler |
|---|---|
| date | Fri, 31 Jul 2020 00:18:57 -0400 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:d30785e31577 |
|---|---|
| 1 # Copyright (c) 2012 Mitch Garnaat http://garnaat.org/ | |
| 2 # Copyright (c) 2014 Amazon.com, Inc. or its affiliates. All Rights Reserved | |
| 3 # | |
| 4 # Permission is hereby granted, free of charge, to any person obtaining a | |
| 5 # copy of this software and associated documentation files (the | |
| 6 # "Software"), to deal in the Software without restriction, including | |
| 7 # without limitation the rights to use, copy, modify, merge, publish, dis- | |
| 8 # tribute, sublicense, and/or sell copies of the Software, and to permit | |
| 9 # persons to whom the Software is furnished to do so, subject to the fol- | |
| 10 # lowing conditions: | |
| 11 # | |
| 12 # The above copyright notice and this permission notice shall be included | |
| 13 # in all copies or substantial portions of the Software. | |
| 14 # | |
| 15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | |
| 16 # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL- | |
| 17 # ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT | |
| 18 # SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, | |
| 19 # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
| 20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS | |
| 21 # IN THE SOFTWARE. | |
| 22 # | |
| 23 | |
| 24 import boto.exception | |
| 25 from boto.compat import json | |
| 26 import requests | |
| 27 import boto | |
| 28 from boto.cloudsearchdomain.layer1 import CloudSearchDomainConnection | |
| 29 | |
| 30 | |
class SearchServiceException(Exception):
    """Exception type exported by this module for search-service failures.

    It adds no state or behaviour of its own on top of :class:`Exception`.
    """
| 33 | |
| 34 | |
class CommitMismatchError(Exception):
    """Raised when a commit's add/delete counts disagree with the response.

    Let's do some extra work and let the user handle errors on his/her own:
    the raiser may attach the error messages collected from the service
    response to the ``errors`` attribute of the exception instance.
    """

    # Error messages attached by the raiser; ``None`` until one is assigned.
    errors = None
| 39 | |
| 40 | |
class EncodingError(Exception):
    """
    Content sent for Cloud Search indexing was incorrectly encoded.

    This usually happens when a document is marked as unicode but contains
    non-unicode characters.
    """
| 49 | |
| 50 | |
class ContentTooLongError(Exception):
    """
    Content sent for Cloud Search indexing was too long.

    This will usually happen when the documents queued for indexing add up
    to more than the limit allowed per upload batch (5MB).
    """
| 60 | |
| 61 | |
class DocumentServiceConnection(object):
    """
    A CloudSearch document service.

    The DocumentServiceConnection is used to add, remove and update documents
    in CloudSearch. Commands are uploaded to CloudSearch in SDF (Search
    Document Format).

    To generate an appropriate SDF, use :func:`add` to add or update documents,
    as well as :func:`delete` to remove documents.

    Once the set of documents is ready to be indexed, use :func:`commit` to
    send the commands to CloudSearch.

    If there are a lot of documents to index, it may be preferable to split the
    generation of SDF data and the actual uploading into CloudSearch. Retrieve
    the current SDF with :func:`get_sdf`. If this file is then uploaded into
    S3, it can be retrieved back afterwards for upload into CloudSearch using
    :func:`add_sdf_from_s3`.

    The SDF is not cleared after a :func:`commit`. If you wish to continue
    using the DocumentServiceConnection for another batch upload of commands,
    you will need to :func:`clear_sdf` first to stop the previous batch of
    commands from being uploaded again.

    """

    def __init__(self, domain=None, endpoint=None):
        """
        Set up the connection state for a document service endpoint.

        :type domain: object
        :param domain: Domain object; its ``doc_service_endpoint`` is used
            when ``endpoint`` is not given, and its ``layer1`` connection
            (if any) supplies proxy, signing and credential settings.

        :type endpoint: string
        :param endpoint: Host name of the document service endpoint.
        """
        self.domain = domain
        self.endpoint = endpoint
        if not self.endpoint:
            self.endpoint = domain.doc_service_endpoint
        self.documents_batch = []
        self._sdf = None

        # Copy proxy settings from connection and check if request should be
        # signed
        self.proxy = {}
        self.sign_request = False
        if self.domain and self.domain.layer1:
            if self.domain.layer1.use_proxy:
                self.proxy = {
                    'http': self.domain.layer1.get_proxy_url_with_auth()
                }

            self.sign_request = getattr(self.domain.layer1, 'sign_request',
                                        False)

            if self.sign_request:
                # Create a domain connection to send signed requests
                layer1 = self.domain.layer1
                self.domain_connection = CloudSearchDomainConnection(
                    host=self.endpoint,
                    aws_access_key_id=layer1.aws_access_key_id,
                    aws_secret_access_key=layer1.aws_secret_access_key,
                    region=layer1.region,
                    provider=layer1.provider
                )

    def add(self, _id, fields):
        """
        Add a document to be processed by the DocumentService

        The document will not actually be added until :func:`commit` is called

        :type _id: string
        :param _id: A unique ID used to refer to this document.

        :type fields: dict
        :param fields: A dictionary of key-value pairs to be uploaded.
        """

        d = {'type': 'add', 'id': _id, 'fields': fields}
        self.documents_batch.append(d)

    def delete(self, _id):
        """
        Schedule a document to be removed from the CloudSearch service

        The document will not actually be scheduled for removal until
        :func:`commit` is called

        :type _id: string
        :param _id: The unique ID of this document.
        """

        d = {'type': 'delete', 'id': _id}
        self.documents_batch.append(d)

    def get_sdf(self):
        """
        Generate the working set of documents in Search Data Format (SDF)

        An SDF loaded through :func:`add_sdf_from_s3` takes precedence over
        the documents queued with :func:`add` and :func:`delete`.

        :rtype: string
        :returns: JSON-formatted string of the documents in SDF
        """

        return self._sdf if self._sdf else json.dumps(self.documents_batch)

    def clear_sdf(self):
        """
        Clear the working documents from this DocumentServiceConnection

        This should be used after :func:`commit` if the connection will be
        reused for another set of documents.
        """

        self._sdf = None
        self.documents_batch = []

    def add_sdf_from_s3(self, key_obj):
        """
        Load an SDF from S3

        Using this method will result in documents added through
        :func:`add` and :func:`delete` being ignored.

        :type key_obj: :class:`boto.s3.key.Key`
        :param key_obj: An S3 key which contains an SDF
        """
        #@todo:: (lucas) would be nice if this could just take an s3://uri..."

        self._sdf = key_obj.get_contents_as_string()

    def _commit_with_auth(self, sdf, api_version):
        """Upload ``sdf`` through the signed domain connection.

        ``api_version`` is accepted for symmetry with
        :func:`_commit_without_auth` but is not used here.
        """
        return self.domain_connection.upload_documents(sdf, 'application/json')

    def _commit_without_auth(self, sdf, api_version):
        """POST ``sdf`` to the batch endpoint over plain HTTP, unsigned.

        :rtype: :class:`requests.models.Response`
        :returns: The HTTP response of the batch upload.
        """
        url = "http://%s/%s/documents/batch" % (self.endpoint, api_version)

        # Keep-alive is automatic in a post-1.0 requests world.
        # The session is used as a context manager so that its adapters and
        # pooled connections are released once the upload is done; the
        # response body has already been read by then (no streaming).
        with requests.Session() as session:
            session.proxies = self.proxy
            adapter = requests.adapters.HTTPAdapter(
                pool_connections=20,
                pool_maxsize=50,
                max_retries=5
            )
            session.mount('http://', adapter)
            session.mount('https://', adapter)

            resp = session.post(url, data=sdf,
                                headers={'Content-Type': 'application/json'})
        return resp

    @staticmethod
    def _null_value_context(sdf, radius=100):
        """Return the text surrounding the first ``: null`` in ``sdf``.

        :type sdf: string or bytes
        :param sdf: The SDF payload to inspect.

        :type radius: int
        :param radius: Number of characters to keep on each side of the match.

        :returns: A slice of ``sdf`` around the first match, or ``None`` when
            ``sdf`` contains no ``: null``.
        """
        # An SDF loaded from S3 may be a bytes object; searching it with a
        # text needle would raise TypeError on Python 3.
        marker = b': null' if isinstance(sdf, bytes) else ': null'
        index = sdf.find(marker)
        if index < 0:
            return None
        # Clamp the start: a negative start would wrap around to the end of
        # the payload and produce a wrong (often empty) snippet.
        return sdf[max(index - radius, 0):index + radius]

    def commit(self):
        """
        Actually send an SDF to CloudSearch for processing

        If an SDF file has been explicitly loaded it will be used. Otherwise,
        documents added through :func:`add` and :func:`delete` will be used.

        :rtype: :class:`CommitResponse`
        :returns: A summary of documents added and deleted
        """

        sdf = self.get_sdf()

        null_context = self._null_value_context(sdf)
        if null_context is not None:
            boto.log.error('null value in sdf detected. This will probably '
                           'raise 500 error.')
            boto.log.error(null_context)

        api_version = '2013-01-01'
        if self.domain and self.domain.layer1:
            api_version = self.domain.layer1.APIVersion

        if self.sign_request:
            r = self._commit_with_auth(sdf, api_version)
        else:
            r = self._commit_without_auth(sdf, api_version)

        return CommitResponse(r, self, sdf, signed_request=self.sign_request)
| 231 | |
| 232 | |
class CommitResponse(object):
    """Wrapper for response to Cloudsearch document batch commit.

    :type response: :class:`requests.models.Response`
    :param response: Response from Cloudsearch /documents/batch API. For a
        signed request this is the already-parsed response content instead.

    :type doc_service: :class:`boto.cloudsearch2.document.DocumentServiceConnection`
    :param doc_service: Object containing the documents posted and methods to
        retry

    :type sdf: string
    :param sdf: The SDF payload that was sent; only used for error logging.

    :type signed_request: bool
    :param signed_request: Whether ``response`` came from a signed upload
        (already parsed) rather than a raw HTTP response.

    :raises: :class:`boto.exception.BotoServerError`
    :raises: :class:`boto.cloudsearch2.document.SearchServiceException`
        (documented upstream; not raised by the code in this class)
    :raises: :class:`boto.cloudsearch2.document.EncodingError`
    :raises: :class:`boto.cloudsearch2.document.ContentTooLongError`
    :raises: :class:`boto.cloudsearch2.document.CommitMismatchError`
    """
    def __init__(self, response, doc_service, sdf, signed_request=False):
        self.response = response
        self.doc_service = doc_service
        self.sdf = sdf
        self.signed_request = signed_request

        if self.signed_request:
            self.content = response
        else:
            _body = response.content.decode('utf-8')

            try:
                self.content = json.loads(_body)
            except ValueError:
                # Only a malformed body is translated into a server error;
                # a bare ``except`` would also intercept KeyboardInterrupt
                # and SystemExit.
                boto.log.error('Error indexing documents.\nResponse Content:\n{0}'
                               '\n\nSDF:\n{1}'.format(_body, self.sdf))
                raise boto.exception.BotoServerError(self.response.status_code, '',
                                                     body=_body)

        self.status = self.content['status']
        if self.status == 'error':
            self.errors = [e.get('message') for e in self.content.get('errors',
                                                                      [])]
            for e in self.errors:
                if e is None:
                    # Error entry without a 'message' key: nothing to match,
                    # and a substring test on None would raise TypeError.
                    continue
                if "Illegal Unicode character" in e:
                    raise EncodingError("Illegal Unicode character in document")
                elif e == "The Content-Length is too long":
                    raise ContentTooLongError("Content was too long")
        else:
            self.errors = []

        self.adds = self.content['adds']
        self.deletes = self.content['deletes']
        self._check_num_ops('add', self.adds)
        self._check_num_ops('delete', self.deletes)

    def _check_num_ops(self, type_, response_num):
        """Raise exception if number of ops in response doesn't match commit

        :type type_: str
        :param type_: Type of commit operation: 'add' or 'delete'

        :type response_num: int
        :param response_num: Number of adds or deletes in the response.

        :raises: :class:`boto.cloudsearch2.document.CommitMismatchError`
        """
        # NOTE(review): the count is taken from doc_service.documents_batch,
        # not from the SDF that was sent. When the SDF was loaded with
        # add_sdf_from_s3 the two can differ -- confirm whether a mismatch is
        # intended in that case.
        commit_num = len([d for d in self.doc_service.documents_batch
                          if d['type'] == type_])

        if response_num != commit_num:
            if self.signed_request:
                boto.log.debug(self.response)
            else:
                boto.log.debug(self.response.content)
            # There will always be a commit mismatch error if there is any
            # errors on cloudsearch. self.errors gets lost when this
            # CommitMismatchError is raised. Whoever is using boto has no idea
            # why their commit failed. They can't even notify the user of the
            # cause by parsing the error messages from amazon. So let's
            # attach the self.errors to the exceptions if we already spent
            # time and effort collecting them out of the response.
            exc = CommitMismatchError(
                'Incorrect number of {0}s returned. Commit: {1} Response: {2}'
                .format(type_, commit_num, response_num)
            )
            exc.errors = self.errors
            raise exc
