Mercurial > repos > drosofff > fetch_fasta_from_ncbi
comparison retrieve_fasta_from_NCBI.py @ 5:c6de5c7b4ae3 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/fetch_fasta_from_ncbi commit 11ca680184135ef39a6c552d9f3cc427a8ed6c4c
author | drosofff |
---|---|
date | Fri, 16 Jun 2017 05:28:17 -0400 |
parents | 64f45c5e94a0 |
children |
Comparison legend: equal | deleted | inserted | replaced
4:64f45c5e94a0 | 5:c6de5c7b4ae3 |
---|---|
28 import time | 28 import time |
29 import urllib | 29 import urllib |
30 import urllib2 | 30 import urllib2 |
31 import httplib | 31 import httplib |
32 import re | 32 import re |
33 | |
34 | |
35 class QueryException(Exception): | |
36 pass | |
33 | 37 |
34 | 38 |
35 class Eutils: | 39 class Eutils: |
36 | 40 |
37 def __init__(self, options, logger): | 41 def __init__(self, options, logger): |
60 self.get_count_value() | 64 self.get_count_value() |
61 | 65 |
62 # If no UIDs are found exit script | 66 # If no UIDs are found exit script |
63 if self.count > 0: | 67 if self.count > 0: |
64 self.get_uids_list() | 68 self.get_uids_list() |
65 self.get_sequences() | 69 try: |
70 self.get_sequences() | |
71 except QueryException as e: | |
72 self.logger.error("Exiting script.") | |
73 raise e | |
66 else: | 74 else: |
67 self.logger.info("No UIDs were found. Exiting script.") | 75 self.logger.error("No UIDs were found. Exiting script.") |
76 raise Exception("") | |
68 | 77 |
69 def get_count_value(self): | 78 def get_count_value(self): |
70 """ | 79 """ |
71 just to retrieve Count (number of UIDs) | 80 just to retrieve Count (number of UIDs) |
72 Total number of UIDs from the retrieved set to be shown in the XML | 81 Total number of UIDs from the retrieved set to be shown in the XML |
193 fasta = response.read() | 202 fasta = response.read() |
194 response.close() | 203 response.close() |
195 if ( (response_code != 200) or ("Resource temporarily unavailable" in fasta) | 204 if ( (response_code != 200) or ("Resource temporarily unavailable" in fasta) |
196 or ("Error" in fasta) or (not fasta.startswith(">") ) ): | 205 or ("Error" in fasta) or (not fasta.startswith(">") ) ): |
197 serverTransaction = False | 206 serverTransaction = False |
207 if ( response_code != 200 ): | |
208 self.logger.info("urlopen error: Response code is not 200") | |
209 elif ( "Resource temporarily unavailable" in fasta ): | |
210 self.logger.info("Ressource temporarily unavailable") | |
211 elif ( "Error" in fasta ): | |
212 self.logger.info("Error in fasta") | |
213 else: | |
214 self.logger.info("Fasta doesn't start with '>'") | |
198 else: | 215 else: |
199 serverTransaction = True | 216 serverTransaction = True |
200 except urllib2.HTTPError as e: | 217 except urllib2.HTTPError as e: |
201 serverTransaction = False | 218 serverTransaction = False |
202 self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) ) | 219 self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) ) |
205 self.logger.info("urlopen error: Failed to reach a server") | 222 self.logger.info("urlopen error: Failed to reach a server") |
206 self.logger.info("Reason :%s" % ( e.reason ) ) | 223 self.logger.info("Reason :%s" % ( e.reason ) ) |
207 except httplib.IncompleteRead as e: | 224 except httplib.IncompleteRead as e: |
208 serverTransaction = False | 225 serverTransaction = False |
209 self.logger.info("IncompleteRead error: %s" % ( e.partial ) ) | 226 self.logger.info("IncompleteRead error: %s" % ( e.partial ) ) |
227 if (counter > 500): | |
228 serverTransaction = True | |
229 if (counter > 500): | |
230 raise QueryException({"message":"500 Server Transaction Trials attempted for this batch. Aborting."}) | |
210 fasta = self.sanitiser(self.dbname, fasta) | 231 fasta = self.sanitiser(self.dbname, fasta) |
211 time.sleep(0.1) | 232 time.sleep(0.1) |
212 return fasta | 233 return fasta |
213 | 234 |
214 def sanitiser(self, db, fastaseq): | 235 def sanitiser(self, db, fastaseq): |
268 batch = uids_list[start:end] | 289 batch = uids_list[start:end] |
269 if self.epost(self.dbname, ",".join(batch)) != -1: | 290 if self.epost(self.dbname, ",".join(batch)) != -1: |
270 mfasta = '' | 291 mfasta = '' |
271 while not mfasta: | 292 while not mfasta: |
272 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) | 293 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) |
273 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) | 294 try: |
274 out.write(mfasta + '\n') | 295 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) |
296 out.write(mfasta + '\n') | |
297 except QueryException as e: | |
298 self.logger.error("%s" % e.message) | |
299 raise e | |
275 | 300 |
276 | 301 |
277 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' | 302 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' |
278 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' | 303 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' |
279 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] | 304 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] |
299 kwargs['filename'] = options.logfile | 324 kwargs['filename'] = options.logfile |
300 logging.basicConfig(**kwargs) | 325 logging.basicConfig(**kwargs) |
301 logger = logging.getLogger('data_from_NCBI') | 326 logger = logging.getLogger('data_from_NCBI') |
302 | 327 |
303 E = Eutils(options, logger) | 328 E = Eutils(options, logger) |
304 E.retrieve() | 329 try: |
330 E.retrieve() | |
331 except Exception as e: | |
332 sys.exit(1) | |
305 | 333 |
306 | 334 |
307 if __name__ == "__main__": | 335 if __name__ == "__main__": |
308 __main__() | 336 __main__() |