comparison retrieve_fasta_from_NCBI.py @ 5:c6de5c7b4ae3 draft default tip

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/fetch_fasta_from_ncbi commit 11ca680184135ef39a6c552d9f3cc427a8ed6c4c
author drosofff
date Fri, 16 Jun 2017 05:28:17 -0400
parents 64f45c5e94a0
children
comparison (legend: equal | deleted | inserted | replaced)
left column: revision 4:64f45c5e94a0 | right column: revision 5:c6de5c7b4ae3
28 import time 28 import time
29 import urllib 29 import urllib
30 import urllib2 30 import urllib2
31 import httplib 31 import httplib
32 import re 32 import re
33
34
35 class QueryException(Exception):
36 pass
33 37
34 38
35 class Eutils: 39 class Eutils:
36 40
37 def __init__(self, options, logger): 41 def __init__(self, options, logger):
60 self.get_count_value() 64 self.get_count_value()
61 65
62 # If no UIDs are found exit script 66 # If no UIDs are found exit script
63 if self.count > 0: 67 if self.count > 0:
64 self.get_uids_list() 68 self.get_uids_list()
65 self.get_sequences() 69 try:
70 self.get_sequences()
71 except QueryException as e:
72 self.logger.error("Exiting script.")
73 raise e
66 else: 74 else:
67 self.logger.info("No UIDs were found. Exiting script.") 75 self.logger.error("No UIDs were found. Exiting script.")
76 raise Exception("")
68 77
69 def get_count_value(self): 78 def get_count_value(self):
70 """ 79 """
71 just to retrieve Count (number of UIDs) 80 just to retrieve Count (number of UIDs)
72 Total number of UIDs from the retrieved set to be shown in the XML 81 Total number of UIDs from the retrieved set to be shown in the XML
193 fasta = response.read() 202 fasta = response.read()
194 response.close() 203 response.close()
195 if ( (response_code != 200) or ("Resource temporarily unavailable" in fasta) 204 if ( (response_code != 200) or ("Resource temporarily unavailable" in fasta)
196 or ("Error" in fasta) or (not fasta.startswith(">") ) ): 205 or ("Error" in fasta) or (not fasta.startswith(">") ) ):
197 serverTransaction = False 206 serverTransaction = False
207 if ( response_code != 200 ):
208 self.logger.info("urlopen error: Response code is not 200")
209 elif ( "Resource temporarily unavailable" in fasta ):
210 self.logger.info("Ressource temporarily unavailable")
211 elif ( "Error" in fasta ):
212 self.logger.info("Error in fasta")
213 else:
214 self.logger.info("Fasta doesn't start with '>'")
198 else: 215 else:
199 serverTransaction = True 216 serverTransaction = True
200 except urllib2.HTTPError as e: 217 except urllib2.HTTPError as e:
201 serverTransaction = False 218 serverTransaction = False
202 self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) ) 219 self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) )
205 self.logger.info("urlopen error: Failed to reach a server") 222 self.logger.info("urlopen error: Failed to reach a server")
206 self.logger.info("Reason :%s" % ( e.reason ) ) 223 self.logger.info("Reason :%s" % ( e.reason ) )
207 except httplib.IncompleteRead as e: 224 except httplib.IncompleteRead as e:
208 serverTransaction = False 225 serverTransaction = False
209 self.logger.info("IncompleteRead error: %s" % ( e.partial ) ) 226 self.logger.info("IncompleteRead error: %s" % ( e.partial ) )
227 if (counter > 500):
228 serverTransaction = True
229 if (counter > 500):
230 raise QueryException({"message":"500 Server Transaction Trials attempted for this batch. Aborting."})
210 fasta = self.sanitiser(self.dbname, fasta) 231 fasta = self.sanitiser(self.dbname, fasta)
211 time.sleep(0.1) 232 time.sleep(0.1)
212 return fasta 233 return fasta
213 234
214 def sanitiser(self, db, fastaseq): 235 def sanitiser(self, db, fastaseq):
268 batch = uids_list[start:end] 289 batch = uids_list[start:end]
269 if self.epost(self.dbname, ",".join(batch)) != -1: 290 if self.epost(self.dbname, ",".join(batch)) != -1:
270 mfasta = '' 291 mfasta = ''
271 while not mfasta: 292 while not mfasta:
272 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) 293 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1))
273 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) 294 try:
274 out.write(mfasta + '\n') 295 mfasta = self.efetch(self.dbname, self.query_key, self.webenv)
296 out.write(mfasta + '\n')
297 except QueryException as e:
298 self.logger.error("%s" % e.message)
299 raise e
275 300
276 301
277 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' 302 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s'
278 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' 303 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S'
279 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] 304 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
299 kwargs['filename'] = options.logfile 324 kwargs['filename'] = options.logfile
300 logging.basicConfig(**kwargs) 325 logging.basicConfig(**kwargs)
301 logger = logging.getLogger('data_from_NCBI') 326 logger = logging.getLogger('data_from_NCBI')
302 327
303 E = Eutils(options, logger) 328 E = Eutils(options, logger)
304 E.retrieve() 329 try:
330 E.retrieve()
331 except Exception as e:
332 sys.exit(1)
305 333
306 334
307 if __name__ == "__main__": 335 if __name__ == "__main__":
308 __main__() 336 __main__()