Mercurial > repos > drosofff > fetch_fasta_from_ncbi
comparison retrieve_fasta_from_NCBI.py @ 5:c6de5c7b4ae3 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/fetch_fasta_from_ncbi commit 11ca680184135ef39a6c552d9f3cc427a8ed6c4c
| author | drosofff | 
|---|---|
| date | Fri, 16 Jun 2017 05:28:17 -0400 | 
| parents | 64f45c5e94a0 | 
| children | |
   comparison
  legend: equal, deleted, inserted, replaced
| 4:64f45c5e94a0 | 5:c6de5c7b4ae3 | 
|---|---|
| 28 import time | 28 import time | 
| 29 import urllib | 29 import urllib | 
| 30 import urllib2 | 30 import urllib2 | 
| 31 import httplib | 31 import httplib | 
| 32 import re | 32 import re | 
| | 33 |
| | 34 |
| | 35 class QueryException(Exception): |
| | 36 pass |
| 33 | 37 | 
| 34 | 38 | 
| 35 class Eutils: | 39 class Eutils: | 
| 36 | 40 | 
| 37 def __init__(self, options, logger): | 41 def __init__(self, options, logger): | 
| 60 self.get_count_value() | 64 self.get_count_value() | 
| 61 | 65 | 
| 62 # If no UIDs are found exit script | 66 # If no UIDs are found exit script | 
| 63 if self.count > 0: | 67 if self.count > 0: | 
| 64 self.get_uids_list() | 68 self.get_uids_list() | 
| 65 self.get_sequences() | 69 try: | 
| | 70 self.get_sequences() |
| | 71 except QueryException as e: |
| | 72 self.logger.error("Exiting script.") |
| | 73 raise e |
| 66 else: | 74 else: | 
| 67 self.logger.info("No UIDs were found. Exiting script.") | 75 self.logger.error("No UIDs were found. Exiting script.") | 
| | 76 raise Exception("") |
| 68 | 77 | 
| 69 def get_count_value(self): | 78 def get_count_value(self): | 
| 70 """ | 79 """ | 
| 71 just to retrieve Count (number of UIDs) | 80 just to retrieve Count (number of UIDs) | 
| 72 Total number of UIDs from the retrieved set to be shown in the XML | 81 Total number of UIDs from the retrieved set to be shown in the XML | 
| 193 fasta = response.read() | 202 fasta = response.read() | 
| 194 response.close() | 203 response.close() | 
| 195 if ( (response_code != 200) or ("Resource temporarily unavailable" in fasta) | 204 if ( (response_code != 200) or ("Resource temporarily unavailable" in fasta) | 
| 196 or ("Error" in fasta) or (not fasta.startswith(">") ) ): | 205 or ("Error" in fasta) or (not fasta.startswith(">") ) ): | 
| 197 serverTransaction = False | 206 serverTransaction = False | 
| | 207 if ( response_code != 200 ): |
| | 208 self.logger.info("urlopen error: Response code is not 200") |
| | 209 elif ( "Resource temporarily unavailable" in fasta ): |
| | 210 self.logger.info("Ressource temporarily unavailable") |
| | 211 elif ( "Error" in fasta ): |
| | 212 self.logger.info("Error in fasta") |
| | 213 else: |
| | 214 self.logger.info("Fasta doesn't start with '>'") |
| 198 else: | 215 else: | 
| 199 serverTransaction = True | 216 serverTransaction = True | 
| 200 except urllib2.HTTPError as e: | 217 except urllib2.HTTPError as e: | 
| 201 serverTransaction = False | 218 serverTransaction = False | 
| 202 self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) ) | 219 self.logger.info("urlopen error:%s, %s" % (e.code, e.read() ) ) | 
| 205 self.logger.info("urlopen error: Failed to reach a server") | 222 self.logger.info("urlopen error: Failed to reach a server") | 
| 206 self.logger.info("Reason :%s" % ( e.reason ) ) | 223 self.logger.info("Reason :%s" % ( e.reason ) ) | 
| 207 except httplib.IncompleteRead as e: | 224 except httplib.IncompleteRead as e: | 
| 208 serverTransaction = False | 225 serverTransaction = False | 
| 209 self.logger.info("IncompleteRead error: %s" % ( e.partial ) ) | 226 self.logger.info("IncompleteRead error: %s" % ( e.partial ) ) | 
| | 227 if (counter > 500): |
| | 228 serverTransaction = True |
| | 229 if (counter > 500): |
| | 230 raise QueryException({"message":"500 Server Transaction Trials attempted for this batch. Aborting."}) |
| 210 fasta = self.sanitiser(self.dbname, fasta) | 231 fasta = self.sanitiser(self.dbname, fasta) | 
| 211 time.sleep(0.1) | 232 time.sleep(0.1) | 
| 212 return fasta | 233 return fasta | 
| 213 | 234 | 
| 214 def sanitiser(self, db, fastaseq): | 235 def sanitiser(self, db, fastaseq): | 
| 268 batch = uids_list[start:end] | 289 batch = uids_list[start:end] | 
| 269 if self.epost(self.dbname, ",".join(batch)) != -1: | 290 if self.epost(self.dbname, ",".join(batch)) != -1: | 
| 270 mfasta = '' | 291 mfasta = '' | 
| 271 while not mfasta: | 292 while not mfasta: | 
| 272 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) | 293 self.logger.info("retrieving batch %d" % ((start / batch_size) + 1)) | 
| 273 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) | 294 try: | 
| 274 out.write(mfasta + '\n') | 295 mfasta = self.efetch(self.dbname, self.query_key, self.webenv) | 
| | 296 out.write(mfasta + '\n') |
| | 297 except QueryException as e: |
| | 298 self.logger.error("%s" % e.message) |
| | 299 raise e |
| 275 | 300 | 
| 276 | 301 | 
| 277 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' | 302 LOG_FORMAT = '%(asctime)s|%(levelname)-8s|%(message)s' | 
| 278 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' | 303 LOG_DATEFMT = '%Y-%m-%d %H:%M:%S' | 
| 279 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] | 304 LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL'] | 
| 299 kwargs['filename'] = options.logfile | 324 kwargs['filename'] = options.logfile | 
| 300 logging.basicConfig(**kwargs) | 325 logging.basicConfig(**kwargs) | 
| 301 logger = logging.getLogger('data_from_NCBI') | 326 logger = logging.getLogger('data_from_NCBI') | 
| 302 | 327 | 
| 303 E = Eutils(options, logger) | 328 E = Eutils(options, logger) | 
| 304 E.retrieve() | 329 try: | 
| | 330 E.retrieve() |
| | 331 except Exception as e: |
| | 332 sys.exit(1) |
| 305 | 333 | 
| 306 | 334 | 
| 307 if __name__ == "__main__": | 335 if __name__ == "__main__": | 
| 308 __main__() | 336 __main__() | 
