| 
0
 | 
     1 #!/usr/bin/env python
 | 
| 
 | 
     2 import sys
 | 
| 
 | 
     3 import argparse
 | 
| 
 | 
     4 import logging
 | 
| 
 | 
     5 from Bio import SeqIO
 | 
| 
 | 
     6 
 | 
| 
 | 
     7 logging.basicConfig(level=logging.INFO)
 | 
| 
 | 
     8 log = logging.getLogger()
 | 
| 
 | 
     9 
 | 
| 
 | 
    10 
 | 
| 
 | 
    11 def drop_id(fasta_file=None):
 | 
| 
 | 
    12     for rec in SeqIO.parse(fasta_file, "fasta"):
 | 
| 
 | 
    13         rec.description = ""
 | 
| 
 | 
    14         ind = str(rec.seq).find("##")
 | 
| 
 | 
    15         if (
 | 
| 
 | 
    16             ind != -1
 | 
| 
 | 
    17         ):  # This method causes mid-file comments (such as from Apollo sequences) to be appended to the end of the previous sequence
 | 
| 
 | 
    18             rec.seq = rec.seq[0:ind]
 | 
| 
 | 
    19         yield rec
 | 
| 
 | 
    20 
 | 
| 
 | 
    21 
 | 
| 
 | 
    22 if __name__ == "__main__":
 | 
| 
 | 
    23     parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences")
 | 
| 
 | 
    24     parser.add_argument("fasta_file", type=argparse.FileType("r"), help="Genbank file")
 | 
| 
 | 
    25 
 | 
| 
 | 
    26     args = parser.parse_args()
 | 
| 
 | 
    27     for rec in drop_id(**vars(args)):
 | 
| 
 | 
    28         SeqIO.write([rec], sys.stdout, "fasta")
 |