0
|
1 #!/usr/bin/env python
|
|
2 import sys
|
|
3 import argparse
|
|
4 import logging
|
|
5 from Bio import SeqIO
|
|
6
|
|
7 logging.basicConfig(level=logging.INFO)
|
|
8 log = logging.getLogger()
|
|
9
|
|
10
|
|
def drop_id(fasta_file=None):
    """Yield FASTA records with the description cleared and ``##`` text cut off.

    Args:
        fasta_file: FASTA source handed straight to ``SeqIO.parse``.

    Yields:
        Each parsed record, modified in place: ``description`` is set to the
        empty string and, when the sequence contains ``##``, the sequence is
        truncated just before its first occurrence.
    """
    for record in SeqIO.parse(fasta_file, "fasta"):
        record.description = ""
        # Mid-file comments (such as those in Apollo sequences) end up
        # appended to the end of the previous sequence when parsed this way,
        # so everything from the first "##" onward is discarded.
        marker_pos = str(record.seq).find("##")
        if marker_pos >= 0:
            record.seq = record.seq[:marker_pos]
        yield record
|
|
20
|
|
21
|
|
if __name__ == "__main__":
    # Command-line entry point: read a FASTA file, clean every record with
    # drop_id, and print the result to standard output in FASTA format.
    parser = argparse.ArgumentParser(
        description=(
            "Strip descriptions from FASTA headers and remove trailing "
            "'##' comment text from sequences"
        )
    )
    # argparse opens the file itself, so drop_id receives a readable handle.
    parser.add_argument(
        "fasta_file", type=argparse.FileType("r"), help="FASTA file"
    )

    args = parser.parse_args()
    # Pass the argument by name rather than splatting vars(args), so adding
    # another CLI option later cannot break the drop_id call.
    # SeqIO.write consumes the generator lazily, so records are still
    # streamed one at a time.
    SeqIO.write(drop_id(fasta_file=args.fasta_file), sys.stdout, "fasta")
|