annotate fasta_remove_id.py @ 3:03414db20efc draft

planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
author cpt
date Mon, 05 Jun 2023 02:41:42 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
1 #!/usr/bin/env python
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
2 import sys
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
3 import argparse
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
4 import logging
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
5 from Bio import SeqIO
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
6
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
7 logging.basicConfig(level=logging.INFO)
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
8 log = logging.getLogger()
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
9
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
10
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
11 def drop_id(fasta_file=None):
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
12 for rec in SeqIO.parse(fasta_file, "fasta"):
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
13 rec.description = ""
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
14 ind = str(rec.seq).find("##")
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
15 if (
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
16 ind != -1
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
17 ): # This method causes mid-file comments (such as from Apollo sequences) to be appended to the end of the previous sequence
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
18 rec.seq = rec.seq[0:ind]
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
19 yield rec
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
20
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
21
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
22 if __name__ == "__main__":
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
23 parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences")
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
24 parser.add_argument("fasta_file", type=argparse.FileType("r"), help="Genbank file")
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
25
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
26 args = parser.parse_args()
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
27 for rec in drop_id(**vars(args)):
03414db20efc planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff changeset
28 SeqIO.write([rec], sys.stdout, "fasta")