Mercurial > repos > cpt > cpt_fasta_remove_id
annotate fasta_remove_id.py @ 3:03414db20efc draft
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
| author | cpt |
|---|---|
| date | Mon, 05 Jun 2023 02:41:42 +0000 |
| parents | |
| children |
| rev | line source |
|---|---|
|
3
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
1 #!/usr/bin/env python |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
2 import sys |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
3 import argparse |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
4 import logging |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
5 from Bio import SeqIO |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
6 |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
7 logging.basicConfig(level=logging.INFO) |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
8 log = logging.getLogger() |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
9 |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
10 |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
11 def drop_id(fasta_file=None): |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
12 for rec in SeqIO.parse(fasta_file, "fasta"): |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
13 rec.description = "" |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
14 ind = str(rec.seq).find("##") |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
15 if ( |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
16 ind != -1 |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
17 ): # This method causes mid-file comments (such as from Apollo sequences) to be appended to the end of the previous sequence |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
18 rec.seq = rec.seq[0:ind] |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
19 yield rec |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
20 |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
21 |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
22 if __name__ == "__main__": |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
23 parser = argparse.ArgumentParser(description="Identify shine-dalgarno sequences") |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
24 parser.add_argument("fasta_file", type=argparse.FileType("r"), help="Genbank file") |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
25 |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
26 args = parser.parse_args() |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
27 for rec in drop_id(**vars(args)): |
|
03414db20efc
planemo upload commit 94b0cd1fff0826c6db3e7dc0c91c0c5a8be8bb0c
cpt
parents:
diff
changeset
|
28 SeqIO.write([rec], sys.stdout, "fasta") |
