Mercurial > repos > bgruening > augustus
annotate extract_features.py @ 9:bd0e53f3a891 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit fbd55928544683a7f7e6e10dadabe698bc71b0e4
author | iuc |
---|---|
date | Fri, 04 Oct 2024 11:32:08 +0000 |
parents | 09855551d713 |
children |
rev | line source |
---|---|
0 | 1 #!/usr/bin/env python |
2 | |
4
4de31938431b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 2896dcfd180800d00ea413a59264ef8b11788b8e
iuc
parents:
0
diff
changeset
|
3 import argparse |
0 | 4 import sys |
5 import textwrap | |
6 | |
4
4de31938431b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 2896dcfd180800d00ea413a59264ef8b11788b8e
iuc
parents:
0
diff
changeset
|
7 |
7
09855551d713
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit bba7f5df059fcbeb06e89cf689e9a04d4f22cb76"
iuc
parents:
4
diff
changeset
|
def _write_fasta_record(handle, name, sequence):
    """Write one FASTA record to *handle*, wrapping the sequence at 80 columns."""
    handle.write(">%s\n%s\n" % (name, "\n".join(textwrap.wrap(sequence, 80))))


def main(args):
    """
    Extract the protein and coding section from an augustus gff, gtf file.

    The augustus output is read from ``sys.stdin``. Only comment lines
    (starting with ``#``) are inspected, because augustus stores the protein
    and coding sequences as comments. Each sequence is written as a FASTA
    record named after the most recent ``# start gene <name>`` comment.

    Args:
        args: object with the attributes ``protein`` and ``codingseq``; each
            is either a path to the FASTA file to create or a falsy value to
            skip that kind of sequence.

    Raises:
        UnboundLocalError: if a sequence is encountered before any
            ``# start gene`` comment has been seen (no gene name is known).

    Example file:
    HS04636 AUGUSTUS stop_codon 6901 6903 . + 0 Parent=g1.t1
    HS04636 AUGUSTUS transcription_end_site 8857 8857 . + . Parent=g1.t1
    # protein sequence = [MLARALLLCAVLALSHTANPCCSHPCQNRGVCMSVGFDQYKCDCTRTGFYGENCSTPEFLTRIKLFLKPTPNTVHYIL
    # THFKGFWNVVNNIPFLRNAIMSYVLTSRSHLIDSPPTYNADYGYKSWEAFSNLSYYTRALPPVPDDCPTPLGVKGKKQLPDSNEIVEKLLLRRKFIPD
    # PQGSNMMFAFFAQHFTHQFFKTDHKRGPAFTNGLGHGVDLNHIYGETLARQRKLRLFKDGKMKYQIIDGEMYPPTVKDTQAEMIYPPQVPEHLRFAVG
    # QEVFGLVPGLMMYATIWLREHNRVCDVLKQEHPEWGDEQLFQTSRLILIGETIKIVIEDYVQHLSGYHFKLKFDPELLFNKQFQYQNRIAAEFNTLYH
    # WHPLLPDTFQIHDQKYNYQQFIYNNSILLEHGITQFVESFTRQIAGRVAGGRNVPPAVQKVSQASIDQSRQMKYQSFNEYRKRFMLKPYESFEELTGE
    # KEMSAELEALYGDIDAVELYPALLVEKPRPDAIFGETMVEVGAPFSLKGLMGNVICSPAYWKPSTFGGEVGFQIINTASIQSLICNNVKGCPFTSFSV
    # PDPELIKTVTINASSSRSGLDDINPTVLLKERSTEL]
    # end gene g1
    ###
    #
    # ----- prediction on sequence number 2 (length = 2344, name = HS08198) -----
    #
    # Predicted genes for sequence number 2 on both strands
    # start gene g2
    HS08198 AUGUSTUS gene 86 2344 1 + . ID=g2
    HS08198 AUGUSTUS transcript 86 2344 . + . ID=g2.t1;Parent=g2
    HS08198 AUGUSTUS transcription_start_site 86 86 . + . Parent=g2.t1
    HS08198 AUGUSTUS exon 86 582 . + . Parent=g2.t1
    HS08198 AUGUSTUS start_codon 445 447 . + 0 Parent=g2.t1
    """
    # Function-scope import keeps this block self-contained.
    from contextlib import ExitStack

    protein_prefix = "protein sequence = ["
    coding_prefix = "coding sequence = ["

    # Non-empty while a multi-line sequence is still being collected.
    protein_seq = ""
    coding_seq = ""

    # ExitStack closes whichever outputs were opened, even if parsing fails.
    with ExitStack() as stack:
        po = stack.enter_context(open(args.protein, "w+")) if args.protein else None
        co = stack.enter_context(open(args.codingseq, "w+")) if args.codingseq else None

        for line in sys.stdin:
            # protein- and coding-sequence are stored as comments
            if not line.startswith("#"):
                continue
            # Drop the leading "# " marker and surrounding whitespace.
            line = line[2:].strip()

            if line.startswith("start gene"):
                gene_name = line[11:].strip()

            # Continuation of a protein sequence started on an earlier line.
            if protein_seq:
                if line.endswith("]"):
                    _write_fasta_record(po, gene_name, protein_seq + line[:-1])
                    protein_seq = ""
                else:
                    protein_seq += line

            # Continuation of a coding sequence started on an earlier line.
            if coding_seq:
                if line.endswith("]"):
                    _write_fasta_record(co, gene_name, coding_seq + line[:-1])
                    coding_seq = ""
                else:
                    coding_seq += line

            # First line of a protein sequence.
            if args.protein and line.startswith(protein_prefix):
                if line.endswith("]"):
                    # Whole sequence fits on one line: emit it right away.
                    _write_fasta_record(
                        po, gene_name, line[len(protein_prefix):-1]
                    )
                else:
                    protein_seq = line[len(protein_prefix):]

            # First line of a coding sequence.
            if args.codingseq and line.startswith(coding_prefix):
                if line.endswith("]"):
                    _write_fasta_record(
                        co, gene_name, line[len(coding_prefix):-1]
                    )
                else:
                    coding_seq = line[len(coding_prefix):]
98 | |
4
4de31938431b
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit 2896dcfd180800d00ea413a59264ef8b11788b8e
iuc
parents:
0
diff
changeset
|
99 |
7
09855551d713
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/augustus commit bba7f5df059fcbeb06e89cf689e9a04d4f22cb76"
iuc
parents:
4
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: both output paths are optional, and main()
    # only writes the sequence kinds whose path was supplied.
    cli = argparse.ArgumentParser()
    cli.add_argument("-p", "--protein", help="Path to the protein file.")
    cli.add_argument("-c", "--codingseq", help="Path to the coding file.")

    main(cli.parse_args())