Mercurial > repos > iuc > exonerate
comparison exonerategff_to_gff3.py @ 3:a03dead1bede draft default tip
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/exonerate commit a141c63903d1a598569985e400125d4e7de42801"
author | iuc |
---|---|
date | Sun, 01 Mar 2020 04:48:34 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
2:b03ae2ba8688 | 3:a03dead1bede |
---|---|
1 #!/usr/bin/env python3 | |
2 | |
3 """ | |
4 Converts a GFF produced by exonerate into a more standard GFF3 (e.g. usable in JBrowse) | |
5 """ | |
6 | |
7 import argparse | |
8 import sys | |
9 | |
10 from BCBio import GFF | |
11 from Bio.SeqFeature import FeatureLocation, SeqFeature | |
12 | |
13 parser = argparse.ArgumentParser() | |
14 parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin) | |
15 parser.add_argument('outfile', nargs='?', type=argparse.FileType('a'), default=sys.stdout) | |
16 args = parser.parse_args() | |
17 | |
18 | |
19 scaffs = [] | |
20 gene_number = 0 | |
21 for scaff in GFF.parse(args.infile): | |
22 scaff.annotations = {} | |
23 scaff.seq = "" | |
24 kept_features = [] | |
25 current_gene = None | |
26 exon_number = 0 | |
27 last_utr = None | |
28 | |
29 for feature in scaff.features: | |
30 | |
31 if feature.type == "gene": | |
32 gene_number += 1 | |
33 mrna_feature = SeqFeature(FeatureLocation(feature.location.start, feature.location.end), type="mRNA", strand=feature.location.strand) | |
34 mrna_feature.sub_features = [] | |
35 mrna_feature.qualifiers['source'] = feature.qualifiers['source'] | |
36 mrna_id = "mRNA_" + str(gene_number) | |
37 mrna_feature.qualifiers['ID'] = mrna_id | |
38 feature.sub_features = [mrna_feature] | |
39 feature.qualifiers['ID'] = "gene_" + str(gene_number) | |
40 if 'gene_orientation' in feature.qualifiers: | |
41 del feature.qualifiers['gene_orientation'] | |
42 if current_gene: | |
43 kept_features.append(current_gene) | |
44 | |
45 current_gene = feature | |
46 exon_number = 0 | |
47 last_utr = None | |
48 | |
49 elif feature.type == 'utr5': | |
50 feature.type = 'five_prime_UTR' | |
51 feature.qualifiers['ID'] = '%s_five_prime_UTR' % (mrna_id) | |
52 mrna_feature.sub_features.append(feature) | |
53 last_utr = {'start': feature.location.start, 'end': feature.location.end} | |
54 | |
55 elif feature.type == 'utr3': | |
56 feature.type = 'three_prime_UTR' | |
57 feature.qualifiers['ID'] = '%s_three_prime_UTR' % (mrna_id) | |
58 mrna_feature.sub_features.append(feature) | |
59 last_utr = {'start': feature.location.start, 'end': feature.location.end} | |
60 | |
61 elif feature.type == 'exon': | |
62 exon_number += 1 | |
63 feature.qualifiers['ID'] = '%s_exon_%s' % (mrna_id, exon_number) | |
64 mrna_feature.sub_features.append(feature) | |
65 | |
66 if last_utr is None: | |
67 cds_feature = SeqFeature(FeatureLocation(feature.location.start, feature.location.end), type="CDS", strand=feature.location.strand) | |
68 cds_feature.sub_features = [] | |
69 cds_feature.qualifiers['source'] = feature.qualifiers['source'] | |
70 cds_feature.qualifiers['ID'] = mrna_id + "_CDS" | |
71 mrna_feature.sub_features.append(cds_feature) | |
72 elif feature.location.start != last_utr['start'] or feature.location.end != last_utr['end']: | |
73 if feature.location.start > last_utr['start']: | |
74 cds_feature = SeqFeature(FeatureLocation(feature.location.start, last_utr['start']), type="CDS", strand=feature.location.strand) | |
75 cds_feature.sub_features = [] | |
76 cds_feature.qualifiers['source'] = feature.qualifiers['source'] | |
77 cds_feature.qualifiers['ID'] = mrna_id + "_CDS" | |
78 mrna_feature.sub_features.append(cds_feature) | |
79 if feature.location.end < last_utr['end']: | |
80 cds_feature = SeqFeature(FeatureLocation(feature.location.end, last_utr['end']), type="CDS", strand=feature.location.strand) | |
81 cds_feature.sub_features = [] | |
82 cds_feature.qualifiers['source'] = feature.qualifiers['source'] | |
83 cds_feature.qualifiers['ID'] = mrna_id + "_CDS" | |
84 mrna_feature.sub_features.append(cds_feature) | |
85 | |
86 last_utr = None | |
87 | |
88 elif feature.type == 'similarity': | |
89 if current_gene is None: | |
90 # We haven't seen any gene, just convert similarity to match | |
91 feature.type = 'match' | |
92 kept_features.append(feature) | |
93 | |
94 last_utr = None | |
95 | |
96 elif feature.type not in ['splice3', 'splice5', 'similarity', 'intron']: | |
97 mrna_feature.sub_features.append(feature) | |
98 last_utr = None | |
99 | |
100 # For the last one | |
101 if current_gene: | |
102 kept_features.append(current_gene) | |
103 | |
104 scaff.features = kept_features | |
105 | |
106 if len(kept_features): | |
107 GFF.write([scaff], args.outfile) |