comparison exonerategff_to_gff3.py @ 3:a03dead1bede draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/exonerate commit a141c63903d1a598569985e400125d4e7de42801"
author iuc
date Sun, 01 Mar 2020 04:48:34 -0500
parents
children
comparison
equal deleted inserted replaced
2:b03ae2ba8688 3:a03dead1bede
1 #!/usr/bin/env python3
2
3 """
4 Converts a GFF produced by exonerate into a more standard GFF3 (e.g. usable in JBrowse)
5 """
6
7 import argparse
8 import sys
9
10 from BCBio import GFF
11 from Bio.SeqFeature import FeatureLocation, SeqFeature
12
13 parser = argparse.ArgumentParser()
14 parser.add_argument('infile', nargs='?', type=argparse.FileType('r'), default=sys.stdin)
15 parser.add_argument('outfile', nargs='?', type=argparse.FileType('a'), default=sys.stdout)
16 args = parser.parse_args()
17
18
19 scaffs = []
20 gene_number = 0
21 for scaff in GFF.parse(args.infile):
22 scaff.annotations = {}
23 scaff.seq = ""
24 kept_features = []
25 current_gene = None
26 exon_number = 0
27 last_utr = None
28
29 for feature in scaff.features:
30
31 if feature.type == "gene":
32 gene_number += 1
33 mrna_feature = SeqFeature(FeatureLocation(feature.location.start, feature.location.end), type="mRNA", strand=feature.location.strand)
34 mrna_feature.sub_features = []
35 mrna_feature.qualifiers['source'] = feature.qualifiers['source']
36 mrna_id = "mRNA_" + str(gene_number)
37 mrna_feature.qualifiers['ID'] = mrna_id
38 feature.sub_features = [mrna_feature]
39 feature.qualifiers['ID'] = "gene_" + str(gene_number)
40 if 'gene_orientation' in feature.qualifiers:
41 del feature.qualifiers['gene_orientation']
42 if current_gene:
43 kept_features.append(current_gene)
44
45 current_gene = feature
46 exon_number = 0
47 last_utr = None
48
49 elif feature.type == 'utr5':
50 feature.type = 'five_prime_UTR'
51 feature.qualifiers['ID'] = '%s_five_prime_UTR' % (mrna_id)
52 mrna_feature.sub_features.append(feature)
53 last_utr = {'start': feature.location.start, 'end': feature.location.end}
54
55 elif feature.type == 'utr3':
56 feature.type = 'three_prime_UTR'
57 feature.qualifiers['ID'] = '%s_three_prime_UTR' % (mrna_id)
58 mrna_feature.sub_features.append(feature)
59 last_utr = {'start': feature.location.start, 'end': feature.location.end}
60
61 elif feature.type == 'exon':
62 exon_number += 1
63 feature.qualifiers['ID'] = '%s_exon_%s' % (mrna_id, exon_number)
64 mrna_feature.sub_features.append(feature)
65
66 if last_utr is None:
67 cds_feature = SeqFeature(FeatureLocation(feature.location.start, feature.location.end), type="CDS", strand=feature.location.strand)
68 cds_feature.sub_features = []
69 cds_feature.qualifiers['source'] = feature.qualifiers['source']
70 cds_feature.qualifiers['ID'] = mrna_id + "_CDS"
71 mrna_feature.sub_features.append(cds_feature)
72 elif feature.location.start != last_utr['start'] or feature.location.end != last_utr['end']:
73 if feature.location.start > last_utr['start']:
74 cds_feature = SeqFeature(FeatureLocation(feature.location.start, last_utr['start']), type="CDS", strand=feature.location.strand)
75 cds_feature.sub_features = []
76 cds_feature.qualifiers['source'] = feature.qualifiers['source']
77 cds_feature.qualifiers['ID'] = mrna_id + "_CDS"
78 mrna_feature.sub_features.append(cds_feature)
79 if feature.location.end < last_utr['end']:
80 cds_feature = SeqFeature(FeatureLocation(feature.location.end, last_utr['end']), type="CDS", strand=feature.location.strand)
81 cds_feature.sub_features = []
82 cds_feature.qualifiers['source'] = feature.qualifiers['source']
83 cds_feature.qualifiers['ID'] = mrna_id + "_CDS"
84 mrna_feature.sub_features.append(cds_feature)
85
86 last_utr = None
87
88 elif feature.type == 'similarity':
89 if current_gene is None:
90 # We haven't seen any gene, just convert similarity to match
91 feature.type = 'match'
92 kept_features.append(feature)
93
94 last_utr = None
95
96 elif feature.type not in ['splice3', 'splice5', 'similarity', 'intron']:
97 mrna_feature.sub_features.append(feature)
98 last_utr = None
99
100 # For the last one
101 if current_gene:
102 kept_features.append(current_gene)
103
104 scaff.features = kept_features
105
106 if len(kept_features):
107 GFF.write([scaff], args.outfile)