Mercurial > repos > gga > apollo_feat_from_gff3
comparison create_features_from_gff3.py @ 0:5aa3bc8d0253 draft
planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit f745b23c84a615bf434d717c8c0e553a012f0268
author | gga |
---|---|
date | Mon, 11 Sep 2017 05:45:37 -0400 |
parents | |
children | 7c4a3e737e08 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5aa3bc8d0253 |
---|---|
1 #!/usr/bin/env python | |
2 import argparse | |
3 import logging | |
4 import sys | |
5 import time | |
6 | |
7 from builtins import str | |
8 | |
9 from BCBio import GFF | |
10 | |
11 from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance, featuresToFeatureSchema, retry | |
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Qualifiers that are never copied onto the Apollo feature as attributes.
BAD_QUALS = ('date_creation', 'source', 'owner', 'date_last_modified', 'Name', 'ID')


def first_line(text):
    """Return ``text`` up to, but not including, its first newline."""
    return text.split('\n', 1)[0]


def display_name(qualifiers):
    """Pick the name to show for a gene.

    The first ``product`` value wins, then the first ``Name`` value, and
    ``"Unknown"`` is used when neither qualifier is present.
    """
    return qualifiers.get('product', qualifiers.get('Name', ["Unknown"]))[0]


def trna_display_name(gene_qualifiers, trna_qualifiers):
    """Build the name for a tRNA feature.

    Defaults to ``tRNA-<Codon>`` (``tRNA-Unk`` without a ``Codon`` qualifier
    on the tRNA sub-feature); a gene-level ``Name`` that already starts with
    ``tRNA-`` takes precedence.
    """
    if 'Name' in gene_qualifiers:
        gene_name = gene_qualifiers['Name'][0]
        if gene_name.startswith('tRNA-'):
            return gene_name
    return 'tRNA-' + trna_qualifiers.get('Codon', ["Unk"])[0]


def has_trna_child(feature_schema):
    """Tell whether any direct child of ``feature_schema`` is typed ``tRNA``."""
    return 'children' in feature_schema and any(
        child['type']['name'] == 'tRNA' for child in feature_schema['children']
    )


def split_qualifiers(qualifiers, skip=BAD_QUALS):
    """Separate ``Note`` values from the remaining qualifiers.

    Keys listed in ``skip`` are dropped. Returns ``(notes, extra)`` where
    ``notes`` is the ``Note`` value list (``None`` when absent) and ``extra``
    maps every other retained key to its values.
    """
    notes = None
    extra = {}
    for key, values in qualifiers.items():
        if key in skip:
            continue
        if key == 'Note':
            notes = values
        else:
            extra[key] = values
    return notes, extra


def set_dataset_source(wa, new_feature, source):
    """Store ``source`` as a ``DatasetSource`` attribute on the new feature's parent.

    Does nothing when ``source`` is empty or ``None``.
    """
    if not source:
        return
    parent_id = new_feature['features'][0]['parent_id']
    retry(lambda: wa.annotations.addAttributes(parent_id, {'DatasetSource': [source]}))


def add_renamed_feature(wa, feature_data, placeholder, final_name, source):
    """Create a feature under ``placeholder`` and then rename it to ``final_name``.

    We're experiencing a (transient?) problem where gene_001 to gene_025 will
    be rejected. Thus the feature is created with a known working name and
    renamed afterwards.

    Returns the ``uniquename`` reported for the created feature.
    """
    feature_data[0]['name'] = placeholder
    new_feature = wa.annotations.addFeature(feature_data, trustme=True)
    unique_name = new_feature['features'][0]['uniquename']
    retry(lambda: wa.annotations.setName(unique_name, final_name))
    set_dataset_source(wa, new_feature, source)
    return unique_name


def add_trna(wa, feature, feature_data, source):
    """Create a tRNA feature; returns its ``uniquename``."""
    trna_sf = [child for child in feature.sub_features if child.type == 'tRNA'][0]
    final_name = trna_display_name(feature.qualifiers, trna_sf.qualifiers)
    return add_renamed_feature(wa, feature_data, 'tRNA_000', final_name, source)


def add_terminator(wa, feature_data, source):
    """Create a terminator feature; returns its ``uniquename``."""
    return add_renamed_feature(wa, feature_data, 'terminator_000', 'terminator', source)


def add_gene(wa, feature, feature_data, source):
    """Create a gene with its mRNA, fix the translation start and copy qualifiers.

    Returns the id of the gene (the ``parent_id`` of the returned feature).
    """
    # Same placeholder-name workaround as in add_renamed_feature.
    feature_data[0]['name'] = 'gene_000'
    # Extract CDS feature from the feature data, this will be used to set the
    # CDS location correctly (apollo currently screwing this up (2.0.6)).
    mrna_children = feature_data[0]['children'][0]['children']
    cds = [x for x in mrna_children if x['type']['name'] == 'CDS'][0]['location']

    new_feature = wa.annotations.addFeature(feature_data, trustme=True)
    # Extract the UUIDs that apollo returns to us.
    mrna_id = new_feature['features'][0]['uniquename']
    gene_id = new_feature['features'][0]['parent_id']

    # Sleep to give it time to actually persist the feature. Apollo is
    # terrible about writing + immediately reading back written data.
    time.sleep(1)
    # Correct the translation start, with strand specific logic.
    if cds['strand'] == 1:
        translation_start = min(cds['fmin'], cds['fmax'])
    else:
        translation_start = max(cds['fmin'], cds['fmax']) - 1
    wa.annotations.setTranslationStart(mrna_id, translation_start)

    # Finally we set the name, this should be correct.
    name = display_name(feature.qualifiers)
    time.sleep(0.5)
    wa.annotations.setName(mrna_id, name)
    time.sleep(0.5)
    retry(lambda: wa.annotations.setName(gene_id, name))

    set_dataset_source(wa, new_feature, source)

    # Notes become comments; every other retained qualifier becomes an attribute.
    notes, extra_attr = split_qualifiers(feature.qualifiers, BAD_QUALS)
    if notes is not None:
        retry(lambda: wa.annotations.addComments(gene_id, notes))
    retry(lambda: wa.annotations.addAttributes(gene_id, extra_attr))
    return gene_id


def main():
    """Load gene and terminator features from a GFF3 file into Apollo.

    Writes one tab-separated row per processed feature to stdout: the GFF3
    feature id, the Apollo id, and ``success``; or the feature id, an empty
    Apollo id, ``ERROR`` and the first line of the exception message.
    """
    parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services')
    WAAuth(parser)
    parser.add_argument('email', help='User Email')
    parser.add_argument('--source', help='URL where the input dataset can be found.')
    OrgOrGuess(parser)

    parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file')
    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    # User must have an account. The returned user is not needed afterwards.
    AssertUser(wa.users.loadUsers(email=args.email))

    # Get organism
    org_cn = GuessOrg(args, wa)
    if isinstance(org_cn, list):
        org_cn = org_cn[0]

    # TODO: Check user perms on org.
    org = wa.organisms.findOrganismByCn(org_cn)

    sys.stdout.write('# ')
    sys.stdout.write('\t'.join(['Feature ID', 'Apollo ID', 'Success', 'Messages']))
    sys.stdout.write('\n')

    for rec in GFF.parse(args.gff3):
        wa.annotations.setSequence(rec.id, org['id'])
        for feature in rec.features:
            # Only top-level genes (including tRNA genes) and terminators are handled.
            if feature.type not in ('gene', 'terminator'):
                continue
            # Convert the feature into a presentation that Apollo will accept
            feature_data = featuresToFeatureSchema([feature])

            # Every feature kind is reported the same way: a failure yields
            # an ERROR row and the import carries on with the next feature.
            try:
                if has_trna_child(feature_data[0]):
                    apollo_id = add_trna(wa, feature, feature_data, args.source)
                elif feature_data[0]['type']['name'] == 'terminator':
                    apollo_id = add_terminator(wa, feature_data, args.source)
                else:
                    apollo_id = add_gene(wa, feature, feature_data, args.source)
                row = [feature.id, apollo_id, 'success']
            except Exception as e:
                row = [feature.id, '', 'ERROR', first_line(str(e))]

            sys.stdout.write('\t'.join(row))
            sys.stdout.write('\n')
            sys.stdout.flush()


if __name__ == '__main__':
    main()