comparison create_features_from_gff3.py @ 0:c6d7f19953a6 draft

planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit f745b23c84a615bf434d717c8c0e553a012f0268
author gga
date Mon, 11 Sep 2017 05:47:25 -0400
parents
children 9968eec7430e
comparison
equal deleted inserted replaced
-1:000000000000 0:c6d7f19953a6
1 #!/usr/bin/env python
2 import argparse
3 import logging
4 import sys
5 import time
6
7 from builtins import str
8
9 from BCBio import GFF
10
11 from webapollo import AssertUser, GuessOrg, OrgOrGuess, WAAuth, WebApolloInstance, featuresToFeatureSchema, retry
logging.basicConfig(level=logging.INFO)
log = logging.getLogger(__name__)

# Qualifiers that are never copied onto the Apollo feature as attributes.
BAD_QUALS = ('date_creation', 'source', 'owner', 'date_last_modified', 'Name', 'ID')


def _set_dataset_source(wa, gene_id, source):
    """Store *source* in the ``DatasetSource`` attribute of feature *gene_id*.

    The call is handed to ``retry`` (imported from ``webapollo``); its retry
    policy is not visible from this script.
    """
    def set_source():
        wa.annotations.addAttributes(gene_id, {'DatasetSource': [source]})
    retry(set_source)


def _trna_display_name(feature):
    """Return the final name for a tRNA gene.

    The name is ``tRNA-`` followed by the first ``Codon`` qualifier of the
    first tRNA sub-feature (``Unk`` when that qualifier is absent). A ``Name``
    qualifier on *feature* that already starts with ``tRNA-`` takes precedence.

    Raises ``IndexError`` when *feature* has no sub-feature of type ``tRNA``.
    """
    trna = [child for child in feature.sub_features if child.type == 'tRNA'][0]
    name = 'tRNA-' + trna.qualifiers.get('Codon', ["Unk"])[0]

    if 'Name' in feature.qualifiers:
        if feature.qualifiers['Name'][0].startswith('tRNA-'):
            name = feature.qualifiers['Name'][0]
    return name


def _add_renamed_feature(wa, feature_data, placeholder, final_name, source):
    """Create a feature under *placeholder*, then rename it to *final_name*.

    We're experiencing a (transient?) problem where gene_001 to gene_025 will
    be rejected. Thus, the feature is created with a known working name and
    renamed afterwards.

    When *source* is truthy it is recorded on the ``parent_id`` that Apollo
    returned. Returns the ``uniquename`` of the created feature.
    """
    feature_data[0]['name'] = placeholder
    created = wa.annotations.addFeature(feature_data, trustme=True)['features'][0]
    apollo_id = created['uniquename']

    def rename():
        wa.annotations.setName(apollo_id, final_name)
    retry(rename)

    if source:
        _set_dataset_source(wa, created['parent_id'], source)
    return apollo_id


def _add_gene(wa, feature, feature_data, source):
    """Create a protein-coding gene in Apollo and copy its qualifiers over.

    Steps: create the feature under a placeholder name, fix the translation
    start from the CDS location, name both returned ids after the ``product``
    (or ``Name``) qualifier, optionally record *source*, then push ``Note``
    qualifiers as comments and every other non-excluded qualifier as an
    attribute.

    Returns the ``parent_id`` Apollo reported for the new feature. Any
    exception (missing children, no CDS, web-service errors raised outside
    ``retry``) propagates to the caller.
    """
    # We're experiencing a (transient?) problem where gene_001 to
    # gene_025 will be rejected. Thus, hardcode to a known working
    # gene name and update later.
    feature_data[0]['name'] = 'gene_000'
    # Extract CDS feature from the feature data, this will be used
    # to set the CDS location correctly (apollo currently screwing
    # this up (2.0.6))
    grandchildren = feature_data[0]['children'][0]['children']
    cds = [x for x in grandchildren if x['type']['name'] == 'CDS'][0]['location']

    # Create the new feature and extract the UUIDs that apollo returns to us
    created = wa.annotations.addFeature(feature_data, trustme=True)['features'][0]
    mrna_id = created['uniquename']
    gene_id = created['parent_id']

    # Sleep to give it time to actually persist the feature. Apollo
    # is terrible about writing + immediately reading back written
    # data.
    time.sleep(1)

    # Correct the translation start, but with strand specific logic
    if cds['strand'] == 1:
        translation_start = min(cds['fmin'], cds['fmax'])
    else:
        translation_start = max(cds['fmin'], cds['fmax']) - 1
    wa.annotations.setTranslationStart(mrna_id, translation_start)

    # Finally we set the name, this should be correct.
    display_name = feature.qualifiers.get('product', feature.qualifiers.get('Name', ["Unknown"]))[0]
    time.sleep(0.5)
    wa.annotations.setName(mrna_id, display_name)
    time.sleep(0.5)

    def rename_gene():
        wa.annotations.setName(gene_id, display_name)
    retry(rename_gene)

    if source:
        _set_dataset_source(wa, gene_id, source)

    extra_attr = {}
    for (key, values) in feature.qualifiers.items():
        if key in BAD_QUALS:
            continue

        if key == 'Note':
            # Notes become Apollo comments rather than attributes.
            def add_comments():
                wa.annotations.addComments(gene_id, values)
            retry(add_comments)
        else:
            extra_attr[key] = values

    def add_attributes():
        wa.annotations.addAttributes(gene_id, extra_attr)
    retry(add_attributes)

    return gene_id


def _load_feature(wa, feature, feature_data, source):
    """Dispatch *feature* to the matching loader and return its Apollo id.

    A feature whose converted data has a ``tRNA`` child is loaded as a tRNA,
    one whose type name is ``terminator`` as a terminator, and anything else
    as a protein-coding gene.
    """
    top = feature_data[0]
    if 'children' in top and any(child['type']['name'] == 'tRNA' for child in top['children']):
        return _add_renamed_feature(wa, feature_data, 'tRNA_000', _trna_display_name(feature), source)
    if top['type']['name'] == 'terminator':
        return _add_renamed_feature(wa, feature_data, 'terminator_000', 'terminator', source)
    return _add_gene(wa, feature, feature_data, source)


def main():
    """Load gene, tRNA and terminator features from a GFF3 file into Apollo.

    Writes a tab-separated report to stdout: a ``# ``-prefixed header, then
    one line per processed feature with the feature id, the id returned by
    Apollo and ``success``; or the feature id, an empty id, ``ERROR`` and the
    first line of the exception message when loading that feature failed.
    """
    parser = argparse.ArgumentParser(description='Create gene, tRNA and terminator features in Apollo from a GFF3 file via web services')
    # WAAuth / OrgOrGuess come from webapollo and register further arguments
    # on the parser (args.apollo, args.username and args.password are read
    # below) -- NOTE(review): exact options are defined in webapollo.
    WAAuth(parser)
    parser.add_argument('email', help='User Email')
    parser.add_argument('--source', help='URL where the input dataset can be found.')
    OrgOrGuess(parser)

    parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file')
    args = parser.parse_args()

    wa = WebApolloInstance(args.apollo, args.username, args.password)
    # User must have an account
    AssertUser(wa.users.loadUsers(email=args.email))

    # Get organism
    org_cn = GuessOrg(args, wa)
    if isinstance(org_cn, list):
        org_cn = org_cn[0]

    # TODO: Check user perms on org.
    org = wa.organisms.findOrganismByCn(org_cn)

    sys.stdout.write('# ')
    sys.stdout.write('\t'.join(['Feature ID', 'Apollo ID', 'Success', 'Messages']))
    sys.stdout.write('\n')

    for rec in GFF.parse(args.gff3):
        wa.annotations.setSequence(rec.id, org['id'])
        for feature in rec.features:
            # We can only handle genes (incl. tRNA genes) and terminators right now
            if feature.type not in ('gene', 'terminator'):
                continue
            # Convert the feature into a presentation that Apollo will accept
            feature_data = featuresToFeatureSchema([feature])

            # Every feature kind is reported the same way: a failure becomes an
            # ERROR row and the remaining features are still processed.
            try:
                row = [feature.id, _load_feature(wa, feature, feature_data, args.source), 'success']
            except Exception as e:
                # Keep the report one line per feature: first line of the message only.
                row = [feature.id, '', 'ERROR', str(e).partition('\n')[0]]

            sys.stdout.write('\t'.join(row))
            sys.stdout.write('\n')
            sys.stdout.flush()


if __name__ == '__main__':
    main()