comparison create_features_from_gff3.py @ 10:dd63c6e11a95 draft

"planemo upload for repository https://github.com/galaxy-genome-annotation/galaxy-tools/tree/master/tools/apollo commit 08015be1ee8a784e0619f961aaa724857debfd6f"
author gga
date Mon, 02 Dec 2019 05:48:12 -0500
parents 0af4179e6758
children 6ea838bf1f2f
comparison
equal deleted inserted replaced
9:1575f11ac6fc 10:dd63c6e11a95
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 import argparse 2 import argparse
3 import logging 3 import logging
4 import sys
5 import time
6 4
7 from BCBio import GFF 5 from apollo import accessible_organisms
6 from apollo.util import GuessOrg, OrgOrGuess
8 7
9 from six.moves.builtins import str 8 from arrow.apollo import get_apollo_instance
10 9
11 from webapollo import GuessOrg, OrgOrGuess, PermissionCheck, WAAuth, WebApolloInstance, featuresToFeatureSchema, retry 10 from webapollo import UserObj, handle_credentials
12 logging.basicConfig(level=logging.INFO) 11 logging.basicConfig(level=logging.INFO)
13 log = logging.getLogger(__name__) 12 log = logging.getLogger(__name__)
14 13
15 14
16 if __name__ == '__main__': 15 if __name__ == '__main__':
17 parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services') 16 parser = argparse.ArgumentParser(description='Sample script to add an attribute to a feature via web services')
18 WAAuth(parser)
19 parser.add_argument('email', help='User Email') 17 parser.add_argument('email', help='User Email')
20 parser.add_argument('--source', help='URL where the input dataset can be found.') 18 parser.add_argument('--source', help='URL where the input dataset can be found.')
21 OrgOrGuess(parser) 19 OrgOrGuess(parser)
22 20
23 parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file') 21 parser.add_argument('gff3', type=argparse.FileType('r'), help='GFF3 file')
24 args = parser.parse_args() 22 args = parser.parse_args()
25 23
26 wa = WebApolloInstance(args.apollo, args.username, args.password) 24 wa = get_apollo_instance()
27 # User must have an account 25 # User must have an account
28 gx_user = wa.users.assertOrCreateUser(args.email) 26 gx_user = UserObj(**wa.users._assert_or_create_user(args.email))
27 handle_credentials(gx_user)
29 28
30 # Get organism 29 # Get organism
31 org_cn = GuessOrg(args, wa) 30 org_cn = GuessOrg(args, wa)
32 if isinstance(org_cn, list): 31 if isinstance(org_cn, list):
33 org_cn = org_cn[0] 32 org_cn = org_cn[0]
34 33
35 if not PermissionCheck(gx_user, org_cn, "WRITE"): 34 all_orgs = wa.organisms.get_organisms()
36 raise Exception("Action not permitted") 35 if 'error' in all_orgs:
37 org = wa.organisms.findOrganismByCn(org_cn) 36 all_orgs = []
37 all_orgs = [org['commonName'] for org in all_orgs]
38 if org_cn not in all_orgs:
39 raise Exception("Could not find organism %s" % org_cn)
38 40
39 bad_quals = ['date_creation', 'source', 'owner', 'date_last_modified', 'Name', 'ID'] 41 orgs = accessible_organisms(gx_user, [org_cn], 'WRITE')
42 if not orgs:
43 raise Exception("You do not have write permission on this organism")
40 44
41 sys.stdout.write('# ') 45 wa.annotations.load_gff3(org_cn, args.gff3, args.source)
42 sys.stdout.write('\t'.join(['Feature ID', 'Apollo ID', 'Success', 'Messages']))
43 sys.stdout.write('\n')
44 # print(wa.annotations.getFeatures())
45 for rec in GFF.parse(args.gff3):
46 wa.annotations.setSequence(rec.id, org['id'])
47 for feature in rec.features:
48 # We can only handle genes and terminators right now
49 if feature.type not in ('gene', 'terminator'):
50 continue
51 # Convert the feature into a presentation that Apollo will accept
52 featureData = featuresToFeatureSchema([feature])
53 if 'children' in featureData[0] and any([child['type']['name'] == 'tRNA' for child in featureData[0]['children']]):
54 # We're experiencing a (transient?) problem where gene_001 to
55 # gene_025 will be rejected. Thus, hardcode to a known working
56 # gene name and update later.
57
58 featureData[0]['name'] = 'tRNA_000'
59 tRNA_sf = [child for child in feature.sub_features if child.type == 'tRNA'][0]
60 tRNA_type = 'tRNA-' + tRNA_sf.qualifiers.get('Codon', ["Unk"])[0]
61
62 if 'Name' in feature.qualifiers:
63 if feature.qualifiers['Name'][0].startswith('tRNA-'):
64 tRNA_type = feature.qualifiers['Name'][0]
65
66 newfeature = wa.annotations.addFeature(featureData, trustme=True)
67
68 def func0():
69 wa.annotations.setName(
70 newfeature['features'][0]['uniquename'],
71 tRNA_type,
72 )
73 retry(func0)
74
75 if args.source:
76 gene_id = newfeature['features'][0]['parent_id']
77
78 def setSource():
79 wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]})
80 retry(setSource)
81
82 sys.stdout.write('\t'.join([
83 feature.id,
84 newfeature['features'][0]['uniquename'],
85 'success',
86 ]))
87 elif featureData[0]['type']['name'] == 'terminator':
88 # We're experiencing a (transient?) problem where gene_001 to
89 # gene_025 will be rejected. Thus, hardcode to a known working
90 # gene name and update later.
91 featureData[0]['name'] = 'terminator_000'
92 newfeature = wa.annotations.addFeature(featureData, trustme=True)
93
94 def func0():
95 wa.annotations.setName(
96 newfeature['features'][0]['uniquename'],
97 'terminator'
98 )
99
100 retry(func0)
101
102 if args.source:
103 gene_id = newfeature['features'][0]['parent_id']
104
105 def setSource():
106 wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]})
107 retry(setSource)
108
109 sys.stdout.write('\t'.join([
110 feature.id,
111 newfeature['features'][0]['uniquename'],
112 'success',
113 ]))
114 else:
115 try:
116 # We're experiencing a (transient?) problem where gene_001 to
117 # gene_025 will be rejected. Thus, hardcode to a known working
118 # gene name and update later.
119 featureData[0]['name'] = 'gene_000'
120 # Extract the CDS feature from the feature data; this will be used
121 # to set the CDS location correctly (Apollo 2.0.6 currently gets
122 # this wrong)
123 CDS = featureData[0]['children'][0]['children']
124 CDS = [x for x in CDS if x['type']['name'] == 'CDS'][0]['location']
125 # Create the new feature
126 newfeature = wa.annotations.addFeature(featureData, trustme=True)
127 # Extract the UUIDs that apollo returns to us
128 mrna_id = newfeature['features'][0]['uniquename']
129 gene_id = newfeature['features'][0]['parent_id']
130 # Sleep to give it time to actually persist the feature. Apollo
131 # is terrible about writing + immediately reading back written
132 # data.
133 time.sleep(1)
134 # Correct the translation start, but with strand-specific logic
135 if CDS['strand'] == 1:
136 wa.annotations.setTranslationStart(mrna_id, min(CDS['fmin'], CDS['fmax']))
137 else:
138 wa.annotations.setTranslationStart(mrna_id, max(CDS['fmin'], CDS['fmax']) - 1)
139
140 # Finally we set the name, this should be correct.
141 time.sleep(0.5)
142 wa.annotations.setName(mrna_id, feature.qualifiers.get('product', feature.qualifiers.get('Name', ["Unknown"]))[0])
143 time.sleep(0.5)
144
145 def func():
146 wa.annotations.setName(gene_id, feature.qualifiers.get('product', feature.qualifiers.get('Name', ["Unknown"]))[0])
147 retry(func)
148
149 if args.source:
150 gene_id = newfeature['features'][0]['parent_id']
151
152 def setSource():
153 wa.annotations.addAttributes(gene_id, {'DatasetSource': [args.source]})
154 retry(setSource)
155 extra_attr = {}
156 for (key, values) in feature.qualifiers.items():
157 if key in bad_quals:
158 continue
159
160 if key == 'Note':
161 def func2():
162 wa.annotations.addComments(gene_id, values)
163 retry(func2)
164 else:
165 extra_attr[key] = values
166
167 def func3():
168 wa.annotations.addAttributes(gene_id, extra_attr)
169 retry(func3)
170
171 sys.stdout.write('\t'.join([
172 feature.id,
173 gene_id,
174 'success',
175 ]))
176 except Exception as e:
177 msg = str(e)
178 if '\n' in msg:
179 msg = msg[0:msg.index('\n')]
180 sys.stdout.write('\t'.join([
181 feature.id,
182 '',
183 'ERROR',
184 msg
185 ]))
186 sys.stdout.write('\n')
187 sys.stdout.flush()