Mercurial > repos > jjohnson > iedb_api
comparison iedb_api.py @ 1:4a89ba6cfc63 draft
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 18698e056ccc2d6d37836bd22728e2d8765e92ec"
field | value |
---|---|
author | jjohnson |
date | Tue, 25 Feb 2020 17:37:34 -0500 |
parents | 991424605492 |
children | 153d5fa7af53 |

comparison
legend: equal, deleted, inserted, replaced

0:991424605492 | 1:4a89ba6cfc63 |
---|---|
1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
2 """ | 2 |
3 """ | 3 import argparse |
4 import sys | |
5 import os.path | 4 import os.path |
6 import re | 5 import re |
7 import optparse | 6 import sys |
8 import urllib | 7 import time |
9 import urllib2 | 8 |
10 from optparse import OptionParser | 9 from urllib.error import HTTPError |
11 | 10 from urllib.parse import urlencode, unquote |
12 mhci_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket'] | 11 from urllib.request import urlopen |
13 mhcii_methods = ['recommended','consensus3','NetMHCIIpan','nn_align','smm_align','comblib','tepitope'] | 12 |
14 processing_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008'] | 13 mhci_methods = ['recommended', 'consensus', |
15 mhcnp_methods = ['mhcnp'] | 14 'netmhcpan_ba', 'netmhcpan_el', |
16 bcell_methods = ['Bepipred','Chou-FasmanEmini','Karplus-Schulz','Kolaskar-Tongaonkar','Parker'] | 15 'ann', 'smmpmbec', 'smm', |
17 prediction_methods = {'mhci':mhci_methods,'mhcii':mhcii_methods,'processing':processing_methods,'mhcnp':mhcnp_methods,'bcell':bcell_methods} | 16 'comblib_sidney2008', 'netmhccons', |
18 | 17 'pickpocket', 'netmhcstabpan'] |
19 def warn_err(msg,exit_code=1): | 18 mhcii_methods = ['recommended', 'consensus', 'NetMHCIIpan', |
20 sys.stderr.write(msg) | 19 'nn_align', 'smm_align', 'comblib', 'tepitope'] |
21 if exit_code: | 20 processing_methods = ['recommended', 'netmhcpan', 'ann', |
22 sys.exit(exit_code) | 21 'smmpmbec', 'smm', 'comblib_sidney2008', |
22 'netmhccons', 'pickpocket'] | |
23 mhcnp_methods = ['mhcnp', 'netmhcpan'] | |
24 bcell_methods = ['Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz', | |
25 'Kolaskar-Tongaonkar', 'Parker', 'Bepipred-2.0'] | |
26 prediction_methods = {'mhci': mhci_methods, | |
27 'mhcii': mhcii_methods, | |
28 'processing': processing_methods, | |
29 'mhcnp': mhcnp_methods, | |
30 'bcell': bcell_methods} | |
31 all_methods = set(mhci_methods + mhcii_methods + | |
32 mhcnp_methods + bcell_methods) | |
33 prediction_lengths = {'mhci': range(8, 16), | |
34 'mhcii': range(11, 31), | |
35 'processing': range(8, 15), | |
36 'mhcnp': range(8, 12), | |
37 'bcell': range(8, 16)} | |
38 | |
39 | |
40 def warn_err(msg, exit_code=1): | |
41 sys.stderr.write(msg) | |
42 if exit_code: | |
43 sys.exit(exit_code) | |
23 | 44 |
24 | 45 |
25 def __main__(): | 46 def __main__(): |
26 #Parse Command Line | 47 # Parse Command Line |
27 parser = optparse.OptionParser() | 48 parser = argparse.ArgumentParser(description='', epilog='') |
28 parser.add_option( '-p', '--prediction', dest='prediction', default='mhci', choices=['mhci','mhcii','processing','mhcnp','bcell'], help='IEDB API prediction service' ) | 49 parser.add_argument('-p', '--prediction', |
29 parser.add_option( '-s', '--sequence', dest='sequence', action="append", default=None, help='Peptide Sequence' ) | 50 default='mhci', |
30 parser.add_option( '-m', '--method', dest='method', default='recommended', choices=['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket' ], help='prediction method' ) | 51 choices=prediction_methods.keys(), |
31 parser.add_option( '-a', '--allele', dest='allele', action="append", default=[], help='Alleles for which to make predictions' ) | 52 help='IEDB API prediction service') |
32 parser.add_option( '-l', '--length', dest='length', action="append", default=[], choices=['8', '9', '10', '11', '12', '13', '14', '15'], help='lengths for which to make predictions, 1 per allele' ) | 53 parser.add_argument('-s', '--sequence', |
33 parser.add_option( '-i', '--input', dest='input', default=None, help='Input file for peptide sequences (fasta or tabular)' ) | 54 action="append", |
34 parser.add_option( '-c', '--column', dest='column', default=None, help='Peptide Column in a tabular input file' ) | 55 default=None, |
35 parser.add_option( '-C', '--id_column', dest='id_column', default=None, help='ID Column in a tabular input file' ) | 56 help='Peptide Sequence') |
36 parser.add_option( '-o', '--output', dest='output', default=None, help='Output file for query results' ) | 57 parser.add_argument('-m', '--method', |
37 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr' ) | 58 default='recommended', |
38 (options, args) = parser.parse_args() | 59 choices=all_methods, |
39 | 60 help='prediction method') |
40 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' | 61 parser.add_argument('-P', '--proteasome', |
41 | 62 default=None, |
42 if not options.allele and options.prediction != 'bcell': | 63 choices=['immuno', 'constitutive'], |
43 warn_err('-a allele required\n', exit_code=1) | 64 help='IEDB processing proteasome type') |
44 | 65 parser.add_argument('-a', '--allele', |
45 if not (options.sequence or options.input): | 66 action="append", |
46 warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1) | 67 default=[], |
47 | 68 help='Alleles for which to make predictions') |
48 if options.output != None: | 69 parser.add_argument('-l', '--length', |
49 try: | 70 action="append", |
50 outputPath = os.path.abspath(options.output) | 71 default=[], |
51 outputFile = open(outputPath, 'w') | 72 help='lengths for which to make predictions, ' + |
52 except Exception, e: | 73 '1 per allele') |
53 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | 74 parser.add_argument('-w', '--window_size', |
54 else: | 75 type=int, |
55 outputFile = sys.stdout | 76 default=None, |
56 | 77 help='window_size for bcell prediction') |
57 url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction | 78 parser.add_argument('-i', '--input', |
58 | 79 default=None, |
59 #TODO parse alleles from the options.alleles file | 80 help='Input file for peptide sequences ' + |
60 alleles = ','.join(options.allele) | 81 '(fasta or tabular)') |
61 lengths = ','.join(options.length) | 82 parser.add_argument('-c', '--column', |
62 method = options.method | 83 default=None, |
63 | 84 help='Peptide Column in a tabular input file') |
64 results = [] | 85 parser.add_argument('-C', '--id_column', |
65 global header | 86 default=None, |
66 header = None | 87 help='ID Column in a tabular input file') |
67 | 88 parser.add_argument('-o', '--output', |
68 sequence_text = [] | 89 default=None, |
69 def add_seq(seqid,seq): | 90 help='Output file for query results') |
70 sequence_text.append(">%s\n%s" % (seqid if seqid else "peptide%d" % len(sequence_text),seq)) | 91 parser.add_argument('-O', '--output2', |
71 | 92 default='iedb_results2', |
72 def query(url,seq,allele,length,seqid=None,method='recommended'): | 93 help='Output file for secondary query results') |
94 parser.add_argument('-t', '--timeout', | |
95 type=int, | |
96 default=600, | |
97 help='Seconds to wait for server response') | |
98 parser.add_argument('-r', '--retries', | |
99 type=int, | |
100 default=5, | |
101 help='Number of times to retry server query') | |
102 parser.add_argument('-S', '--sleep', | |
103 type=int, | |
104 default=300, | |
105 help='Seconds to wait between retries') | |
106 parser.add_argument('-d', '--debug', | |
107 action='store_true', | |
108 default=False, | |
109 help='Turn on wrapper debugging to stderr') | |
110 args = parser.parse_args() | |
111 | |
112 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' | |
113 | |
114 if not args.allele and args.prediction != 'bcell': | |
115 warn_err('-a allele required\n', exit_code=1) | |
116 | |
117 if not (args.sequence or args.input): | |
118 warn_err('NO Sequences given: ' + | |
119 'either -s sequence or -i input_file is required\n', | |
120 exit_code=1) | |
121 | |
122 if args.output is not None: | |
123 try: | |
124 outputPath = os.path.abspath(args.output) | |
125 outputFile = open(outputPath, 'w') | |
126 except Exception as e: | |
127 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | |
128 else: | |
129 outputFile = sys.stdout | |
130 | |
131 url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ | |
132 args.prediction | |
133 len_param = 'length' if args.prediction != 'bcell' else 'window_size' | |
134 | |
135 # TODO parse alleles from the args.alleles file | |
136 alleles = ','.join(args.allele) if args.prediction != 'bcell' else None | |
137 lengths = ','.join(args.length) | |
138 if args.prediction == 'bcell': | |
139 lengths = args.window_size | |
140 method = args.method | |
141 proteasome = args.proteasome if args.prediction == 'processcing' else None | |
73 global header | 142 global header |
74 params = dict() | 143 header = None |
75 if method: | 144 results = [] |
76 params['method'] = method | 145 global header2 |
77 params['sequence_text'] = seq | 146 header2 = None |
78 params['allele'] = allele | 147 results2 = [] |
79 params['length'] = length | 148 |
80 data = urllib.urlencode(params) | 149 sequence_text = [] |
81 request = urllib2.Request(url, data) | 150 |
82 if options.debug: | 151 def add_seq(seqid, seq): |
83 print >> sys.stderr, "url %s %s %s" % (request.get_full_url(), seqid if seqid else "None", seq) | 152 sid = seqid if seqid else "peptide%d" % len(sequence_text) |
84 response = None | 153 sequence_text.append(">%s\n%s" % (sid, seq)) |
85 response = urllib2.urlopen(request) | 154 |
86 if response and response.getcode() == 200: | 155 def query(url, seq, allele, length, seqid=None, method='recommended'): |
87 resp_data = response.readlines() | 156 global header |
88 for line in resp_data: | 157 global header2 |
89 if line.find('eptide') > 0: | 158 params = dict() |
90 header = "#%s%s" % ("ID\t" if seqid else "", line) | 159 if method: |
91 continue | 160 params['method'] = method.encode() |
92 if seqid: | 161 if proteasome: |
93 results.append("%s\t%s" % (seqid,line)) | 162 params['proteasome'] = proteasome.encode() |
163 params['sequence_text'] = seq.encode() | |
164 if allele is not None: | |
165 params['allele'] = allele.encode() | |
166 if length is not None: | |
167 params[len_param] = str(length).encode() | |
168 req_data = urlencode(params) | |
169 if args.debug: | |
170 print('url %s %s' % (url, unquote(req_data)), file=sys.stderr) | |
171 retries = max(0, args.retries) + 1 | |
172 for retry in range(1, retries): | |
173 response = None | |
174 try: | |
175 response = urlopen(url, data=req_data.encode('utf-8'), | |
176 timeout=args.timeout) | |
177 if response and response.getcode() == 200: | |
178 data = [line.decode() for line in response.readlines()] | |
179 if args.debug: | |
180 print(data, file=sys.stderr) | |
181 rslts = results | |
182 for ln, line in enumerate(data): | |
183 if line.lower().find('invalid') >= 0: | |
184 msg = '%s %s\n%s' % (url, unquote(req_data), | |
185 ''.join(data)) | |
186 warn_err(msg, exit_code=1) | |
187 if line.find('eptide') > 0: | |
188 header = "#%s%s" %\ | |
189 ("ID\t" if seqid else "", line) | |
190 if args.debug: | |
191 print(header, file=sys.stderr) | |
192 continue | |
193 elif method == 'Bepipred' and line.find('Residue') > 0: | |
194 header2 = "#%s%s" %\ | |
195 ("ID\t" if seqid else "", line) | |
196 if args.debug: | |
197 print(header2, file=sys.stderr) | |
198 rslts = results2 | |
199 continue | |
200 if seqid: | |
201 rslts.extend("%s\t%s" % (seqid, line)) | |
202 else: | |
203 rslts.extend(line) | |
204 break | |
205 else: | |
206 code = response.getcode() if response else 1 | |
207 warn_err("Error connecting to IEDB server\n", | |
208 exit_code=code) | |
209 except HTTPError as e: | |
210 code = None if retry < args.retries else e.code | |
211 warn_err("%d of %d Error connecting to IEDB server %s\n" % | |
212 (retry, retries, e), | |
213 exit_code=code) | |
214 time.sleep(args.sleep) | |
215 except Exception as e: | |
216 warn_err("Error connecting to IEDB server %s\n" % e, | |
217 exit_code=3) | |
218 | |
219 if args.sequence: | |
220 for i, seq in enumerate(args.sequence): | |
221 query(url, seq, alleles, lengths, seqid=None, method=method) | |
222 if args.input: | |
223 try: | |
224 fh = open(args.input, 'r') | |
225 if args.column: # tabular | |
226 col = int(args.column) | |
227 idcol = int(args.id_column) if args.id_column else None | |
228 for i, line in enumerate(fh): | |
229 fields = line.split('\t') | |
230 if len(fields) > col: | |
231 seq = re.sub('[_*]', '', fields[col]) | |
232 if re.match(aapat, seq): | |
233 if idcol is not None and idcol < len(fields): | |
234 seqid = fields[idcol] | |
235 else: | |
236 seqid = None | |
237 query(url, seq, alleles, lengths, | |
238 seqid=seqid, method=method) | |
239 else: | |
240 warn_err('Line %d, Not a peptide: %s\n' % (i, seq), | |
241 exit_code=None) | |
242 else: # fasta | |
243 seqid = None | |
244 seq = '' | |
245 for i, line in enumerate(fh): | |
246 if line.startswith('>'): | |
247 if seqid and len(seq) > 0: | |
248 query(url, seq, alleles, lengths, | |
249 seqid=seqid, method=method) | |
250 seqid = line[1:].strip() | |
251 seq = '' | |
252 else: | |
253 seq += line.strip() | |
254 if seqid and len(seq) > 0: | |
255 query(url, seq, alleles, lengths, | |
256 seqid=seqid, method=method) | |
257 fh.close() | |
258 except Exception as e: | |
259 warn_err("Unable to open input file: %s\n" % e, exit_code=1) | |
260 | |
261 if header: | |
262 outputFile.write(header) | |
263 for line in results: | |
264 outputFile.write(line) | |
265 if results2: | |
266 if args.output2: | |
267 try: | |
268 outPath = os.path.abspath(args.output2) | |
269 outFile = open(outPath, 'w') | |
270 except Exception as e: | |
271 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | |
94 else: | 272 else: |
95 results.append(line) | 273 outFile = sys.stdout |
96 elif not response: | 274 if header2: |
97 warn_err("NO response from IEDB server\n", exit_code=3) | 275 outFile.write(header2) |
98 else: | 276 for line in results2: |
99 warn_err("Error connecting to IEDB server\n", exit_code=response.getcode()) | 277 outFile.write(line) |
100 | 278 |
101 if options.sequence: | 279 |
102 for i,seq in enumerate(options.sequence): | 280 if __name__ == "__main__": |
103 query(url,seq,alleles,lengths,seqid=None,method=method) | 281 __main__() |
104 if options.input: | |
105 try: | |
106 fh = open(options.input,'r') | |
107 if options.column: ## tabular | |
108 col = int(options.column) | |
109 idcol = int(options.id_column) if options.id_column else None | |
110 for i,line in enumerate(fh): | |
111 fields = line.split('\t') | |
112 if len(fields) > col: | |
113 seq = re.sub('[_*]','',fields[col]) | |
114 if re.match(aapat,seq): | |
115 seqid = fields[idcol] if idcol != None and idcol < len(fields) else None | |
116 query(url,seq,alleles,lengths,seqid=seqid,method=method) | |
117 else: | |
118 warn_err('Line %d, Not a peptide: %s\n' % (i,seq),exit_code=None) | |
119 else: ## fasta | |
120 seqid = None | |
121 seq = '' | |
122 for i,line in enumerate(fh): | |
123 if line.startswith('>'): | |
124 if seqid and len(seq) > 0: | |
125 query(url,seq,alleles,lengths,seqid=seqid,method=method) | |
126 seqid = line[1:].strip() | |
127 seq = '' | |
128 else: | |
129 seq += line.strip() | |
130 if seqid and len(seq) > 0: | |
131 query(url,seq,alleles,lengths,seqid=seqid,method=method) | |
132 fh.close() | |
133 except Exception, e: | |
134 warn_err("Unable to open input file: %s\n" % e, exit_code=1) | |
135 | |
136 if header: | |
137 outputFile.write(header) | |
138 for line in results: | |
139 outputFile.write(line) | |
140 | |
141 if __name__ == "__main__": __main__() | |
142 |