comparison iedb_api.py @ 1:4a89ba6cfc63 draft

"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 18698e056ccc2d6d37836bd22728e2d8765e92ec"
author jjohnson
date Tue, 25 Feb 2020 17:37:34 -0500
parents 991424605492
children 153d5fa7af53
comparison
equal deleted inserted replaced
0:991424605492 1:4a89ba6cfc63
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 """ 2
3 """ 3 import argparse
4 import sys
5 import os.path 4 import os.path
6 import re 5 import re
7 import optparse 6 import sys
8 import urllib 7 import time
9 import urllib2 8
10 from optparse import OptionParser 9 from urllib.error import HTTPError
11 10 from urllib.parse import urlencode, unquote
12 mhci_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket'] 11 from urllib.request import urlopen
13 mhcii_methods = ['recommended','consensus3','NetMHCIIpan','nn_align','smm_align','comblib','tepitope'] 12
14 processing_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008'] 13 mhci_methods = ['recommended', 'consensus',
15 mhcnp_methods = ['mhcnp'] 14 'netmhcpan_ba', 'netmhcpan_el',
16 bcell_methods = ['Bepipred','Chou-FasmanEmini','Karplus-Schulz','Kolaskar-Tongaonkar','Parker'] 15 'ann', 'smmpmbec', 'smm',
17 prediction_methods = {'mhci':mhci_methods,'mhcii':mhcii_methods,'processing':processing_methods,'mhcnp':mhcnp_methods,'bcell':bcell_methods} 16 'comblib_sidney2008', 'netmhccons',
18 17 'pickpocket', 'netmhcstabpan']
19 def warn_err(msg,exit_code=1): 18 mhcii_methods = ['recommended', 'consensus', 'NetMHCIIpan',
20 sys.stderr.write(msg) 19 'nn_align', 'smm_align', 'comblib', 'tepitope']
21 if exit_code: 20 processing_methods = ['recommended', 'netmhcpan', 'ann',
22 sys.exit(exit_code) 21 'smmpmbec', 'smm', 'comblib_sidney2008',
22 'netmhccons', 'pickpocket']
23 mhcnp_methods = ['mhcnp', 'netmhcpan']
24 bcell_methods = ['Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz',
25 'Kolaskar-Tongaonkar', 'Parker', 'Bepipred-2.0']
26 prediction_methods = {'mhci': mhci_methods,
27 'mhcii': mhcii_methods,
28 'processing': processing_methods,
29 'mhcnp': mhcnp_methods,
30 'bcell': bcell_methods}
31 all_methods = set(mhci_methods + mhcii_methods +
32 mhcnp_methods + bcell_methods)
33 prediction_lengths = {'mhci': range(8, 16),
34 'mhcii': range(11, 31),
35 'processing': range(8, 15),
36 'mhcnp': range(8, 12),
37 'bcell': range(8, 16)}
38
39
40 def warn_err(msg, exit_code=1):
41 sys.stderr.write(msg)
42 if exit_code:
43 sys.exit(exit_code)
23 44
24 45
25 def __main__(): 46 def __main__():
26 #Parse Command Line 47 # Parse Command Line
27 parser = optparse.OptionParser() 48 parser = argparse.ArgumentParser(description='', epilog='')
28 parser.add_option( '-p', '--prediction', dest='prediction', default='mhci', choices=['mhci','mhcii','processing','mhcnp','bcell'], help='IEDB API prediction service' ) 49 parser.add_argument('-p', '--prediction',
29 parser.add_option( '-s', '--sequence', dest='sequence', action="append", default=None, help='Peptide Sequence' ) 50 default='mhci',
30 parser.add_option( '-m', '--method', dest='method', default='recommended', choices=['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket' ], help='prediction method' ) 51 choices=prediction_methods.keys(),
31 parser.add_option( '-a', '--allele', dest='allele', action="append", default=[], help='Alleles for which to make predictions' ) 52 help='IEDB API prediction service')
32 parser.add_option( '-l', '--length', dest='length', action="append", default=[], choices=['8', '9', '10', '11', '12', '13', '14', '15'], help='lengths for which to make predictions, 1 per allele' ) 53 parser.add_argument('-s', '--sequence',
33 parser.add_option( '-i', '--input', dest='input', default=None, help='Input file for peptide sequences (fasta or tabular)' ) 54 action="append",
34 parser.add_option( '-c', '--column', dest='column', default=None, help='Peptide Column in a tabular input file' ) 55 default=None,
35 parser.add_option( '-C', '--id_column', dest='id_column', default=None, help='ID Column in a tabular input file' ) 56 help='Peptide Sequence')
36 parser.add_option( '-o', '--output', dest='output', default=None, help='Output file for query results' ) 57 parser.add_argument('-m', '--method',
37 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr' ) 58 default='recommended',
38 (options, args) = parser.parse_args() 59 choices=all_methods,
39 60 help='prediction method')
40 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' 61 parser.add_argument('-P', '--proteasome',
41 62 default=None,
42 if not options.allele and options.prediction != 'bcell': 63 choices=['immuno', 'constitutive'],
43 warn_err('-a allele required\n', exit_code=1) 64 help='IEDB processing proteasome type')
44 65 parser.add_argument('-a', '--allele',
45 if not (options.sequence or options.input): 66 action="append",
46 warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1) 67 default=[],
47 68 help='Alleles for which to make predictions')
48 if options.output != None: 69 parser.add_argument('-l', '--length',
49 try: 70 action="append",
50 outputPath = os.path.abspath(options.output) 71 default=[],
51 outputFile = open(outputPath, 'w') 72 help='lengths for which to make predictions, ' +
52 except Exception, e: 73 '1 per allele')
53 warn_err("Unable to open output file: %s\n" % e, exit_code=1) 74 parser.add_argument('-w', '--window_size',
54 else: 75 type=int,
55 outputFile = sys.stdout 76 default=None,
56 77 help='window_size for bcell prediction')
57 url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction 78 parser.add_argument('-i', '--input',
58 79 default=None,
59 #TODO parse alleles from the options.alleles file 80 help='Input file for peptide sequences ' +
60 alleles = ','.join(options.allele) 81 '(fasta or tabular)')
61 lengths = ','.join(options.length) 82 parser.add_argument('-c', '--column',
62 method = options.method 83 default=None,
63 84 help='Peptide Column in a tabular input file')
64 results = [] 85 parser.add_argument('-C', '--id_column',
65 global header 86 default=None,
66 header = None 87 help='ID Column in a tabular input file')
67 88 parser.add_argument('-o', '--output',
68 sequence_text = [] 89 default=None,
69 def add_seq(seqid,seq): 90 help='Output file for query results')
70 sequence_text.append(">%s\n%s" % (seqid if seqid else "peptide%d" % len(sequence_text),seq)) 91 parser.add_argument('-O', '--output2',
71 92 default='iedb_results2',
72 def query(url,seq,allele,length,seqid=None,method='recommended'): 93 help='Output file for secondary query results')
94 parser.add_argument('-t', '--timeout',
95 type=int,
96 default=600,
97 help='Seconds to wait for server response')
98 parser.add_argument('-r', '--retries',
99 type=int,
100 default=5,
101 help='Number of times to retry server query')
102 parser.add_argument('-S', '--sleep',
103 type=int,
104 default=300,
105 help='Seconds to wait between retries')
106 parser.add_argument('-d', '--debug',
107 action='store_true',
108 default=False,
109 help='Turn on wrapper debugging to stderr')
110 args = parser.parse_args()
111
112 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$'
113
114 if not args.allele and args.prediction != 'bcell':
115 warn_err('-a allele required\n', exit_code=1)
116
117 if not (args.sequence or args.input):
118 warn_err('NO Sequences given: ' +
119 'either -s sequence or -i input_file is required\n',
120 exit_code=1)
121
122 if args.output is not None:
123 try:
124 outputPath = os.path.abspath(args.output)
125 outputFile = open(outputPath, 'w')
126 except Exception as e:
127 warn_err("Unable to open output file: %s\n" % e, exit_code=1)
128 else:
129 outputFile = sys.stdout
130
131 url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
132 args.prediction
133 len_param = 'length' if args.prediction != 'bcell' else 'window_size'
134
135 # TODO parse alleles from the args.alleles file
136 alleles = ','.join(args.allele) if args.prediction != 'bcell' else None
137 lengths = ','.join(args.length)
138 if args.prediction == 'bcell':
139 lengths = args.window_size
140 method = args.method
141 proteasome = args.proteasome if args.prediction == 'processcing' else None
73 global header 142 global header
74 params = dict() 143 header = None
75 if method: 144 results = []
76 params['method'] = method 145 global header2
77 params['sequence_text'] = seq 146 header2 = None
78 params['allele'] = allele 147 results2 = []
79 params['length'] = length 148
80 data = urllib.urlencode(params) 149 sequence_text = []
81 request = urllib2.Request(url, data) 150
82 if options.debug: 151 def add_seq(seqid, seq):
83 print >> sys.stderr, "url %s %s %s" % (request.get_full_url(), seqid if seqid else "None", seq) 152 sid = seqid if seqid else "peptide%d" % len(sequence_text)
84 response = None 153 sequence_text.append(">%s\n%s" % (sid, seq))
85 response = urllib2.urlopen(request) 154
86 if response and response.getcode() == 200: 155 def query(url, seq, allele, length, seqid=None, method='recommended'):
87 resp_data = response.readlines() 156 global header
88 for line in resp_data: 157 global header2
89 if line.find('eptide') > 0: 158 params = dict()
90 header = "#%s%s" % ("ID\t" if seqid else "", line) 159 if method:
91 continue 160 params['method'] = method.encode()
92 if seqid: 161 if proteasome:
93 results.append("%s\t%s" % (seqid,line)) 162 params['proteasome'] = proteasome.encode()
163 params['sequence_text'] = seq.encode()
164 if allele is not None:
165 params['allele'] = allele.encode()
166 if length is not None:
167 params[len_param] = str(length).encode()
168 req_data = urlencode(params)
169 if args.debug:
170 print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)
171 retries = max(0, args.retries) + 1
172 for retry in range(1, retries):
173 response = None
174 try:
175 response = urlopen(url, data=req_data.encode('utf-8'),
176 timeout=args.timeout)
177 if response and response.getcode() == 200:
178 data = [line.decode() for line in response.readlines()]
179 if args.debug:
180 print(data, file=sys.stderr)
181 rslts = results
182 for ln, line in enumerate(data):
183 if line.lower().find('invalid') >= 0:
184 msg = '%s %s\n%s' % (url, unquote(req_data),
185 ''.join(data))
186 warn_err(msg, exit_code=1)
187 if line.find('eptide') > 0:
188 header = "#%s%s" %\
189 ("ID\t" if seqid else "", line)
190 if args.debug:
191 print(header, file=sys.stderr)
192 continue
193 elif method == 'Bepipred' and line.find('Residue') > 0:
194 header2 = "#%s%s" %\
195 ("ID\t" if seqid else "", line)
196 if args.debug:
197 print(header2, file=sys.stderr)
198 rslts = results2
199 continue
200 if seqid:
201 rslts.extend("%s\t%s" % (seqid, line))
202 else:
203 rslts.extend(line)
204 break
205 else:
206 code = response.getcode() if response else 1
207 warn_err("Error connecting to IEDB server\n",
208 exit_code=code)
209 except HTTPError as e:
210 code = None if retry < args.retries else e.code
211 warn_err("%d of %d Error connecting to IEDB server %s\n" %
212 (retry, retries, e),
213 exit_code=code)
214 time.sleep(args.sleep)
215 except Exception as e:
216 warn_err("Error connecting to IEDB server %s\n" % e,
217 exit_code=3)
218
219 if args.sequence:
220 for i, seq in enumerate(args.sequence):
221 query(url, seq, alleles, lengths, seqid=None, method=method)
222 if args.input:
223 try:
224 fh = open(args.input, 'r')
225 if args.column: # tabular
226 col = int(args.column)
227 idcol = int(args.id_column) if args.id_column else None
228 for i, line in enumerate(fh):
229 fields = line.split('\t')
230 if len(fields) > col:
231 seq = re.sub('[_*]', '', fields[col])
232 if re.match(aapat, seq):
233 if idcol is not None and idcol < len(fields):
234 seqid = fields[idcol]
235 else:
236 seqid = None
237 query(url, seq, alleles, lengths,
238 seqid=seqid, method=method)
239 else:
240 warn_err('Line %d, Not a peptide: %s\n' % (i, seq),
241 exit_code=None)
242 else: # fasta
243 seqid = None
244 seq = ''
245 for i, line in enumerate(fh):
246 if line.startswith('>'):
247 if seqid and len(seq) > 0:
248 query(url, seq, alleles, lengths,
249 seqid=seqid, method=method)
250 seqid = line[1:].strip()
251 seq = ''
252 else:
253 seq += line.strip()
254 if seqid and len(seq) > 0:
255 query(url, seq, alleles, lengths,
256 seqid=seqid, method=method)
257 fh.close()
258 except Exception as e:
259 warn_err("Unable to open input file: %s\n" % e, exit_code=1)
260
261 if header:
262 outputFile.write(header)
263 for line in results:
264 outputFile.write(line)
265 if results2:
266 if args.output2:
267 try:
268 outPath = os.path.abspath(args.output2)
269 outFile = open(outPath, 'w')
270 except Exception as e:
271 warn_err("Unable to open output file: %s\n" % e, exit_code=1)
94 else: 272 else:
95 results.append(line) 273 outFile = sys.stdout
96 elif not response: 274 if header2:
97 warn_err("NO response from IEDB server\n", exit_code=3) 275 outFile.write(header2)
98 else: 276 for line in results2:
99 warn_err("Error connecting to IEDB server\n", exit_code=response.getcode()) 277 outFile.write(line)
100 278
101 if options.sequence: 279
102 for i,seq in enumerate(options.sequence): 280 if __name__ == "__main__":
103 query(url,seq,alleles,lengths,seqid=None,method=method) 281 __main__()
104 if options.input:
105 try:
106 fh = open(options.input,'r')
107 if options.column: ## tabular
108 col = int(options.column)
109 idcol = int(options.id_column) if options.id_column else None
110 for i,line in enumerate(fh):
111 fields = line.split('\t')
112 if len(fields) > col:
113 seq = re.sub('[_*]','',fields[col])
114 if re.match(aapat,seq):
115 seqid = fields[idcol] if idcol != None and idcol < len(fields) else None
116 query(url,seq,alleles,lengths,seqid=seqid,method=method)
117 else:
118 warn_err('Line %d, Not a peptide: %s\n' % (i,seq),exit_code=None)
119 else: ## fasta
120 seqid = None
121 seq = ''
122 for i,line in enumerate(fh):
123 if line.startswith('>'):
124 if seqid and len(seq) > 0:
125 query(url,seq,alleles,lengths,seqid=seqid,method=method)
126 seqid = line[1:].strip()
127 seq = ''
128 else:
129 seq += line.strip()
130 if seqid and len(seq) > 0:
131 query(url,seq,alleles,lengths,seqid=seqid,method=method)
132 fh.close()
133 except Exception, e:
134 warn_err("Unable to open input file: %s\n" % e, exit_code=1)
135
136 if header:
137 outputFile.write(header)
138 for line in results:
139 outputFile.write(line)
140
141 if __name__ == "__main__": __main__()
142