Mercurial > repos > jjohnson > iedb_api
comparison iedb_api.py @ 1:4a89ba6cfc63 draft
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 18698e056ccc2d6d37836bd22728e2d8765e92ec"
| author | jjohnson |
|---|---|
| date | Tue, 25 Feb 2020 17:37:34 -0500 |
| parents | 991424605492 |
| children | 153d5fa7af53 |
comparison
equal
deleted
inserted
replaced
| 0:991424605492 | 1:4a89ba6cfc63 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 """ | 2 |
| 3 """ | 3 import argparse |
| 4 import sys | |
| 5 import os.path | 4 import os.path |
| 6 import re | 5 import re |
| 7 import optparse | 6 import sys |
| 8 import urllib | 7 import time |
| 9 import urllib2 | 8 |
| 10 from optparse import OptionParser | 9 from urllib.error import HTTPError |
| 11 | 10 from urllib.parse import urlencode, unquote |
| 12 mhci_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket'] | 11 from urllib.request import urlopen |
| 13 mhcii_methods = ['recommended','consensus3','NetMHCIIpan','nn_align','smm_align','comblib','tepitope'] | 12 |
| 14 processing_methods = ['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008'] | 13 mhci_methods = ['recommended', 'consensus', |
| 15 mhcnp_methods = ['mhcnp'] | 14 'netmhcpan_ba', 'netmhcpan_el', |
| 16 bcell_methods = ['Bepipred','Chou-FasmanEmini','Karplus-Schulz','Kolaskar-Tongaonkar','Parker'] | 15 'ann', 'smmpmbec', 'smm', |
| 17 prediction_methods = {'mhci':mhci_methods,'mhcii':mhcii_methods,'processing':processing_methods,'mhcnp':mhcnp_methods,'bcell':bcell_methods} | 16 'comblib_sidney2008', 'netmhccons', |
| 18 | 17 'pickpocket', 'netmhcstabpan'] |
| 19 def warn_err(msg,exit_code=1): | 18 mhcii_methods = ['recommended', 'consensus', 'NetMHCIIpan', |
| 20 sys.stderr.write(msg) | 19 'nn_align', 'smm_align', 'comblib', 'tepitope'] |
| 21 if exit_code: | 20 processing_methods = ['recommended', 'netmhcpan', 'ann', |
| 22 sys.exit(exit_code) | 21 'smmpmbec', 'smm', 'comblib_sidney2008', |
| 22 'netmhccons', 'pickpocket'] | |
| 23 mhcnp_methods = ['mhcnp', 'netmhcpan'] | |
| 24 bcell_methods = ['Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz', | |
| 25 'Kolaskar-Tongaonkar', 'Parker', 'Bepipred-2.0'] | |
| 26 prediction_methods = {'mhci': mhci_methods, | |
| 27 'mhcii': mhcii_methods, | |
| 28 'processing': processing_methods, | |
| 29 'mhcnp': mhcnp_methods, | |
| 30 'bcell': bcell_methods} | |
| 31 all_methods = set(mhci_methods + mhcii_methods + | |
| 32 mhcnp_methods + bcell_methods) | |
| 33 prediction_lengths = {'mhci': range(8, 16), | |
| 34 'mhcii': range(11, 31), | |
| 35 'processing': range(8, 15), | |
| 36 'mhcnp': range(8, 12), | |
| 37 'bcell': range(8, 16)} | |
| 38 | |
| 39 | |
| 40 def warn_err(msg, exit_code=1): | |
| 41 sys.stderr.write(msg) | |
| 42 if exit_code: | |
| 43 sys.exit(exit_code) | |
| 23 | 44 |
| 24 | 45 |
| 25 def __main__(): | 46 def __main__(): |
| 26 #Parse Command Line | 47 # Parse Command Line |
| 27 parser = optparse.OptionParser() | 48 parser = argparse.ArgumentParser(description='', epilog='') |
| 28 parser.add_option( '-p', '--prediction', dest='prediction', default='mhci', choices=['mhci','mhcii','processing','mhcnp','bcell'], help='IEDB API prediction service' ) | 49 parser.add_argument('-p', '--prediction', |
| 29 parser.add_option( '-s', '--sequence', dest='sequence', action="append", default=None, help='Peptide Sequence' ) | 50 default='mhci', |
| 30 parser.add_option( '-m', '--method', dest='method', default='recommended', choices=['recommended','consensus','netmhcpan','ann','smmpmbec','smm','comblib_sidney2008','netmhccons','pickpocket' ], help='prediction method' ) | 51 choices=prediction_methods.keys(), |
| 31 parser.add_option( '-a', '--allele', dest='allele', action="append", default=[], help='Alleles for which to make predictions' ) | 52 help='IEDB API prediction service') |
| 32 parser.add_option( '-l', '--length', dest='length', action="append", default=[], choices=['8', '9', '10', '11', '12', '13', '14', '15'], help='lengths for which to make predictions, 1 per allele' ) | 53 parser.add_argument('-s', '--sequence', |
| 33 parser.add_option( '-i', '--input', dest='input', default=None, help='Input file for peptide sequences (fasta or tabular)' ) | 54 action="append", |
| 34 parser.add_option( '-c', '--column', dest='column', default=None, help='Peptide Column in a tabular input file' ) | 55 default=None, |
| 35 parser.add_option( '-C', '--id_column', dest='id_column', default=None, help='ID Column in a tabular input file' ) | 56 help='Peptide Sequence') |
| 36 parser.add_option( '-o', '--output', dest='output', default=None, help='Output file for query results' ) | 57 parser.add_argument('-m', '--method', |
| 37 parser.add_option( '-d', '--debug', dest='debug', action='store_true', default=False, help='Turn on wrapper debugging to stderr' ) | 58 default='recommended', |
| 38 (options, args) = parser.parse_args() | 59 choices=all_methods, |
| 39 | 60 help='prediction method') |
| 40 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' | 61 parser.add_argument('-P', '--proteasome', |
| 41 | 62 default=None, |
| 42 if not options.allele and options.prediction != 'bcell': | 63 choices=['immuno', 'constitutive'], |
| 43 warn_err('-a allele required\n', exit_code=1) | 64 help='IEDB processing proteasome type') |
| 44 | 65 parser.add_argument('-a', '--allele', |
| 45 if not (options.sequence or options.input): | 66 action="append", |
| 46 warn_err('NO Sequences given: either -s sequence or -i input_file is required\n', exit_code=1) | 67 default=[], |
| 47 | 68 help='Alleles for which to make predictions') |
| 48 if options.output != None: | 69 parser.add_argument('-l', '--length', |
| 49 try: | 70 action="append", |
| 50 outputPath = os.path.abspath(options.output) | 71 default=[], |
| 51 outputFile = open(outputPath, 'w') | 72 help='lengths for which to make predictions, ' + |
| 52 except Exception, e: | 73 '1 per allele') |
| 53 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | 74 parser.add_argument('-w', '--window_size', |
| 54 else: | 75 type=int, |
| 55 outputFile = sys.stdout | 76 default=None, |
| 56 | 77 help='window_size for bcell prediction') |
| 57 url = 'http://tools-api.iedb.org/tools_api/%s/' % options.prediction | 78 parser.add_argument('-i', '--input', |
| 58 | 79 default=None, |
| 59 #TODO parse alleles from the options.alleles file | 80 help='Input file for peptide sequences ' + |
| 60 alleles = ','.join(options.allele) | 81 '(fasta or tabular)') |
| 61 lengths = ','.join(options.length) | 82 parser.add_argument('-c', '--column', |
| 62 method = options.method | 83 default=None, |
| 63 | 84 help='Peptide Column in a tabular input file') |
| 64 results = [] | 85 parser.add_argument('-C', '--id_column', |
| 65 global header | 86 default=None, |
| 66 header = None | 87 help='ID Column in a tabular input file') |
| 67 | 88 parser.add_argument('-o', '--output', |
| 68 sequence_text = [] | 89 default=None, |
| 69 def add_seq(seqid,seq): | 90 help='Output file for query results') |
| 70 sequence_text.append(">%s\n%s" % (seqid if seqid else "peptide%d" % len(sequence_text),seq)) | 91 parser.add_argument('-O', '--output2', |
| 71 | 92 default='iedb_results2', |
| 72 def query(url,seq,allele,length,seqid=None,method='recommended'): | 93 help='Output file for secondary query results') |
| 94 parser.add_argument('-t', '--timeout', | |
| 95 type=int, | |
| 96 default=600, | |
| 97 help='Seconds to wait for server response') | |
| 98 parser.add_argument('-r', '--retries', | |
| 99 type=int, | |
| 100 default=5, | |
| 101 help='Number of times to retry server query') | |
| 102 parser.add_argument('-S', '--sleep', | |
| 103 type=int, | |
| 104 default=300, | |
| 105 help='Seconds to wait between retries') | |
| 106 parser.add_argument('-d', '--debug', | |
| 107 action='store_true', | |
| 108 default=False, | |
| 109 help='Turn on wrapper debugging to stderr') | |
| 110 args = parser.parse_args() | |
| 111 | |
| 112 aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$' | |
| 113 | |
| 114 if not args.allele and args.prediction != 'bcell': | |
| 115 warn_err('-a allele required\n', exit_code=1) | |
| 116 | |
| 117 if not (args.sequence or args.input): | |
| 118 warn_err('NO Sequences given: ' + | |
| 119 'either -s sequence or -i input_file is required\n', | |
| 120 exit_code=1) | |
| 121 | |
| 122 if args.output is not None: | |
| 123 try: | |
| 124 outputPath = os.path.abspath(args.output) | |
| 125 outputFile = open(outputPath, 'w') | |
| 126 except Exception as e: | |
| 127 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | |
| 128 else: | |
| 129 outputFile = sys.stdout | |
| 130 | |
| 131 url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\ | |
| 132 args.prediction | |
| 133 len_param = 'length' if args.prediction != 'bcell' else 'window_size' | |
| 134 | |
| 135 # TODO parse alleles from the args.alleles file | |
| 136 alleles = ','.join(args.allele) if args.prediction != 'bcell' else None | |
| 137 lengths = ','.join(args.length) | |
| 138 if args.prediction == 'bcell': | |
| 139 lengths = args.window_size | |
| 140 method = args.method | |
| 141 proteasome = args.proteasome if args.prediction == 'processcing' else None | |
| 73 global header | 142 global header |
| 74 params = dict() | 143 header = None |
| 75 if method: | 144 results = [] |
| 76 params['method'] = method | 145 global header2 |
| 77 params['sequence_text'] = seq | 146 header2 = None |
| 78 params['allele'] = allele | 147 results2 = [] |
| 79 params['length'] = length | 148 |
| 80 data = urllib.urlencode(params) | 149 sequence_text = [] |
| 81 request = urllib2.Request(url, data) | 150 |
| 82 if options.debug: | 151 def add_seq(seqid, seq): |
| 83 print >> sys.stderr, "url %s %s %s" % (request.get_full_url(), seqid if seqid else "None", seq) | 152 sid = seqid if seqid else "peptide%d" % len(sequence_text) |
| 84 response = None | 153 sequence_text.append(">%s\n%s" % (sid, seq)) |
| 85 response = urllib2.urlopen(request) | 154 |
| 86 if response and response.getcode() == 200: | 155 def query(url, seq, allele, length, seqid=None, method='recommended'): |
| 87 resp_data = response.readlines() | 156 global header |
| 88 for line in resp_data: | 157 global header2 |
| 89 if line.find('eptide') > 0: | 158 params = dict() |
| 90 header = "#%s%s" % ("ID\t" if seqid else "", line) | 159 if method: |
| 91 continue | 160 params['method'] = method.encode() |
| 92 if seqid: | 161 if proteasome: |
| 93 results.append("%s\t%s" % (seqid,line)) | 162 params['proteasome'] = proteasome.encode() |
| 163 params['sequence_text'] = seq.encode() | |
| 164 if allele is not None: | |
| 165 params['allele'] = allele.encode() | |
| 166 if length is not None: | |
| 167 params[len_param] = str(length).encode() | |
| 168 req_data = urlencode(params) | |
| 169 if args.debug: | |
| 170 print('url %s %s' % (url, unquote(req_data)), file=sys.stderr) | |
| 171 retries = max(0, args.retries) + 1 | |
| 172 for retry in range(1, retries): | |
| 173 response = None | |
| 174 try: | |
| 175 response = urlopen(url, data=req_data.encode('utf-8'), | |
| 176 timeout=args.timeout) | |
| 177 if response and response.getcode() == 200: | |
| 178 data = [line.decode() for line in response.readlines()] | |
| 179 if args.debug: | |
| 180 print(data, file=sys.stderr) | |
| 181 rslts = results | |
| 182 for ln, line in enumerate(data): | |
| 183 if line.lower().find('invalid') >= 0: | |
| 184 msg = '%s %s\n%s' % (url, unquote(req_data), | |
| 185 ''.join(data)) | |
| 186 warn_err(msg, exit_code=1) | |
| 187 if line.find('eptide') > 0: | |
| 188 header = "#%s%s" %\ | |
| 189 ("ID\t" if seqid else "", line) | |
| 190 if args.debug: | |
| 191 print(header, file=sys.stderr) | |
| 192 continue | |
| 193 elif method == 'Bepipred' and line.find('Residue') > 0: | |
| 194 header2 = "#%s%s" %\ | |
| 195 ("ID\t" if seqid else "", line) | |
| 196 if args.debug: | |
| 197 print(header2, file=sys.stderr) | |
| 198 rslts = results2 | |
| 199 continue | |
| 200 if seqid: | |
| 201 rslts.extend("%s\t%s" % (seqid, line)) | |
| 202 else: | |
| 203 rslts.extend(line) | |
| 204 break | |
| 205 else: | |
| 206 code = response.getcode() if response else 1 | |
| 207 warn_err("Error connecting to IEDB server\n", | |
| 208 exit_code=code) | |
| 209 except HTTPError as e: | |
| 210 code = None if retry < args.retries else e.code | |
| 211 warn_err("%d of %d Error connecting to IEDB server %s\n" % | |
| 212 (retry, retries, e), | |
| 213 exit_code=code) | |
| 214 time.sleep(args.sleep) | |
| 215 except Exception as e: | |
| 216 warn_err("Error connecting to IEDB server %s\n" % e, | |
| 217 exit_code=3) | |
| 218 | |
| 219 if args.sequence: | |
| 220 for i, seq in enumerate(args.sequence): | |
| 221 query(url, seq, alleles, lengths, seqid=None, method=method) | |
| 222 if args.input: | |
| 223 try: | |
| 224 fh = open(args.input, 'r') | |
| 225 if args.column: # tabular | |
| 226 col = int(args.column) | |
| 227 idcol = int(args.id_column) if args.id_column else None | |
| 228 for i, line in enumerate(fh): | |
| 229 fields = line.split('\t') | |
| 230 if len(fields) > col: | |
| 231 seq = re.sub('[_*]', '', fields[col]) | |
| 232 if re.match(aapat, seq): | |
| 233 if idcol is not None and idcol < len(fields): | |
| 234 seqid = fields[idcol] | |
| 235 else: | |
| 236 seqid = None | |
| 237 query(url, seq, alleles, lengths, | |
| 238 seqid=seqid, method=method) | |
| 239 else: | |
| 240 warn_err('Line %d, Not a peptide: %s\n' % (i, seq), | |
| 241 exit_code=None) | |
| 242 else: # fasta | |
| 243 seqid = None | |
| 244 seq = '' | |
| 245 for i, line in enumerate(fh): | |
| 246 if line.startswith('>'): | |
| 247 if seqid and len(seq) > 0: | |
| 248 query(url, seq, alleles, lengths, | |
| 249 seqid=seqid, method=method) | |
| 250 seqid = line[1:].strip() | |
| 251 seq = '' | |
| 252 else: | |
| 253 seq += line.strip() | |
| 254 if seqid and len(seq) > 0: | |
| 255 query(url, seq, alleles, lengths, | |
| 256 seqid=seqid, method=method) | |
| 257 fh.close() | |
| 258 except Exception as e: | |
| 259 warn_err("Unable to open input file: %s\n" % e, exit_code=1) | |
| 260 | |
| 261 if header: | |
| 262 outputFile.write(header) | |
| 263 for line in results: | |
| 264 outputFile.write(line) | |
| 265 if results2: | |
| 266 if args.output2: | |
| 267 try: | |
| 268 outPath = os.path.abspath(args.output2) | |
| 269 outFile = open(outPath, 'w') | |
| 270 except Exception as e: | |
| 271 warn_err("Unable to open output file: %s\n" % e, exit_code=1) | |
| 94 else: | 272 else: |
| 95 results.append(line) | 273 outFile = sys.stdout |
| 96 elif not response: | 274 if header2: |
| 97 warn_err("NO response from IEDB server\n", exit_code=3) | 275 outFile.write(header2) |
| 98 else: | 276 for line in results2: |
| 99 warn_err("Error connecting to IEDB server\n", exit_code=response.getcode()) | 277 outFile.write(line) |
| 100 | 278 |
| 101 if options.sequence: | 279 |
| 102 for i,seq in enumerate(options.sequence): | 280 if __name__ == "__main__": |
| 103 query(url,seq,alleles,lengths,seqid=None,method=method) | 281 __main__() |
| 104 if options.input: | |
| 105 try: | |
| 106 fh = open(options.input,'r') | |
| 107 if options.column: ## tabular | |
| 108 col = int(options.column) | |
| 109 idcol = int(options.id_column) if options.id_column else None | |
| 110 for i,line in enumerate(fh): | |
| 111 fields = line.split('\t') | |
| 112 if len(fields) > col: | |
| 113 seq = re.sub('[_*]','',fields[col]) | |
| 114 if re.match(aapat,seq): | |
| 115 seqid = fields[idcol] if idcol != None and idcol < len(fields) else None | |
| 116 query(url,seq,alleles,lengths,seqid=seqid,method=method) | |
| 117 else: | |
| 118 warn_err('Line %d, Not a peptide: %s\n' % (i,seq),exit_code=None) | |
| 119 else: ## fasta | |
| 120 seqid = None | |
| 121 seq = '' | |
| 122 for i,line in enumerate(fh): | |
| 123 if line.startswith('>'): | |
| 124 if seqid and len(seq) > 0: | |
| 125 query(url,seq,alleles,lengths,seqid=seqid,method=method) | |
| 126 seqid = line[1:].strip() | |
| 127 seq = '' | |
| 128 else: | |
| 129 seq += line.strip() | |
| 130 if seqid and len(seq) > 0: | |
| 131 query(url,seq,alleles,lengths,seqid=seqid,method=method) | |
| 132 fh.close() | |
| 133 except Exception, e: | |
| 134 warn_err("Unable to open input file: %s\n" % e, exit_code=1) | |
| 135 | |
| 136 if header: | |
| 137 outputFile.write(header) | |
| 138 for line in results: | |
| 139 outputFile.write(line) | |
| 140 | |
| 141 if __name__ == "__main__": __main__() | |
| 142 |
