Mercurial > repos > iuc > iedb_api
comparison iedb_api.py @ 0:fe3c43451319 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/iedb_api commit 9aaa7c6c7241db52681b12939ebd908902830ef1"
| author | iuc |
|---|---|
| date | Fri, 28 Feb 2020 18:09:34 -0500 |
| parents | |
| children | 6cf84410cb2e |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:fe3c43451319 |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import argparse | |
| 4 import os.path | |
| 5 import re | |
| 6 import sys | |
| 7 import time | |
| 8 from urllib.error import HTTPError | |
| 9 from urllib.parse import unquote, urlencode | |
| 10 from urllib.request import urlopen | |
| 11 | |
# Prediction methods accepted for each IEDB tools-API service.  The service
# names (the keys of ``prediction_methods``) are also the final path
# component of the request URL built in ``__main__``.
mhci_methods = [
    'recommended', 'consensus',
    'netmhcpan_ba', 'netmhcpan_el',
    'ann', 'smmpmbec', 'smm',
    'comblib_sidney2008', 'netmhccons',
    'pickpocket', 'netmhcstabpan',
]
mhcii_methods = [
    'recommended', 'consensus', 'NetMHCIIpan',
    'nn_align', 'smm_align', 'comblib', 'tepitope',
]
processing_methods = [
    'recommended', 'netmhcpan', 'ann',
    'smmpmbec', 'smm', 'comblib_sidney2008',
    'netmhccons', 'pickpocket',
]
mhcnp_methods = ['mhcnp', 'netmhcpan']
bcell_methods = [
    'Bepipred', 'Chou-Fasman', 'Emini', 'Karplus-Schulz',
    'Kolaskar-Tongaonkar', 'Parker', 'Bepipred-2.0',
]
prediction_methods = {
    'mhci': mhci_methods,
    'mhcii': mhcii_methods,
    'processing': processing_methods,
    'mhcnp': mhcnp_methods,
    'bcell': bcell_methods,
}
# Union of the methods of every service; used as the argparse ``choices`` for
# --method.  Derived from ``prediction_methods`` so that no service's list
# (previously ``processing_methods``) can be left out by accident.
all_methods = set().union(*prediction_methods.values())
# Length ranges associated with each service (the range end is exclusive).
# Nothing in this file reads this table; it is kept for importers.
prediction_lengths = {
    'mhci': range(8, 16),
    'mhcii': range(11, 31),
    'processing': range(8, 15),
    'mhcnp': range(8, 12),
    'bcell': range(8, 16),
}
| 37 | |
| 38 | |
def parse_alleles(allelefile, lengths):
    """Read alleles, and the lengths to predict for each, from a file.

    Each non-blank line holds an allele name, optionally followed by
    comma-separated lengths, e.g. ``HLA-A*01:01,8,9``.  One (allele, length)
    pair is produced per length.  A line that lists no lengths falls back to
    the default *lengths*; when there are no defaults either, the allele is
    recorded once with no matching length entry.

    Args:
        allelefile: path of the allele file to read.
        lengths: default lengths for alleles that list none.  May be a list
            or tuple of strings (each entry may itself be comma-separated),
            a single string, or None/empty for "no defaults".

    Returns:
        A tuple ``(alleles, lengths)`` of two lists of strings.
    """
    # Normalise the defaults once.  They are kept separate from the result
    # list so the caller's argument is neither clobbered nor mutated.
    if not lengths:
        default_lengths = []
    else:
        if isinstance(lengths, (list, tuple)):
            items = lengths
        else:
            items = [lengths]
        default_lengths = [part.strip()
                           for item in items
                           for part in str(item).split(',')
                           if part.strip()]

    alleles = []
    allele_lengths = []
    with open(allelefile, 'r') as fh:
        for line in fh:
            fields = [field.strip() for field in line.split(',')]
            allele = fields[0]
            if not allele:
                continue
            # Lengths given on the line win over the defaults.
            line_lengths = [f for f in fields[1:] if f] or default_lengths
            if line_lengths:
                for alen in line_lengths:
                    alleles.append(allele)
                    allele_lengths.append(alen)
            else:
                alleles.append(allele)
    return (alleles, allele_lengths)
| 58 | |
| 59 | |
def query(url, prediction, seq, allele, length, results,
          seqid=None, method='recommended', proteasome=None,
          timeout=300, retries=3, sleep=300, debug=False):
    """POST one sequence to an IEDB tools-API service and collect the reply.

    The reply is read line by line.  A line containing ``eptide`` (anywhere
    but at index 0) becomes the prediction header.  For the ``Bepipred``
    method, a line containing ``Residue`` becomes the detail header and all
    following lines are stored as detail entries.  Every other line is
    appended to the current entry list, prefixed with ``seqid`` and a tab
    when ``seqid`` is given.

    Args:
        url: service URL to POST to.
        prediction: service name; ``'bcell'`` sends the length as
            ``window_size`` instead of ``length``.
        seq: sequence text (surrounding whitespace is stripped).
        allele: value for the ``allele`` field, or None to omit it.
        length: value for the length/window field, or None to omit it.
        results: dict with ``'prediction'`` and ``'detail'`` sub-dicts, each
            holding ``'header'`` and ``'entries'``; updated in place.
        seqid: optional identifier prepended to each result line.
        method: prediction method; omitted from the request when falsy.
        proteasome: proteasome type; omitted from the request when falsy.
        timeout: seconds to wait for the server response.
        retries: number of additional attempts after an HTTP error; at
            least one attempt is always made.
        sleep: seconds to wait between attempts.
        debug: when true, echo the request and raw reply to stderr.

    Returns:
        The same ``results`` dict that was passed in.

    Raises:
        SystemExit: via ``warn_err`` when the reply reports an invalid
            request, the status is not 200, the last attempt fails with an
            HTTP error, or any other error occurs while talking to the
            server.
    """
    # urlencode() quotes str values as UTF-8 itself, so no manual encoding
    # of the individual fields is needed.
    params = {}
    if method:
        params['method'] = method
    if proteasome:
        params['proteasome'] = proteasome
    params['sequence_text'] = seq.strip()
    if allele is not None:
        params['allele'] = allele
    if length is not None:
        length_key = 'window_size' if prediction == 'bcell' else 'length'
        params[length_key] = str(length)
    req_data = urlencode(params)
    if debug:
        print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)

    id_header = "ID\t" if seqid else ""
    attempts = max(0, retries) + 1
    for attempt in range(1, attempts + 1):
        try:
            # Supplying ``data`` makes urlopen issue a POST.
            with urlopen(url, data=req_data.encode('utf-8'),
                         timeout=timeout) as response:
                status = response.getcode()
                data = ([raw.decode() for raw in response.readlines()]
                        if status == 200 else [])
        except HTTPError as e:
            last_attempt = attempt == attempts
            # Only the final failure is fatal; earlier ones just warn.
            warn_err("%d of %d Error connecting to IEDB server %s\n" %
                     (attempt, attempts, e),
                     exit_code=(e.code or 1) if last_attempt else None)
            time.sleep(sleep)
            continue
        except Exception as e:
            warn_err("Error connecting to IEDB server %s\n" % e,
                     exit_code=3)

        if status != 200:
            warn_err("Error connecting to IEDB server\n",
                     exit_code=status or 1)
        if debug:
            print(data, file=sys.stderr)

        entries = results['prediction']['entries']
        for line in data:
            if 'invalid' in line.lower() or 'tools_api.html' in line:
                # The server answered 200 but rejected the request.
                msg = '%s %s\n%s' % (url, unquote(req_data), ''.join(data))
                warn_err(msg, exit_code=1)
            if line.find('eptide') > 0:
                results['prediction']['header'] = "#%s%s" % (id_header, line)
                continue
            elif method == 'Bepipred' and line.find('Residue') > 0:
                results['detail']['header'] = "#%s%s" % (id_header, line)
                # Everything after this header belongs to the detail table.
                entries = results['detail']['entries']
                continue
            # One list entry per line (not one per character).
            entries.append("%s\t%s" % (seqid, line) if seqid else line)
        break
    return results
| 123 | |
| 124 | |
def warn_err(msg, exit_code=1):
    """Write *msg* to stderr and, for a truthy *exit_code*, exit with it.

    A falsy ``exit_code`` (``None`` or ``0``) makes this a plain warning:
    the message is written and flushed and the function returns.
    """
    err = sys.stderr
    err.write(msg)
    err.flush()
    if not exit_code:
        return
    sys.exit(exit_code)
| 130 | |
| 131 | |
def _build_parser():
    """Return the argparse parser describing the wrapper's command line."""
    parser = argparse.ArgumentParser(description='', epilog='')
    parser.add_argument('-p', '--prediction',
                        default='mhci',
                        choices=prediction_methods.keys(),
                        help='IEDB API prediction service')
    parser.add_argument('-s', '--sequence',
                        action="append",
                        default=None,
                        help='Peptide Sequence')
    parser.add_argument('-m', '--method',
                        default='recommended',
                        choices=all_methods,
                        help='prediction method')
    parser.add_argument('-P', '--proteasome',
                        default=None,
                        choices=['immuno', 'constitutive'],
                        help='IEDB processing proteasome type')
    parser.add_argument('-a', '--allele',
                        action="append",
                        default=[],
                        help='Alleles for which to make predictions')
    parser.add_argument('-A', '--allelefile',
                        default=None,
                        help='File of HLA alleles')
    parser.add_argument('-l', '--length',
                        action="append",
                        default=[],
                        help='lengths for which to make predictions, ' +
                             '1 per allele')
    parser.add_argument('-w', '--window_size',
                        type=int,
                        default=None,
                        help='window_size for bcell prediction')
    parser.add_argument('-i', '--input',
                        default=None,
                        help='Input file for peptide sequences ' +
                             '(fasta or tabular)')
    parser.add_argument('-c', '--column',
                        default=None,
                        help='Peptide Column in a tabular input file')
    parser.add_argument('-C', '--id_column',
                        default=None,
                        help='ID Column in a tabular input file')
    parser.add_argument('-o', '--output',
                        default=None,
                        help='Output file for query results')
    parser.add_argument('-O', '--output2',
                        default='iedb_results2',
                        help='Output file for secondary query results')
    parser.add_argument('-t', '--timeout',
                        type=int,
                        default=600,
                        help='Seconds to wait for server response')
    parser.add_argument('-r', '--retries',
                        type=int,
                        default=5,
                        help='Number of times to retry server query')
    parser.add_argument('-S', '--sleep',
                        type=int,
                        default=300,
                        help='Seconds to wait between retries')
    parser.add_argument('-d', '--debug',
                        action='store_true',
                        default=False,
                        help='Turn on wrapper debugging to stderr')
    return parser


def _open_output(path):
    """Open *path* for writing, or return stdout when *path* is None."""
    if path is None:
        return sys.stdout
    try:
        return open(os.path.abspath(path), 'w')
    except Exception as e:
        warn_err("Unable to open output file: %s\n" % e, exit_code=1)


def _write_table(out, table):
    """Write a result table's header and entries, then close a real file."""
    try:
        if table['header']:
            out.write(table['header'])
        out.writelines(table['entries'])
    finally:
        if out is not sys.stdout:
            out.close()


def _tabular_peptides(fh, col, idcol, aapat):
    """Yield (seqid, seq) for each valid peptide of a tab-separated file."""
    for i, line in enumerate(fh):
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) <= col:
            continue
        # Drop stop/gap markers before validating the residue alphabet.
        seq = re.sub('[_*]', '', fields[col].strip())
        if not aapat.match(seq):
            warn_err('Line %d, Not a peptide: %s\n' % (i, seq),
                     exit_code=None)
            continue
        if idcol is not None and idcol < len(fields):
            seqid = fields[idcol]
        else:
            seqid = 'pep_%d' % i
        yield seqid, seq


def _fasta_peptides(fh):
    """Yield (seqid, seq) for each non-empty record of a FASTA file."""
    seqid = None
    parts = []
    for line in fh:
        if line.startswith('>'):
            seq = ''.join(parts)
            if seqid and seq:
                yield seqid, seq
            seqid = line[1:].strip()
            parts = []
        else:
            parts.append(line.strip())
    # Flush the final record, which has no following '>' line.
    seq = ''.join(parts)
    if seqid and seq:
        yield seqid, seq


def __main__():
    """Command-line entry point: query IEDB for each sequence and write out.

    Sequences come from ``-s`` options and/or an input file (tabular when
    ``-c`` is given, FASTA otherwise).  Prediction lines are written to the
    ``-o`` file (stdout by default); detail lines, when any were collected,
    are written to the ``-O`` file.

    Exits through ``warn_err`` when required options are missing or a file
    cannot be opened.
    """
    args = _build_parser().parse_args()

    aapat = re.compile('^[ABCDEFGHIKLMNPQRSTVWY]+$')

    if args.prediction != 'bcell':
        if not args.allele and not args.allelefile:
            warn_err('-a allele or -A allelefile required\n', exit_code=1)

    if not (args.sequence or args.input):
        warn_err('NO Sequences given: ' +
                 'either -s sequence or -i input_file is required\n',
                 exit_code=1)

    outputFile = _open_output(args.output)

    # params
    alleles = []
    lengths = []
    if args.prediction == 'bcell' and args.window_size is not None:
        lengths.append(str(args.window_size))
    else:
        if args.allelefile:
            (alleles, lengths) = parse_alleles(args.allelefile, args.length)
        for i, allele in enumerate(args.allele):
            alleles.append(allele)
            # Reuse the last -l value for surplus alleles; with no -l at all
            # there is nothing to pair, so no length is recorded.
            if args.length:
                lengths.append(args.length[min(i, len(args.length) - 1)])
    allele = ','.join(alleles) if alleles else None
    length = ','.join(lengths) if lengths else None
    method = args.method
    proteasome = args.proteasome if args.prediction == 'processing' else None
    url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
        args.prediction

    # results
    results = {'prediction': {'header': None, 'entries': []},
               'detail': {'header': None, 'entries': []}}

    def run_query(seqid, seq):
        query(url, args.prediction, seq, allele, length, results,
              seqid=seqid, method=method, proteasome=proteasome,
              timeout=args.timeout, retries=args.retries,
              sleep=args.sleep, debug=args.debug)

    if args.sequence:
        for i, seq in enumerate(args.sequence):
            run_query('pep_%d' % i, seq)
    if args.input:
        col = idcol = None
        if args.column:  # tabular
            try:
                col = int(args.column)
                idcol = int(args.id_column) if args.id_column else None
            except ValueError as e:
                warn_err("Invalid column number: %s\n" % e, exit_code=1)
        try:
            with open(args.input, 'r') as fh:
                if col is not None:
                    peptides = _tabular_peptides(fh, col, idcol, aapat)
                else:  # fasta
                    peptides = _fasta_peptides(fh)
                for seqid, seq in peptides:
                    run_query(seqid, seq)
        except OSError as e:
            warn_err("Unable to open input file: %s\n" % e, exit_code=1)

    _write_table(outputFile, results['prediction'])
    if results['detail']['entries']:
        _write_table(_open_output(args.output2 or None), results['detail'])


if __name__ == "__main__":
    __main__()
