# HG changeset patch
# User jjohnson
# Date 1582922714 18000
# Node ID a14128950578cc9c162f4d3b5cb751d4541bc577
# Parent 153d5fa7af53fc9fde6a889efc8ca1887bd1a85a
"planemo upload for repository https://github.com/jj-umn/galaxytools/tree/master/iedb_api commit 98a9dd3bd9c567e8b8e43ac5b54c4ba75a6fe78d"
diff -r 153d5fa7af53 -r a14128950578 iedb_api.py
--- a/iedb_api.py Wed Feb 26 16:27:06 2020 -0500
+++ b/iedb_api.py Fri Feb 28 15:45:14 2020 -0500
@@ -34,10 +34,102 @@
'processing': range(8, 15),
'mhcnp': range(8, 12),
'bcell': range(8, 16)}
+prediction_species = {'mhci': [],
+ 'mhcii': range(11, 31),
+ 'processing': range(8, 15),
+ 'mhcnp': range(8, 12),
+ 'bcell': range(8, 16)}
+
+
+def parse_alleles(allelefile, lengths):
+ alleles = []
+ lengths = []
+ with open(allelefile, 'r') as fh:
+ for i, line in enumerate(fh):
+ fields = line.strip().split(',')
+ allele = fields[0].strip()
+ if allele:
+ if len(fields) > 1:
+ for alen in fields[1:]:
+ alleles.append(allele)
+ lengths.append(alen)
+ elif lengths:
+ for alen in str(lengths).split(','):
+ alleles.append(allele)
+ lengths.append(alen)
+ else:
+ alleles.append(allele)
+ return (alleles, lengths)
+
+
+def query(url, prediction, seq, allele, length, results,
+ seqid=None, method='recommended', proteasome=None,
+ timeout=300, retries=3, sleep=300, debug=False):
+ params = dict()
+ if method:
+ params['method'] = method.encode()
+ if proteasome:
+ params['proteasome'] = proteasome.encode()
+ params['sequence_text'] = seq.strip().encode()
+ if allele is not None:
+ params['allele'] = allele.encode()
+ if length is not None:
+ if prediction == 'bcell':
+ params['window_size'] = str(length).encode()
+ else:
+ params['length'] = str(length).encode()
+ req_data = urlencode(params)
+ if debug:
+ print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)
+ retries = max(0, retries) + 1
+ for retry in range(1, retries):
+ response = None
+ try:
+ response = urlopen(url, data=req_data.encode('utf-8'),
+ timeout=timeout)
+ if response and response.getcode() == 200:
+ data = [line.decode() for line in response.readlines()]
+ if debug:
+ print(data, file=sys.stderr)
+ rslts = results['prediction']['entries']
+ for ln, line in enumerate(data):
+ if 'invalid' in line.lower() or 'tools_api.html' in line:
+ msg = '%s %s\n%s' % (url, unquote(req_data),
+ ''.join(data))
+ warn_err(msg, exit_code=1)
+ if line.find('eptide') > 0:
+ results['prediction']['header'] = "#%s%s" %\
+ ("ID\t" if seqid else "", line)
+ continue
+ elif method == 'Bepipred' and line.find('Residue') > 0:
+ results['detail']['header'] = "#%s%s" %\
+ ("ID\t" if seqid else "", line)
+ rslts = results['detail']['entries']
+ continue
+ if seqid:
+ rslts.extend("%s\t%s" % (seqid, line))
+ else:
+ rslts.extend(line)
+ break
+ else:
+ code = response.getcode() if response else 1
+ warn_err("Error connecting to IEDB server\n",
+ exit_code=code)
+ except HTTPError as e:
+ code = None if retry < retries else e.code
+ warn_err("%d of %d Error connecting to IEDB server %s\n" %
+ (retry, retries, e),
+ exit_code=code)
+ time.sleep(sleep)
+ except Exception as e:
+ warn_err("Error connecting to IEDB server %s\n" % e,
+ exit_code=3)
+ return results
def warn_err(msg, exit_code=1):
sys.stderr.write(msg)
+ sys.stderr.flush()
if exit_code:
sys.exit(exit_code)
@@ -65,6 +157,9 @@
action="append",
default=[],
help='Alleles for which to make predictions')
+ parser.add_argument('-A', '--allelefile',
+ default=None,
+ help='File of HLA alleles')
parser.add_argument('-l', '--length',
action="append",
default=[],
@@ -110,8 +205,9 @@
aapat = '^[ABCDEFGHIKLMNPQRSTVWY]+$'
- if not args.allele and args.prediction != 'bcell':
- warn_err('-a allele required\n', exit_code=1)
+ if args.prediction != 'bcell':
+ if not args.allele and not args.allelefile:
+ warn_err('-a allele or -A allelefile required\n', exit_code=1)
if not (args.sequence or args.input):
warn_err('NO Sequences given: ' +
@@ -127,97 +223,35 @@
else:
outputFile = sys.stdout
- url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
- args.prediction
- len_param = 'length' if args.prediction != 'bcell' else 'window_size'
-
+ alleles = []
+ lengths = []
# TODO parse alleles from the args.alleles file
- alleles = ','.join(args.allele) if args.prediction != 'bcell' else None
- lengths = ','.join(args.length)
- if args.prediction == 'bcell':
- lengths = args.window_size
+ if args.prediction == 'bcell' and args.window_size is not None:
+ lengths.append(str(args.window_size))
+ else:
+ if args.allelefile:
+ (alleles, lengths) = parse_alleles(args.allelefile, args.length)
+ if args.allele:
+ for i, allele in enumerate(args.allele):
+ alleles.append(allele)
+ alen = args.length[i] if i < len(args.length) else args.length[-1]
+ lengths.append(alen)
+ allele = ','.join(alleles) if alleles else None
+ length = ','.join(lengths) if lengths else None
method = args.method
proteasome = args.proteasome if args.prediction == 'processcing' else None
- global header
- header = None
- results = []
- global header2
- header2 = None
- results2 = []
-
- sequence_text = []
-
- def add_seq(seqid, seq):
- sid = seqid if seqid else "peptide%d" % len(sequence_text)
- sequence_text.append(">%s\n%s" % (sid, seq))
-
- def query(url, seq, allele, length, seqid=None, method='recommended'):
- global header
- global header2
- params = dict()
- if method:
- params['method'] = method.encode()
- if proteasome:
- params['proteasome'] = proteasome.encode()
- params['sequence_text'] = seq.encode()
- if allele is not None:
- params['allele'] = allele.encode()
- if length is not None:
- params[len_param] = str(length).encode()
- req_data = urlencode(params)
- if args.debug:
- print('url %s %s' % (url, unquote(req_data)), file=sys.stderr)
- retries = max(0, args.retries) + 1
- for retry in range(1, retries):
- response = None
- try:
- response = urlopen(url, data=req_data.encode('utf-8'),
- timeout=args.timeout)
- if response and response.getcode() == 200:
- data = [line.decode() for line in response.readlines()]
- if args.debug:
- print(data, file=sys.stderr)
- rslts = results
- for ln, line in enumerate(data):
- if line.lower().find('invalid') >= 0:
- msg = '%s %s\n%s' % (url, unquote(req_data),
- ''.join(data))
- warn_err(msg, exit_code=1)
- if line.find('eptide') > 0:
- header = "#%s%s" %\
- ("ID\t" if seqid else "", line)
- if args.debug:
- print(header, file=sys.stderr)
- continue
- elif method == 'Bepipred' and line.find('Residue') > 0:
- header2 = "#%s%s" %\
- ("ID\t" if seqid else "", line)
- if args.debug:
- print(header2, file=sys.stderr)
- rslts = results2
- continue
- if seqid:
- rslts.extend("%s\t%s" % (seqid, line))
- else:
- rslts.extend(line)
- break
- else:
- code = response.getcode() if response else 1
- warn_err("Error connecting to IEDB server\n",
- exit_code=code)
- except HTTPError as e:
- code = None if retry < args.retries else e.code
- warn_err("%d of %d Error connecting to IEDB server %s\n" %
- (retry, retries, e),
- exit_code=code)
- time.sleep(args.sleep)
- except Exception as e:
- warn_err("Error connecting to IEDB server %s\n" % e,
- exit_code=3)
+ url = 'http://tools-cluster-interface.iedb.org/tools_api/%s/' %\
+ args.prediction
+ # results
+ results = {'prediction': {'header': None, 'entries': []}, 'detail': {'header': None, 'entries': []}}
if args.sequence:
for i, seq in enumerate(args.sequence):
- query(url, seq, alleles, lengths, seqid=None, method=method)
+ seqid = 'pep_%d' % i
+ query(url, args.prediction, seq, allele, length, results,
+ seqid=seqid, method=method, proteasome=proteasome,
+ timeout=args.timeout, retries=args.retries,
+ sleep=args.sleep, debug=args.debug)
if args.input:
try:
fh = open(args.input, 'r')
@@ -225,16 +259,19 @@
col = int(args.column)
idcol = int(args.id_column) if args.id_column else None
for i, line in enumerate(fh):
- fields = line.split('\t')
+ fields = line.rstrip('\r\n').split('\t')
if len(fields) > col:
- seq = re.sub('[_*]', '', fields[col])
+ seq = re.sub('[_*]', '', fields[col].strip())
if re.match(aapat, seq):
if idcol is not None and idcol < len(fields):
seqid = fields[idcol]
else:
- seqid = None
- query(url, seq, alleles, lengths,
- seqid=seqid, method=method)
+ seqid = 'pep_%d' % i
+ query(url, args.prediction, seq, allele, length,
+ results, seqid=seqid,
+ method=method, proteasome=proteasome,
+ timeout=args.timeout, retries=args.retries,
+ sleep=args.sleep, debug=args.debug)
else:
warn_err('Line %d, Not a peptide: %s\n' % (i, seq),
exit_code=None)
@@ -244,24 +281,30 @@
for i, line in enumerate(fh):
if line.startswith('>'):
if seqid and len(seq) > 0:
- query(url, seq, alleles, lengths,
- seqid=seqid, method=method)
+ query(url, args.prediction, seq, allele, length,
+ results, seqid=seqid,
+ method=method, proteasome=proteasome,
+ timeout=args.timeout, retries=args.retries,
+ sleep=args.sleep, debug=args.debug)
seqid = line[1:].strip()
seq = ''
else:
seq += line.strip()
if seqid and len(seq) > 0:
- query(url, seq, alleles, lengths,
- seqid=seqid, method=method)
+ query(url, args.prediction, seq, allele, length,
+ results, seqid=seqid,
+ method=method, proteasome=proteasome,
+ timeout=args.timeout, retries=args.retries,
+ sleep=args.sleep, debug=args.debug)
fh.close()
except Exception as e:
warn_err("Unable to open input file: %s\n" % e, exit_code=1)
- if header:
- outputFile.write(header)
- for line in results:
+ if results['prediction']['header']:
+ outputFile.write(results['prediction']['header'])
+ for line in results['prediction']['entries']:
outputFile.write(line)
- if results2:
+ if results['detail']['entries']:
if args.output2:
try:
outPath = os.path.abspath(args.output2)
@@ -270,9 +313,9 @@
warn_err("Unable to open output file: %s\n" % e, exit_code=1)
else:
outFile = sys.stdout
- if header2:
- outFile.write(header2)
- for line in results2:
+ if results['detail']['header']:
+ outFile.write(results['detail']['header'])
+ for line in results['detail']['entries']:
outFile.write(line)
diff -r 153d5fa7af53 -r a14128950578 iedb_api.xml
--- a/iedb_api.xml Wed Feb 26 16:27:06 2020 -0500
+++ b/iedb_api.xml Fri Feb 28 15:45:14 2020 -0500
@@ -24,6 +24,7 @@
python
+
0:
- #if len($fields) > 1:
- #for $alen in $fields[1:]:
- -a '$allele' -l $alen
- #end for
- #else:
- #for $alen in str($prediction.lengths).split(','):
- -a '$allele' -l $alen
- #end for
- #end if
- #end if
- #end for
+ -A '$prediction.alleles.allele_file'
#else:
- #for $word in str($prediction.alleles.allele_text).strip().split():
- #set $fields = $word.strip().split(',')
- #set $allele = $fields[0].strip()
- #if len($allele) > 0:
- #if len($fields) > 1:
- #for $alen in $fields[1:]:
- -a '$allele' -l $alen
- #end for
- #else:
- #for $alen in str($prediction.lengths).split(','):
- -a '$allele' -l $alen
- #end for
- #end if
- #end if
- #end for
+ -A '$entered_alleles'
#end if
#end if
@@ -81,12 +54,33 @@
-C #echo int(str($sequence.id_col)) - 1
#end if
#else:
- #for $seq in str($sequence.seq_text).strip().split():
- -s $seq.strip()
- #end for
+ -i '$entered_seqs' -c 1 -C 0
#end if
-o '$output'
]]>
+
+ 0
+$word
+#else
+#set $allele = $word + ',' + str($prediction.lengths)
+$allele
+#end if
+#end for
+#end if
+]]>
+
+
@@ -132,7 +126,7 @@
-
+
Used for any alleles which don't include specified lengths
@@ -382,6 +376,24 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+