Next changeset 1:6cf84410cb2e (2020-04-07) |
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/iedb_api commit 9aaa7c6c7241db52681b12939ebd908902830ef1" |
added:
iedb_api.py iedb_api.xml static/images/IEDB_Workflow_QueryTabular.png static/images/IEDB_Workflow_TextProcessing.png static/images/IEDB_formatted_alleles.png static/images/TextProcessingConversion.png static/images/seq2HLA_ClassI.HLAgenotype4digits.png test-data/alleles.tsv test-data/bcell.fa test-data/seqs.fa test-data/seqs.tsv |
b |
diff -r 000000000000 -r fe3c43451319 iedb_api.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/iedb_api.py Fri Feb 28 18:09:34 2020 -0500 |
[ |
b'@@ -0,0 +1,319 @@\n+#!/usr/bin/env python\n+\n+import argparse\n+import os.path\n+import re\n+import sys\n+import time\n+from urllib.error import HTTPError\n+from urllib.parse import unquote, urlencode\n+from urllib.request import urlopen\n+\n+mhci_methods = [\'recommended\', \'consensus\',\n+ \'netmhcpan_ba\', \'netmhcpan_el\',\n+ \'ann\', \'smmpmbec\', \'smm\',\n+ \'comblib_sidney2008\', \'netmhccons\',\n+ \'pickpocket\', \'netmhcstabpan\']\n+mhcii_methods = [\'recommended\', \'consensus\', \'NetMHCIIpan\',\n+ \'nn_align\', \'smm_align\', \'comblib\', \'tepitope\']\n+processing_methods = [\'recommended\', \'netmhcpan\', \'ann\',\n+ \'smmpmbec\', \'smm\', \'comblib_sidney2008\',\n+ \'netmhccons\', \'pickpocket\']\n+mhcnp_methods = [\'mhcnp\', \'netmhcpan\']\n+bcell_methods = [\'Bepipred\', \'Chou-Fasman\', \'Emini\', \'Karplus-Schulz\',\n+ \'Kolaskar-Tongaonkar\', \'Parker\', \'Bepipred-2.0\']\n+prediction_methods = {\'mhci\': mhci_methods,\n+ \'mhcii\': mhcii_methods,\n+ \'processing\': processing_methods,\n+ \'mhcnp\': mhcnp_methods,\n+ \'bcell\': bcell_methods}\n+all_methods = set(mhci_methods + mhcii_methods +\n+ mhcnp_methods + bcell_methods)\n+prediction_lengths = {\'mhci\': range(8, 16),\n+ \'mhcii\': range(11, 31),\n+ \'processing\': range(8, 15),\n+ \'mhcnp\': range(8, 12),\n+ \'bcell\': range(8, 16)}\n+\n+\n+def parse_alleles(allelefile, lengths):\n+ alleles = []\n+ lengths = []\n+ with open(allelefile, \'r\') as fh:\n+ for i, line in enumerate(fh):\n+ fields = line.strip().split(\',\')\n+ allele = fields[0].strip()\n+ if allele:\n+ if len(fields) > 1:\n+ for alen in fields[1:]:\n+ alleles.append(allele)\n+ lengths.append(alen)\n+ elif lengths:\n+ for alen in str(lengths).split(\',\'):\n+ alleles.append(allele)\n+ lengths.append(alen)\n+ else:\n+ alleles.append(allele)\n+ return (alleles, lengths)\n+\n+\n+def query(url, prediction, seq, allele, length, results,\n+ seqid=None, method=\'recommended\', proteasome=None,\n+ timeout=300, retries=3, sleep=300, debug=False):\n+ params = dict()\n+ if method:\n+ params[\'method\'] = method.encode()\n+ if proteasome:\n+ params[\'proteasome\'] = proteasome.encode()\n+ params[\'sequence_text\'] = seq.strip().encode()\n+ if allele is not None:\n+ params[\'allele\'] = allele.encode()\n+ if length is not None:\n+ if prediction == \'bcell\':\n+ params[\'window_size\'] = str(length).encode()\n+ else:\n+ params[\'length\'] = str(length).encode()\n+ req_data = urlencode(params)\n+ if debug:\n+ print(\'url %s %s\' % (url, unquote(req_data)), file=sys.stderr)\n+ retries = max(0, retries) + 1\n+ for retry in range(1, retries):\n+ response = None\n+ try:\n+ response = urlopen(url, data=req_data.encode(\'utf-8\'),\n+ timeout=timeout)\n+ if response and response.getcode() == 200:\n+ data = [line.decode() for line in response.readlines()]\n+ if debug:\n+ print(data, file=sys.stderr)\n+ rslts = results[\'prediction\'][\'entries\']\n+ for ln, line in enumerate(data):\n+ if \'invalid\' in line.lower() or \'tools_api.html\' in line:\n+ msg = \'%s %s\\n%s\' % (url, unquote(req_data),\n+ \'\'.join(data))\n+ warn_err(msg, exit_code=1)\n+ if line.find(\'eptide\') > 0:\n+ results[\'prediction\'][\'header\'] = "#%s%s" %\\\n+ ("ID\\t" if seqid else ""'..b'les else None\n+ length = \',\'.join(lengths) if lengths else None\n+ method = args.method\n+ proteasome = args.proteasome if args.prediction == \'processcing\' else None\n+ url = \'http://tools-cluster-interface.iedb.org/tools_api/%s/\' %\\\n+ args.prediction\n+\n+ # results\n+ results = {\'prediction\': {\'header\': None, \'entries\': []}, \'detail\': {\'header\': None, \'entries\': []}}\n+\n+ if args.sequence:\n+ for i, seq in enumerate(args.sequence):\n+ seqid = \'pep_%d\' % i\n+ query(url, args.prediction, seq, allele, length, results,\n+ seqid=seqid, method=method, proteasome=proteasome,\n+ timeout=args.timeout, retries=args.retries,\n+ sleep=args.sleep, debug=args.debug)\n+ if args.input:\n+ try:\n+ fh = open(args.input, \'r\')\n+ if args.column: # tabular\n+ col = int(args.column)\n+ idcol = int(args.id_column) if args.id_column else None\n+ for i, line in enumerate(fh):\n+ fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n+ if len(fields) > col:\n+ seq = re.sub(\'[_*]\', \'\', fields[col].strip())\n+ if re.match(aapat, seq):\n+ if idcol is not None and idcol < len(fields):\n+ seqid = fields[idcol]\n+ else:\n+ seqid = \'pep_%d\' % i\n+ query(url, args.prediction, seq, allele, length,\n+ results, seqid=seqid,\n+ method=method, proteasome=proteasome,\n+ timeout=args.timeout, retries=args.retries,\n+ sleep=args.sleep, debug=args.debug)\n+ else:\n+ warn_err(\'Line %d, Not a peptide: %s\\n\' % (i, seq),\n+ exit_code=None)\n+ else: # fasta\n+ seqid = None\n+ seq = \'\'\n+ for i, line in enumerate(fh):\n+ if line.startswith(\'>\'):\n+ if seqid and len(seq) > 0:\n+ query(url, args.prediction, seq, allele, length,\n+ results, seqid=seqid,\n+ method=method, proteasome=proteasome,\n+ timeout=args.timeout, retries=args.retries,\n+ sleep=args.sleep, debug=args.debug)\n+ seqid = line[1:].strip()\n+ seq = \'\'\n+ else:\n+ seq += line.strip()\n+ if seqid and len(seq) > 0:\n+ query(url, args.prediction, seq, allele, length,\n+ results, seqid=seqid,\n+ method=method, proteasome=proteasome,\n+ timeout=args.timeout, retries=args.retries,\n+ sleep=args.sleep, debug=args.debug)\n+ fh.close()\n+ except Exception as e:\n+ warn_err("Unable to open input file: %s\\n" % e, exit_code=1)\n+\n+ if results[\'prediction\'][\'header\']:\n+ outputFile.write(results[\'prediction\'][\'header\'])\n+ for line in results[\'prediction\'][\'entries\']:\n+ outputFile.write(line)\n+ if results[\'detail\'][\'entries\']:\n+ if args.output2:\n+ try:\n+ outPath = os.path.abspath(args.output2)\n+ outFile = open(outPath, \'w\')\n+ except Exception as e:\n+ warn_err("Unable to open output file: %s\\n" % e, exit_code=1)\n+ else:\n+ outFile = sys.stdout\n+ if results[\'detail\'][\'header\']:\n+ outFile.write(results[\'detail\'][\'header\'])\n+ for line in results[\'detail\'][\'entries\']:\n+ outFile.write(line)\n+\n+\n+if __name__ == "__main__":\n+ __main__()\n' |
b |
diff -r 000000000000 -r fe3c43451319 iedb_api.xml --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/iedb_api.xml Fri Feb 28 18:09:34 2020 -0500 |
[ |
b'@@ -0,0 +1,477 @@\n+<tool id="iedb_api" name="IEDB" version="2.15.0">\n+ <description>MHC Binding prediction</description>\n+ <macros>\n+ <xml name="alleles" token_hla_regex="" token_hla_examples="" token_hlalen_examples=""> \n+ <conditional name="alleles">\n+ <param name="allelesrc" type="select" label="Alleles">\n+ <option value="history">From history</option>\n+ <option value="entry">Entered</option>\n+ </param>\n+ <when value="history">\n+ <param name="allele_file" type="data" format="txt" label="Alleles file">\n+ <help>The dataset should have on allele per line. The allele may be followed by an optional comma-separated list of peptide lengths, e.g.: @HLALEN_EXAMPLES@</help>\n+ </param>\n+ </when>\n+ <when value="entry">\n+ <param name="allele_text" type="text" size="80" label="Alleles">\n+ <help>Enter alleles separated by white space: @HLA_EXAMPLES@ (The peptide lengths may follow each allele: @HLALEN_EXAMPLES@)</help>\n+ <validator type="regex" message="Doesn\'t appear to be a valid allele">^@HLA_REGEX@(\\s+@HLA_REGEX@)*$</validator>\n+ </param>\n+ </when>\n+ </conditional>\n+ </xml>\n+ </macros>\n+ <requirements>\n+ <requirement type="package" version="3.7">python</requirement>\n+ </requirements>\n+\n+ <command detect_errors="exit_code"><![CDATA[\n+ #import re\n+ python \'${__tool_directory__}/iedb_api.py\' \n+ --prediction=$prediction.tool\n+ --method=$prediction.method \n+ #if $prediction.tool == \'bcell\':\n+ #if $prediction.window_size:\n+ -w $prediction.window_size\n+ #end if\n+ #else\n+ #if $prediction.tool == \'processing\' and $prediction.proteasome:\n+ --proteasome $prediction.proteasome\n+ #end if\n+ #if $prediction.alleles.allelesrc == \'history\':\n+ -A \'$prediction.alleles.allele_file\'\n+ #else:\n+ -A \'$entered_alleles\'\n+ #end if\n+ #end if\n+\n+ #if $sequence.seqsrc == \'fasta\':\n+ -i \'$sequence.seq_fasta\'\n+ #else if $sequence.seqsrc == \'tabular\':\n+ -i \'$sequence.seq_tsv\'\n+ -c #echo int(str($sequence.pep_col)) - 1\n+ #if $sequence.id_col:\n+ -C #echo int(str($sequence.id_col)) - 1\n+ #end if\n+ #else:\n+ -i \'$entered_seqs\' -c 1 -C 0\n+ #end if\n+ -o \'$output\'\n+ ]]></command>\n+ <configfiles> \n+ <configfile name="entered_alleles"><![CDATA[#slurp\n+#if $prediction.tool != \'bcell\' and $prediction.alleles.allelesrc == \'entry\'\n+#for $word in str($prediction.alleles.allele_text).strip().split():\n+#if $word.find(\',\') > 0\n+$word\n+#else\n+#set $allele = $word + \',\' + str($prediction.lengths)\n+$allele\n+#end if\n+#end for\n+#end if\n+]]></configfile> \n+ <configfile name="entered_seqs"><![CDATA[#slurp\n+#if $sequence.seqsrc == \'entry\'\n+#for $i, $seq in enumerate(str($sequence.seq_text).strip().split())\n+#set $seqid = $i + 1\n+#set $seqtext = \'\\t\'.join([str($seqid),$seq.strip()])\n+$seqtext\n+#end for\n+#end if\n+]]></configfile> \n+ </configfiles> \n+ <inputs>\n+ <conditional name="prediction">\n+ <param name="tool" type="select" label="Prediction">\n+ <option value="mhci">MHC-I Binding</option>\n+ <option value="mhcii">MHC-II Binding</option>\n+ <option value="processing">MHC-I Processing</option>\n+ <option value="mhcnp">MHC-NP T-Cell Epitope</option>\n+ <option value="bcell">Antibody Epitope Prediction</option>\n+ </param>\n+ <when value="mhci">\n+ <param name="method" type="select" label="prediction method">\n+ <option valu'..b' <param name="method" value="Emini"/>\n+ </conditional>\n+ <conditional name="sequence">\n+ <param name="seqsrc" value="entry"/>\n+ <param name="seq_text" value="VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTE"/>\n+ </conditional>\n+ <output name="output">\n+ <assert_contents>\n+ <has_text text="VLSEGE" />\n+ </assert_contents>\n+ </output>\n+ </test>\n+ <!-- test8 -->\n+ <test>\n+ <conditional name="prediction">\n+ <param name="tool" value="bcell"/>\n+ <param name="method" value="Bepipred"/>\n+ <param name="window_size" value="9"/>\n+ </conditional>\n+ <conditional name="sequence">\n+ <param name="seqsrc" value="fasta"/>\n+ <param name="seq_fasta" ftype="fasta" value="bcell.fa"/>\n+ </conditional>\n+ <output name="output">\n+ <assert_contents>\n+ <has_text text="ADVAGH" />\n+ </assert_contents>\n+ </output>\n+ </test>\n+\n+ </tests>\n+ <help><![CDATA[\n+The IEDB is a free resource, funded by a contract from the National Institute of Allergy and Infectious Diseases. It offers easy searching of experimental data characterizing antibody and T cell epitopes studied in humans, non-human primates, and other animal species. \n+\n+This tool retrieves epitope binding information about input peptide sequences by using the RESTful web services provided by IEDB. \n+The webservices are described at: http://tools.immuneepitope.org/main/tools-api/\n+That page also describes how to retrieve the available HLA alleles for class of epitope binding.\n+\n+**INPUTS**\n+\n+ peptide sequences from a fasta file or a column in a tabular file\n+\n+ HLA alleles either entered as text or one per line in a text file\n+\n+\n+**OUTPUTS**\n+ \n+ A tabular file containing the results returned from the IEDB web service\n+\n+**Typical Workflow for Human MHC I Binding Prediction** \n+\n+The RNAseq data for the subject would be used for:\n+\n+ - HLA prediction by seq2HLA\n+ - Novel Antigen Prediction by a variety of workflows to generate a Antigen peptide fasta \n+\n+\n+.. image:: $PATH_TO_IMAGES/IEDB_Workflow_QueryTabular.png\n+ :width: 584\n+ :height: 430\n+\n+.. note:: The seq2HLA ClassI.HLAgenotype4digits output needs to be converted for IEDB alleles.\n+\n+The seq2HLA ClassI.HLAgenotype4digits output:\n+\n+.. image:: $PATH_TO_IMAGES/seq2HLA_ClassI.HLAgenotype4digits.png\n+ :width: 285\n+ :height: 77\n+\n+Needs to be converted into IEDB formatted alleles:\n+\n+.. image:: $PATH_TO_IMAGES/IEDB_formatted_alleles.png\n+ :width: 74\n+ :height: 81\n+\n+In the workflow above QueryTabular tool converts the alleles:\n+\n+ - Filter Dataset Input\n+\n+ * skip leading lines - *skip lines:* 1\n+ * select columns - *columns:* 2,4\n+ * regex replace value in column - *column:* 1 *regex pattern:* ^(\\\\w+[*]\\\\d\\\\d:\\\\d\\\\d\\\\d?).*$ *replacement expression:* HLA-\\\\1\n+ * regex replace value in column - *column:* 2 *regex pattern:* ^(\\\\w+[*]\\\\d\\\\d:\\\\d\\\\d\\\\d?).*$ *replacement expression:* HLA-\\\\1\n+\n+ - SQL Query to generate tabular output\n+\n+ * SELECT c1 FROM t1 UNION SELECT c2 FROM t1\n+\n+\n+The IEDB formatting can also be performed by TextProcessing tools:\n+\n+.. image:: $PATH_TO_IMAGES/TextProcessingConversion.png\n+ :width: 608\n+ :height: 87\n+\n+The TextProcessing steps to convert the alleles:\n+\n+ - Remove beginning - removes the header line\n+ - Replace Text - picks Allele 1 and Allele 2 from each line and reformats each on a separate line \n+\n+ * *Find pattern:* ^.*\\\\t([a-zA-Z]+[*][0-9]{2}:[0-9]{2,3}).*\\\\t.*\\\\t([a-zA-Z]+[*][0-9]{2}:[0-9]{2,3}).*\\\\t.*$\n+ * *Replace with:* HLA-\\\\1\\\\nHLA-\\\\2\n+\n+ - Unique - remove duplicates\n+\n+\n+ ]]></help>\n+ <citations>\n+ <citation type="doi">10.1093/nar/gku938</citation>\n+ </citations>\n+</tool>\n' |
b |
diff -r 000000000000 -r fe3c43451319 static/images/IEDB_Workflow_QueryTabular.png |
b |
Binary file static/images/IEDB_Workflow_QueryTabular.png has changed |
b |
diff -r 000000000000 -r fe3c43451319 static/images/IEDB_Workflow_TextProcessing.png |
b |
Binary file static/images/IEDB_Workflow_TextProcessing.png has changed |
b |
diff -r 000000000000 -r fe3c43451319 static/images/IEDB_formatted_alleles.png |
b |
Binary file static/images/IEDB_formatted_alleles.png has changed |
b |
diff -r 000000000000 -r fe3c43451319 static/images/TextProcessingConversion.png |
b |
Binary file static/images/TextProcessingConversion.png has changed |
b |
diff -r 000000000000 -r fe3c43451319 static/images/seq2HLA_ClassI.HLAgenotype4digits.png |
b |
Binary file static/images/seq2HLA_ClassI.HLAgenotype4digits.png has changed |
b |
diff -r 000000000000 -r fe3c43451319 test-data/alleles.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/alleles.tsv Fri Feb 28 18:09:34 2020 -0500 |
b |
@@ -0,0 +1,2 @@ +HLA-A*01:01,9 +HLA-A*03:01,10 |
b |
diff -r 000000000000 -r fe3c43451319 test-data/bcell.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/bcell.fa Fri Feb 28 18:09:34 2020 -0500 |
b |
@@ -0,0 +1,6 @@ +>pep1 +VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTE +>pep2 +AGHAHKVPRRLLKAAR +>pep3 +ALKAADASADADGSGSGSGSGAGHAHKVPRRLLKAAR |
b |
diff -r 000000000000 -r fe3c43451319 test-data/seqs.fa --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/seqs.fa Fri Feb 28 18:09:34 2020 -0500 |
b |
@@ -0,0 +1,4 @@ +>peptide1 +GHAHKVPRRLLKAAR +>peptide2 +LKAADASADADGSGSGSGSG |
b |
diff -r 000000000000 -r fe3c43451319 test-data/seqs.tsv --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/seqs.tsv Fri Feb 28 18:09:34 2020 -0500 |
b |
@@ -0,0 +1,2 @@ +peptide1 16 GHAHKVPRRLLKAAR +peptide2 21 LKAADASADADGSGSGSGSG |