Repository 'iedb_api'
hg clone https://toolshed.g2.bx.psu.edu/repos/iuc/iedb_api

Changeset 0:fe3c43451319 (2020-02-28)
Next changeset 1:6cf84410cb2e (2020-04-07)
Commit message:
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/iedb_api commit 9aaa7c6c7241db52681b12939ebd908902830ef1"
added:
iedb_api.py
iedb_api.xml
static/images/IEDB_Workflow_QueryTabular.png
static/images/IEDB_Workflow_TextProcessing.png
static/images/IEDB_formatted_alleles.png
static/images/TextProcessingConversion.png
static/images/seq2HLA_ClassI.HLAgenotype4digits.png
test-data/alleles.tsv
test-data/bcell.fa
test-data/seqs.fa
test-data/seqs.tsv
b
diff -r 000000000000 -r fe3c43451319 iedb_api.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/iedb_api.py Fri Feb 28 18:09:34 2020 -0500
[
b'@@ -0,0 +1,319 @@\n+#!/usr/bin/env python\n+\n+import argparse\n+import os.path\n+import re\n+import sys\n+import time\n+from urllib.error import HTTPError\n+from urllib.parse import unquote, urlencode\n+from urllib.request import urlopen\n+\n+mhci_methods = [\'recommended\', \'consensus\',\n+                \'netmhcpan_ba\', \'netmhcpan_el\',\n+                \'ann\', \'smmpmbec\', \'smm\',\n+                \'comblib_sidney2008\', \'netmhccons\',\n+                \'pickpocket\', \'netmhcstabpan\']\n+mhcii_methods = [\'recommended\', \'consensus\', \'NetMHCIIpan\',\n+                 \'nn_align\', \'smm_align\', \'comblib\', \'tepitope\']\n+processing_methods = [\'recommended\', \'netmhcpan\', \'ann\',\n+                      \'smmpmbec\', \'smm\', \'comblib_sidney2008\',\n+                      \'netmhccons\', \'pickpocket\']\n+mhcnp_methods = [\'mhcnp\', \'netmhcpan\']\n+bcell_methods = [\'Bepipred\', \'Chou-Fasman\', \'Emini\', \'Karplus-Schulz\',\n+                 \'Kolaskar-Tongaonkar\', \'Parker\', \'Bepipred-2.0\']\n+prediction_methods = {\'mhci\': mhci_methods,\n+                      \'mhcii\': mhcii_methods,\n+                      \'processing\': processing_methods,\n+                      \'mhcnp\': mhcnp_methods,\n+                      \'bcell\': bcell_methods}\n+all_methods = set(mhci_methods + mhcii_methods +\n+                  mhcnp_methods + bcell_methods)\n+prediction_lengths = {\'mhci\': range(8, 16),\n+                      \'mhcii\': range(11, 31),\n+                      \'processing\': range(8, 15),\n+                      \'mhcnp\': range(8, 12),\n+                      \'bcell\': range(8, 16)}\n+\n+\n+def parse_alleles(allelefile, lengths):\n+    alleles = []\n+    lengths = []\n+    with open(allelefile, \'r\') as fh:\n+        for i, line in enumerate(fh):\n+            fields = line.strip().split(\',\')\n+            allele = fields[0].strip()\n+            if allele:\n+                if len(fields) > 1:\n+                    for alen in fields[1:]:\n+                        alleles.append(allele)\n+                        lengths.append(alen)\n+                elif lengths:\n+                    for alen in str(lengths).split(\',\'):\n+                        alleles.append(allele)\n+                        lengths.append(alen)\n+                else:\n+                    alleles.append(allele)\n+    return (alleles, lengths)\n+\n+\n+def query(url, prediction, seq, allele, length, results,\n+          seqid=None, method=\'recommended\', proteasome=None,\n+          timeout=300, retries=3, sleep=300, debug=False):\n+    params = dict()\n+    if method:\n+        params[\'method\'] = method.encode()\n+    if proteasome:\n+        params[\'proteasome\'] = proteasome.encode()\n+    params[\'sequence_text\'] = seq.strip().encode()\n+    if allele is not None:\n+        params[\'allele\'] = allele.encode()\n+    if length is not None:\n+        if prediction == \'bcell\':\n+            params[\'window_size\'] = str(length).encode()\n+        else:\n+            params[\'length\'] = str(length).encode()\n+    req_data = urlencode(params)\n+    if debug:\n+        print(\'url %s %s\' % (url, unquote(req_data)), file=sys.stderr)\n+    retries = max(0, retries) + 1\n+    for retry in range(1, retries):\n+        response = None\n+        try:\n+            response = urlopen(url, data=req_data.encode(\'utf-8\'),\n+                               timeout=timeout)\n+            if response and response.getcode() == 200:\n+                data = [line.decode() for line in response.readlines()]\n+                if debug:\n+                    print(data, file=sys.stderr)\n+                rslts = results[\'prediction\'][\'entries\']\n+                for ln, line in enumerate(data):\n+                    if \'invalid\' in line.lower() or \'tools_api.html\' in line:\n+                        msg = \'%s %s\\n%s\' % (url, unquote(req_data),\n+                                             \'\'.join(data))\n+                        warn_err(msg, exit_code=1)\n+                    if line.find(\'eptide\') > 0:\n+                        results[\'prediction\'][\'header\'] = "#%s%s" %\\\n+                            ("ID\\t" if seqid else ""'..b'les else None\n+    length = \',\'.join(lengths) if lengths else None\n+    method = args.method\n+    proteasome = args.proteasome if args.prediction == \'processcing\' else None\n+    url = \'http://tools-cluster-interface.iedb.org/tools_api/%s/\' %\\\n+        args.prediction\n+\n+    # results\n+    results = {\'prediction\': {\'header\': None, \'entries\': []}, \'detail\': {\'header\': None, \'entries\': []}}\n+\n+    if args.sequence:\n+        for i, seq in enumerate(args.sequence):\n+            seqid = \'pep_%d\' % i\n+            query(url, args.prediction, seq, allele, length, results,\n+                  seqid=seqid, method=method, proteasome=proteasome,\n+                  timeout=args.timeout, retries=args.retries,\n+                  sleep=args.sleep, debug=args.debug)\n+    if args.input:\n+        try:\n+            fh = open(args.input, \'r\')\n+            if args.column:  # tabular\n+                col = int(args.column)\n+                idcol = int(args.id_column) if args.id_column else None\n+                for i, line in enumerate(fh):\n+                    fields = line.rstrip(\'\\r\\n\').split(\'\\t\')\n+                    if len(fields) > col:\n+                        seq = re.sub(\'[_*]\', \'\', fields[col].strip())\n+                        if re.match(aapat, seq):\n+                            if idcol is not None and idcol < len(fields):\n+                                seqid = fields[idcol]\n+                            else:\n+                                seqid = \'pep_%d\' % i\n+                            query(url, args.prediction, seq, allele, length,\n+                                  results, seqid=seqid,\n+                                  method=method, proteasome=proteasome,\n+                                  timeout=args.timeout, retries=args.retries,\n+                                  sleep=args.sleep, debug=args.debug)\n+                        else:\n+                            warn_err(\'Line %d, Not a peptide: %s\\n\' % (i, seq),\n+                                     exit_code=None)\n+            else:  # fasta\n+                seqid = None\n+                seq = \'\'\n+                for i, line in enumerate(fh):\n+                    if line.startswith(\'>\'):\n+                        if seqid and len(seq) > 0:\n+                            query(url, args.prediction, seq, allele, length,\n+                                  results, seqid=seqid,\n+                                  method=method, proteasome=proteasome,\n+                                  timeout=args.timeout, retries=args.retries,\n+                                  sleep=args.sleep, debug=args.debug)\n+                        seqid = line[1:].strip()\n+                        seq = \'\'\n+                    else:\n+                        seq += line.strip()\n+                if seqid and len(seq) > 0:\n+                    query(url, args.prediction, seq, allele, length,\n+                          results, seqid=seqid,\n+                          method=method, proteasome=proteasome,\n+                          timeout=args.timeout, retries=args.retries,\n+                          sleep=args.sleep, debug=args.debug)\n+            fh.close()\n+        except Exception as e:\n+            warn_err("Unable to open input file: %s\\n" % e, exit_code=1)\n+\n+    if results[\'prediction\'][\'header\']:\n+        outputFile.write(results[\'prediction\'][\'header\'])\n+    for line in results[\'prediction\'][\'entries\']:\n+        outputFile.write(line)\n+    if results[\'detail\'][\'entries\']:\n+        if args.output2:\n+            try:\n+                outPath = os.path.abspath(args.output2)\n+                outFile = open(outPath, \'w\')\n+            except Exception as e:\n+                warn_err("Unable to open output file: %s\\n" % e, exit_code=1)\n+        else:\n+            outFile = sys.stdout\n+        if results[\'detail\'][\'header\']:\n+            outFile.write(results[\'detail\'][\'header\'])\n+        for line in results[\'detail\'][\'entries\']:\n+            outFile.write(line)\n+\n+\n+if __name__ == "__main__":\n+    __main__()\n'
b
diff -r 000000000000 -r fe3c43451319 iedb_api.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/iedb_api.xml Fri Feb 28 18:09:34 2020 -0500
[
b'@@ -0,0 +1,477 @@\n+<tool id="iedb_api" name="IEDB" version="2.15.0">\n+    <description>MHC Binding prediction</description>\n+    <macros>\n+        <xml name="alleles" token_hla_regex="" token_hla_examples="" token_hlalen_examples=""> \n+            <conditional name="alleles">\n+               <param name="allelesrc" type="select" label="Alleles">\n+                   <option value="history">From history</option>\n+                   <option value="entry">Entered</option>\n+               </param>\n+               <when value="history">\n+                   <param name="allele_file" type="data" format="txt" label="Alleles file">\n+                       <help>The dataset should have on allele per line. The allele may be followed by an optional comma-separated list of peptide lengths, e.g.: @HLALEN_EXAMPLES@</help>\n+                   </param>\n+               </when>\n+               <when value="entry">\n+                   <param name="allele_text" type="text" size="80" label="Alleles">\n+                       <help>Enter alleles separated by white space: @HLA_EXAMPLES@  (The peptide lengths may follow each allele: @HLALEN_EXAMPLES@)</help>\n+                       <validator type="regex" message="Doesn\'t appear to be a valid allele">^@HLA_REGEX@(\\s+@HLA_REGEX@)*$</validator>\n+                   </param>\n+               </when>\n+            </conditional>\n+        </xml>\n+    </macros>\n+    <requirements>\n+        <requirement type="package" version="3.7">python</requirement>\n+    </requirements>\n+\n+    <command detect_errors="exit_code"><![CDATA[\n+        #import re\n+        python \'${__tool_directory__}/iedb_api.py\' \n+        --prediction=$prediction.tool\n+        --method=$prediction.method \n+        #if $prediction.tool == \'bcell\':\n+            #if $prediction.window_size:\n+                -w $prediction.window_size\n+            #end if\n+        #else\n+            #if $prediction.tool == \'processing\' and $prediction.proteasome:\n+                --proteasome $prediction.proteasome\n+            #end if\n+            #if $prediction.alleles.allelesrc == \'history\':\n+              -A \'$prediction.alleles.allele_file\'\n+            #else:\n+              -A \'$entered_alleles\'\n+            #end if\n+        #end if\n+\n+        #if $sequence.seqsrc == \'fasta\':\n+          -i \'$sequence.seq_fasta\'\n+        #else if $sequence.seqsrc == \'tabular\':\n+          -i \'$sequence.seq_tsv\'\n+          -c #echo int(str($sequence.pep_col)) - 1\n+          #if $sequence.id_col:\n+            -C #echo  int(str($sequence.id_col)) - 1\n+          #end if\n+        #else:\n+          -i \'$entered_seqs\' -c 1 -C 0\n+        #end if\n+        -o \'$output\'\n+    ]]></command>\n+    <configfiles>     \n+        <configfile name="entered_alleles"><![CDATA[#slurp\n+#if $prediction.tool != \'bcell\' and $prediction.alleles.allelesrc == \'entry\'\n+#for $word in str($prediction.alleles.allele_text).strip().split():\n+#if $word.find(\',\') > 0\n+$word\n+#else\n+#set $allele = $word + \',\' + str($prediction.lengths)\n+$allele\n+#end if\n+#end for\n+#end if\n+]]></configfile>     \n+        <configfile name="entered_seqs"><![CDATA[#slurp\n+#if $sequence.seqsrc == \'entry\'\n+#for $i, $seq in enumerate(str($sequence.seq_text).strip().split())\n+#set $seqid = $i + 1\n+#set $seqtext = \'\\t\'.join([str($seqid),$seq.strip()])\n+$seqtext\n+#end for\n+#end if\n+]]></configfile>     \n+    </configfiles>     \n+    <inputs>\n+        <conditional name="prediction">\n+            <param name="tool" type="select" label="Prediction">\n+                <option value="mhci">MHC-I Binding</option>\n+                <option value="mhcii">MHC-II Binding</option>\n+                <option value="processing">MHC-I Processing</option>\n+                <option value="mhcnp">MHC-NP T-Cell Epitope</option>\n+                <option value="bcell">Antibody Epitope Prediction</option>\n+            </param>\n+            <when value="mhci">\n+                <param name="method" type="select" label="prediction method">\n+                    <option valu'..b'             <param name="method" value="Emini"/>\n+            </conditional>\n+            <conditional name="sequence">\n+                <param name="seqsrc" value="entry"/>\n+                <param name="seq_text" value="VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTE"/>\n+            </conditional>\n+            <output name="output">\n+                <assert_contents>\n+                    <has_text text="VLSEGE" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+        <!-- test8 -->\n+        <test>\n+            <conditional name="prediction">\n+                <param name="tool" value="bcell"/>\n+                <param name="method" value="Bepipred"/>\n+                <param name="window_size" value="9"/>\n+            </conditional>\n+            <conditional name="sequence">\n+                <param name="seqsrc" value="fasta"/>\n+                <param name="seq_fasta" ftype="fasta" value="bcell.fa"/>\n+            </conditional>\n+            <output name="output">\n+                <assert_contents>\n+                    <has_text text="ADVAGH" />\n+                </assert_contents>\n+            </output>\n+        </test>\n+\n+    </tests>\n+    <help><![CDATA[\n+The IEDB is a free resource, funded by a contract from the National Institute of Allergy and Infectious Diseases. It offers easy searching of experimental data characterizing antibody and T cell epitopes studied in humans, non-human primates, and other animal species. \n+\n+This tool retrieves epitope binding information about input peptide sequences by using the RESTful web services provided by IEDB.  \n+The webservices are described at:  http://tools.immuneepitope.org/main/tools-api/\n+That page also describes how to retrieve the available HLA alleles for class of epitope binding.\n+\n+**INPUTS**\n+\n+  peptide sequences from a fasta file or a column in a tabular file\n+\n+  HLA alleles either entered as text or one per line in a text file\n+\n+\n+**OUTPUTS**\n+  \n+  A tabular file containing the results returned from the IEDB web service\n+\n+**Typical Workflow for Human MHC I Binding Prediction** \n+\n+The RNAseq data for the subject would be used for:\n+\n+  - HLA prediction by seq2HLA\n+  - Novel Antigen Prediction by a variety of workflows to generate a Antigen peptide fasta \n+\n+\n+.. image:: $PATH_TO_IMAGES/IEDB_Workflow_QueryTabular.png\n+   :width: 584\n+   :height: 430\n+\n+.. note:: The seq2HLA ClassI.HLAgenotype4digits output needs to be converted for IEDB alleles.\n+\n+The seq2HLA ClassI.HLAgenotype4digits output:\n+\n+.. image:: $PATH_TO_IMAGES/seq2HLA_ClassI.HLAgenotype4digits.png\n+   :width: 285\n+   :height: 77\n+\n+Needs to be converted into IEDB formatted alleles:\n+\n+.. image:: $PATH_TO_IMAGES/IEDB_formatted_alleles.png\n+   :width: 74\n+   :height: 81\n+\n+In the workflow above QueryTabular tool converts the alleles:\n+\n+  - Filter Dataset Input\n+\n+    * skip leading lines - *skip lines:* 1\n+    * select columns - *columns:* 2,4\n+    * regex replace value in column - *column:* 1  *regex pattern:* ^(\\\\w+[*]\\\\d\\\\d:\\\\d\\\\d\\\\d?).*$  *replacement expression:* HLA-\\\\1\n+    * regex replace value in column - *column:* 2  *regex pattern:* ^(\\\\w+[*]\\\\d\\\\d:\\\\d\\\\d\\\\d?).*$  *replacement expression:* HLA-\\\\1\n+\n+  - SQL Query to generate tabular output\n+\n+    * SELECT c1 FROM t1 UNION SELECT c2 FROM t1\n+\n+\n+The IEDB formatting can also be performed by TextProcessing tools:\n+\n+.. image:: $PATH_TO_IMAGES/TextProcessingConversion.png\n+   :width: 608\n+   :height: 87\n+\n+The TextProcessing steps to convert the alleles:\n+\n+  - Remove beginning -  removes the header line\n+  - Replace Text - picks Allele 1 and Allele 2 from each line and reformats each on a separate line \n+\n+    * *Find pattern:* ^.*\\\\t([a-zA-Z]+[*][0-9]{2}:[0-9]{2,3}).*\\\\t.*\\\\t([a-zA-Z]+[*][0-9]{2}:[0-9]{2,3}).*\\\\t.*$\n+    * *Replace with:* HLA-\\\\1\\\\nHLA-\\\\2\n+\n+  - Unique - remove duplicates\n+\n+\n+    ]]></help>\n+    <citations>\n+       <citation type="doi">10.1093/nar/gku938</citation>\n+    </citations>\n+</tool>\n'
b
diff -r 000000000000 -r fe3c43451319 static/images/IEDB_Workflow_QueryTabular.png
b
Binary file static/images/IEDB_Workflow_QueryTabular.png has changed
b
diff -r 000000000000 -r fe3c43451319 static/images/IEDB_Workflow_TextProcessing.png
b
Binary file static/images/IEDB_Workflow_TextProcessing.png has changed
b
diff -r 000000000000 -r fe3c43451319 static/images/IEDB_formatted_alleles.png
b
Binary file static/images/IEDB_formatted_alleles.png has changed
b
diff -r 000000000000 -r fe3c43451319 static/images/TextProcessingConversion.png
b
Binary file static/images/TextProcessingConversion.png has changed
b
diff -r 000000000000 -r fe3c43451319 static/images/seq2HLA_ClassI.HLAgenotype4digits.png
b
Binary file static/images/seq2HLA_ClassI.HLAgenotype4digits.png has changed
b
diff -r 000000000000 -r fe3c43451319 test-data/alleles.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/alleles.tsv Fri Feb 28 18:09:34 2020 -0500
b
@@ -0,0 +1,2 @@
+HLA-A*01:01,9
+HLA-A*03:01,10
b
diff -r 000000000000 -r fe3c43451319 test-data/bcell.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bcell.fa Fri Feb 28 18:09:34 2020 -0500
b
@@ -0,0 +1,6 @@
+>pep1
+VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTE
+>pep2
+AGHAHKVPRRLLKAAR
+>pep3
+ALKAADASADADGSGSGSGSGAGHAHKVPRRLLKAAR
b
diff -r 000000000000 -r fe3c43451319 test-data/seqs.fa
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/seqs.fa Fri Feb 28 18:09:34 2020 -0500
b
@@ -0,0 +1,4 @@
+>peptide1
+GHAHKVPRRLLKAAR
+>peptide2
+LKAADASADADGSGSGSGSG
b
diff -r 000000000000 -r fe3c43451319 test-data/seqs.tsv
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/seqs.tsv Fri Feb 28 18:09:34 2020 -0500
b
@@ -0,0 +1,2 @@
+peptide1 16 GHAHKVPRRLLKAAR
+peptide2 21 LKAADASADADGSGSGSGSG