view iedb_api.xml @ 1:6cf84410cb2e draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/iedb_api commit 848c47fb29e9f1d319056a94a7dbd90129a4aafc"
author iuc
date Tue, 07 Apr 2020 23:20:35 -0400
parents fe3c43451319
children a09849898387
line wrap: on
line source

<tool id="iedb_api" name="IEDB" version="2.15.0">
    <description>MHC Binding prediction</description>
    <macros>
        <!--
            Reusable allele selector, expanded inside each MHC prediction <when>.
            The expanding element supplies three tokens:
              token_hla_regex        - pattern for a single allele entry, including
                                       any optional ",length" suffixes
              token_hla_examples     - sample alleles shown in the help text
              token_hlalen_examples  - sample alleles with peptide lengths
        -->
        <xml name="alleles" token_hla_regex="" token_hla_examples="" token_hlalen_examples="">
            <conditional name="alleles">
               <param name="allelesrc" type="select" label="Alleles">
                   <option value="history">From history</option>
                   <option value="entry">Entered</option>
               </param>
               <when value="history">
                   <param name="allele_file" type="data" format="txt" label="Alleles file">
                       <help>The dataset should have one allele per line. The allele may be followed by an optional comma-separated list of peptide lengths, e.g.: @HLALEN_EXAMPLES@</help>
                   </param>
               </when>
               <when value="entry">
                   <param name="allele_text" type="text" size="80" label="Alleles">
                       <help>Enter alleles separated by white space: @HLA_EXAMPLES@  (The peptide lengths may follow each allele: @HLALEN_EXAMPLES@)</help>
                       <!-- The whole entry must consist of one or more whitespace-separated matches of the per-tool allele pattern. -->
                       <validator type="regex" message="Doesn't appear to be a valid allele">^@HLA_REGEX@(\s+@HLA_REGEX@)*$</validator>
                   </param>
               </when>
            </conditional>
        </xml>
    </macros>
    <!-- Runtime dependency: the command below runs iedb_api.py with this Python package version. -->
    <requirements>
        <requirement type="package" version="3.7">python</requirement>
    </requirements>

    <!--
        Command line for iedb_api.py (Cheetah template).
          * The prediction tool and method are always passed.
          * bcell: an optional window size is passed with -w when one was given.
          * all other tools: alleles are passed with -A, either the history dataset
            or the generated entered_alleles config file; the processing tool also
            passes the selected proteasome type.
          * sequences are passed with -i. For tabular input the 1-based column
            choices are converted to 0-based values for -c (peptide) and -C (id).
            Typed-in sequences come from the entered_seqs config file, whose
            id is in column 0 and peptide in column 1.
          * results are written to the dataset given with -o.
    -->
    <command detect_errors="exit_code"><![CDATA[
        #import re
        python '${__tool_directory__}/iedb_api.py' 
        --prediction=$prediction.tool
        --method=$prediction.method 
        #if $prediction.tool == 'bcell':
            #if $prediction.window_size:
                -w $prediction.window_size
            #end if
        #else
            #if $prediction.tool == 'processing' and $prediction.proteasome:
                --proteasome $prediction.proteasome
            #end if
            #if $prediction.alleles.allelesrc == 'history':
              -A '$prediction.alleles.allele_file'
            #else:
              -A '$entered_alleles'
            #end if
        #end if

        #if $sequence.seqsrc == 'fasta':
          -i '$sequence.seq_fasta'
        #else if $sequence.seqsrc == 'tabular':
          -i '$sequence.seq_tsv'
          -c #echo int(str($sequence.pep_col)) - 1
          #if $sequence.id_col:
            -C #echo  int(str($sequence.id_col)) - 1
          #end if
        #else:
          -i '$entered_seqs' -c 1 -C 0
        #end if
        -o '$output'
    ]]></command>
    <configfiles>     
        <!--
            entered_alleles: one allele per line, built from the typed-in allele text
            (only for non-bcell tools with allelesrc == 'entry'). A word that already
            contains a comma after its first character is written unchanged; any other
            word gets the selected peptide lengths appended after a comma.
        -->
        <configfile name="entered_alleles"><![CDATA[#slurp
#if $prediction.tool != 'bcell' and $prediction.alleles.allelesrc == 'entry'
#for $word in str($prediction.alleles.allele_text).strip().split():
#if $word.find(',') > 0
$word
#else
#set $allele = $word + ',' + str($prediction.lengths)
$allele
#end if
#end for
#end if
]]></configfile>     
        <!--
            entered_seqs: one line per whitespace-separated typed-in sequence
            (only for seqsrc == 'entry'), formatted as a 1-based running number,
            a tab, and the sequence.
        -->
        <configfile name="entered_seqs"><![CDATA[#slurp
#if $sequence.seqsrc == 'entry'
#for $i, $seq in enumerate(str($sequence.seq_text).strip().split())
#set $seqid = $i + 1
#set $seqtext = '\t'.join([str($seqid),$seq.strip()])
$seqtext
#end for
#end if
]]></configfile>     
    </configfiles>     
    <inputs>
        <conditional name="prediction">
            <!-- Chooses the prediction type; the value is passed to the script as the prediction option and selects the matching <when> branch. -->
            <param name="tool" type="select" label="Prediction">
                <option value="mhci">MHC-I Binding</option>
                <option value="mhcii">MHC-II Binding</option>
                <option value="processing">MHC-I Processing</option>
                <option value="mhcnp">MHC-NP T-Cell Epitope</option>
                <option value="bcell">Antibody Epitope Prediction</option>
            </param>
            <when value="mhci">
                <!-- MHC class I binding: prediction method, alleles, and default peptide lengths. -->
                <param name="method" type="select" label="prediction method">
                    <option value="recommended" selected="true">recommended</option>
                    <option value="consensus">consensus</option>
                    <option value="netmhcpan">netmhcpan</option>
                    <option value="ann">ann</option>
                    <option value="smmpmbec">smmpmbec</option>
                    <option value="smm">smm</option>
                    <option value="comblib_sidney2008">comblib_sidney2008</option>
                    <option value="netmhccons">netmhccons</option>
                    <option value="pickpocket">pickpocket</option>
                </param>
                <!--
                    Allele pattern: an HLA name of the form HLA-X*dd:dd (further digits allowed),
                    or a name with one of the listed species prefixes, optionally followed by
                    comma-separated peptide lengths 8 to 15. Each position of the two-digit
                    group is a plain digit class, so a stray "[" is not accepted.
                -->
                <expand macro="alleles" hla_regex="(HLA-[A-CEG]\*[0-8][0-9]:[0-9][0-9][0-9]*|BoLA-.+|Gogo-.+|H-2-[DKL][bdk]|Mamu-.+|Patr-.+|RT.+|SLA-.+)(,([8-9]|1[0-5]))*" hla_examples="HLA-A*03:01  HLA-B*07:02" hlalen_examples="HLA-A*03:01,8,9,10  HLA-B*07:02,9"/>
                <!-- Default lengths; the entered_alleles config file appends them to typed-in alleles that carry no lengths of their own. -->
                <param name="lengths" type="select" multiple="true" optional="false" label="peptide lengths for prediction">
                    <help>Used for any alleles which don't include specified lengths</help>
                    <option value="8" selected="true">8</option>
                    <option value="9">9</option>
                    <option value="10">10</option>
                    <option value="11">11</option>
                    <option value="12">12</option>
                    <option value="13">13</option>
                    <option value="14">14</option>
                    <option value="15">15</option>
                </param>
            </when>
            <when value="mhcii">
                <!-- MHC class II binding: prediction method, alleles, and default peptide lengths. -->
                <param name="method" type="select" label="prediction method">
                    <option value="recommended" selected="true">recommended</option>
                    <option value="consensus3">consensus3</option>
                    <option value="NetMHCIIpan">NetMHCIIpan</option>
                    <option value="nn_align">nn_align</option>
                    <option value="smm_align">smm_align</option>
                    <option value="comblib">comblib</option>
                    <option value="tepitope">tepitope</option>
                </param>
                <!--
                    Allele pattern: DPA1/DPB1 or DQA1/DQB1 pairs, DRB alleles (HLA- prefix optional),
                    or H2-IAb / H2-IAd, optionally followed by comma-separated lengths or "asis".
                    NOTE(review): the length alternative [1-2][0-9] also admits 10, which is not
                    among the selectable lengths below; confirm whether that is intended.
                -->
                <expand macro="alleles" hla_regex="(DPA1\*0[1-3](:0[1-3])?/DPB1\*0[1-6]:0[12]|DQA1\*0[1-5]:0[12]/DQB1\*0[2-6]:0[12]|(HLA-)?DRB[1-5]\*[01][1-9]:0[1-5]|H2-IA[bd])(,(asis|[1-2][0-9]|30))*" hla_examples="DPA1*01/DPB1*04:01 HLA-DRB1*01:01 H2-IAb" hlalen_examples="DPA1*01/DPB1*04:01,11,15"/>
                <!-- Default lengths; the entered_alleles config file appends them to typed-in alleles that carry no lengths of their own. -->
                <param name="lengths" type="select" multiple="true" optional="false" label="peptide lengths for prediction">
                    <help>Used for any alleles which don't include specified lengths</help>
                    <option value="asis">asis</option>
                    <option value="11">11</option>
                    <option value="12">12</option>
                    <option value="13">13</option>
                    <option value="14">14</option>
                    <option value="15" selected="true">15</option>
                    <option value="16">16</option>
                    <option value="17">17</option>
                    <option value="18">18</option>
                    <option value="19">19</option>
                    <option value="20">20</option>
                    <option value="21">21</option>
                    <option value="22">22</option>
                    <option value="23">23</option>
                    <option value="24">24</option>
                    <option value="25">25</option>
                    <option value="26">26</option>
                    <option value="27">27</option>
                    <option value="28">28</option>
                    <option value="29">29</option>
                    <option value="30">30</option>
                </param>
            </when>
            <when value="processing">
                <!-- MHC class I processing: prediction method, proteasome type, alleles, and default peptide lengths. -->
                <param name="method" type="select" label="prediction method">
                    <option value="recommended" selected="true">recommended</option>
                    <option value="consensus">consensus</option>
                    <option value="netmhcpan">netmhcpan</option>
                    <option value="ann">ann</option>
                    <option value="smmpmbec">smmpmbec</option>
                    <option value="smm">smm</option>
                    <option value="comblib_sidney2008">comblib_sidney2008</option>
                </param>
                <!-- Passed to the script as the proteasome option. -->
                <param name="proteasome" type="select" label="proteasome type">
                    <option value="immuno">immuno</option>
                    <option value="constitutive">constitutive</option>
                </param>
                <!--
                    Allele pattern: an HLA name of the form HLA-X*dd:dd, or a name with one of
                    the listed species prefixes, optionally followed by comma-separated peptide
                    lengths 8 to 14. Each position of the two-digit group is a plain digit
                    class, so a stray "[" is not accepted.
                -->
                <expand macro="alleles" hla_regex="(HLA-[A-CE]\*[0-8][0-9]:[0-9][0-9]|BoLA-.+|Gogo-.+|H-2-[DKL][bdk]|Mamu-.+|Patr-.+|RT.+|SLA-.+)(,([8-9]|1[0-4]))*" hla_examples="HLA-A*03:01  HLA-B*07:02" hlalen_examples="HLA-A*03:01,8,9,10  HLA-B*07:02,9"/>
                <!-- Default lengths; the entered_alleles config file appends them to typed-in alleles that carry no lengths of their own. -->
                <param name="lengths" type="select" multiple="true" optional="false" label="peptide lengths for prediction">
                    <help>Used for any alleles which don't include specified lengths</help>
                    <option value="8" selected="true">8</option>
                    <option value="9">9</option>
                    <option value="10">10</option>
                    <option value="11">11</option>
                    <option value="12">12</option>
                    <option value="13">13</option>
                    <option value="14">14</option>
                </param>
            </when>
            <when value="mhcnp">
                <!-- MHC-NP T-cell epitope prediction: method, alleles, and default peptide lengths. -->
                <param name="method" type="select" label="prediction method">
                    <option value="mhcnp" selected="true">mhcnp</option>
                    <option value="netmhcpan">netmhcpan</option>
                </param>
                <!--
                    Allele pattern: one of the listed HLA alleles or H-2-Db / H-2-Kb, optionally
                    followed by comma-separated peptide lengths 8 to 11. The comma sits outside
                    the length alternation so that ",10" and ",11" are accepted just like
                    ",8" and ",9" (as in the example HLA-A*02:01,8,9,10).
                -->
                <expand macro="alleles" hla_regex="(HLA-(A\*02:01|B\*07:02|B\*35:01|B\*44:03|B\*53:01|B\*57:01)|H-2-[DK]b)(,([8-9]|1[0-1]))*" hla_examples="HLA-A*02:01  H-2-Db" hlalen_examples="HLA-A*02:01,8,9,10"/>
                <!-- Default lengths; the entered_alleles config file appends them to typed-in alleles that carry no lengths of their own. -->
                <param name="lengths" type="select" multiple="true" optional="false" label="peptide lengths for prediction">
                    <help>Used for any alleles which don't include specified lengths</help>
                    <option value="8" selected="true">8</option>
                    <option value="9">9</option>
                    <option value="10">10</option>
                    <option value="11">11</option>
                </param>
            </when>
            <when value="bcell">
                <!-- Antibody (B-cell) epitope prediction: no alleles, only a method and an optional window size. -->
                <param name="method" type="select" label="prediction method">
                    <option value="Bepipred" selected="true">Bepipred</option>
                    <option value="Chou-Fasman">Chou-Fasman</option>
                    <option value="Emini">Emini</option>
                    <option value="Karplus-Schulz">Karplus-Schulz</option>
                    <option value="Kolaskar-Tongaonkar">Kolaskar-Tongaonkar</option>
                    <option value="Parker">Parker</option>
                </param>
                <!-- Optional; the command template only adds -w when a value is set. -->
                <param name="window_size" type="integer" value="" optional="true" min="1" label="window_size" help="window_size should be less than the sequence length, and less than 8 for Karplus-Schulz method"/>
            </when>
        </conditional>
        <!-- Source of the peptide sequences: a FASTA dataset, a column of a tabular dataset, or text typed into the form. -->
        <conditional name="sequence">
           <param name="seqsrc" type="select" label="Peptide sequences">
               <option value="fasta">Fasta file</option>
               <option value="tabular">From tabular</option>
               <!-- Labelled like the matching choice of the alleles selector so the option is not rendered blank. -->
               <option value="entry">Entered</option>
           </param>
           <when value="fasta">
               <param name="seq_fasta" type="data" format="fasta" label="Peptide Sequence Fasta"/>
           </when>
           <when value="tabular">
               <param name="seq_tsv" type="data" format="tabular" label="Peptide Sequence Tabular"/>
               <!-- Column numbers are 1-based here; the command template subtracts 1 before passing them on. -->
               <param name="pep_col" label="Select column with peptides" type="data_column" numerical="false" data_ref="seq_tsv" />
               <param name="id_col" label="Select column with name" type="data_column" numerical="false" data_ref="seq_tsv" optional="true"/>
           </when>
           <when value="entry">
               <!-- The entered_seqs config file splits this text on white space, one sequence per word. -->
               <param name="seq_text" type="text" label="Peptide Sequence" help="Enter one or more peptide sequences separated by white space"/>
           </when>
        </conditional>

    </inputs>
    <outputs>
        <!-- Earlier unlabeled definition of the main output, kept for reference:
        <data name="output" format="tabular"/>
        -->
        <!-- Main result table, written to the path given to the script with -o; labelled with the chosen tool and method. -->
        <data name="output" format="tabular" label="IEDB ${prediction.tool} ${prediction.method}"/>
        <!--
            Second table, created only when the chosen method name starts with "Bepipred".
            It is collected from the file iedb_results2 in the job working directory
            (presumably written by iedb_api.py; the script is not visible here).
        -->
        <data name="output2" format="tabular" label="IEDB ${prediction.tool} ${prediction.method} residue scores" from_work_dir="iedb_results2">
            <filter>prediction['method'].startswith('Bepipred')</filter>
        </data> 
    </outputs>
    <tests>
        <!-- test1: MHC-I binding, recommended method; one allele with length 9 and one peptide, both typed in. Expects LYNTVATLY in the output. -->
        <test> 
            <conditional name="prediction">
                <param name="tool" value="mhci"/>
                <param name="method" value="recommended"/>
                <conditional name="alleles">
                    <param name="allelesrc" value="entry"/>
                    <param name="allele_text" value="HLA-A*01:01,9"/>
                </conditional>
            </conditional>
            <conditional name="sequence">
                <param name="seqsrc" value="entry"/>
                <param name="seq_text" value="SLYNTVATLYCVHQRIDV"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_text text="LYNTVATLY" />
                </assert_contents>
            </output>
        </test>
        <!-- test2: MHC-I binding with alleles from a history dataset (alleles.tsv) and sequences from a FASTA dataset (seqs.fa). -->
        <test>
            <conditional name="prediction">
                <param name="tool" value="mhci"/>
                <param name="method" value="recommended"/>
                <conditional name="alleles">
                    <param name="allelesrc" value="history"/>
                    <param name="allele_file" ftype="tabular" value="alleles.tsv"/>
                </conditional>
            </conditional>
            <conditional name="sequence">
                <param name="seqsrc" value="fasta"/>
                <param name="seq_fasta" ftype="fasta" value="seqs.fa"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_text text="peptide1" />
                    <has_text text="HKVPRRLLK" />
                </assert_contents>
            </output>
        </test>
        <!-- test3: MHC-I binding with alleles from a history dataset and sequences from a tabular dataset (peptides in column 3, names in column 1). -->
        <test>
            <conditional name="prediction">
                <param name="tool" value="mhci"/>
                <param name="method" value="recommended"/>
                <conditional name="alleles">
                    <param name="allelesrc" value="history"/>
                    <param name="allele_file" ftype="tabular" value="alleles.tsv"/>
                </conditional>
            </conditional>
            <conditional name="sequence">
                <param name="seqsrc" value="tabular"/>
                <param name="seq_tsv" ftype="tabular" value="seqs.tsv"/>
                <param name="pep_col" value="3"/>
                <param name="id_col" value="1"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_text text="peptide1" />
                    <has_text text="HKVPRRLLK" />
                </assert_contents>
            </output>
        </test>
        <!-- test4: MHC-II binding; typed-in allele without lengths, default length set to "asis", typed-in peptide. -->
        <test>
            <conditional name="prediction">
                <param name="tool" value="mhcii"/>
                <param name="method" value="recommended"/>
                <conditional name="alleles">
                    <param name="allelesrc" value="entry"/>
                    <param name="allele_text" value="DPA1*01/DPB1*04:01"/>
                </conditional>
                <param name="lengths" value="asis"/>
            </conditional>
            <conditional name="sequence">
                <param name="seqsrc" value="entry"/>
                <param name="seq_text" value="SLYNTVATLYCVHQRIDV"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_text text="LYNTVATLY" />
                </assert_contents>
            </output>
        </test>
        <!-- test5: MHC-I processing with the constitutive proteasome; two typed-in alleles with their own lengths, typed-in peptide. -->
        <test>
            <conditional name="prediction">
                <param name="tool" value="processing"/>
                <param name="method" value="recommended"/>
                <conditional name="alleles">
                    <param name="allelesrc" value="entry"/>
                    <param name="allele_text" value="HLA-A*01:01,8 HLA-A*02:01,9"/>
                </conditional>
                <param name="proteasome" value="constitutive"/>
            </conditional>
            <conditional name="sequence">
                <param name="seqsrc" value="entry"/>
                <param name="seq_text" value="SLYNTVATLYCVHQRIDV"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_text text="LYNTVATLY" />
                </assert_contents>
            </output>
        </test>
        <!-- test6: MHC-NP prediction with the mhcnp method; typed-in allele with length 9, typed-in peptide. -->
        <test>
            <conditional name="prediction">
                <param name="tool" value="mhcnp"/>
                <param name="method" value="mhcnp"/>
                <conditional name="alleles">
                    <param name="allelesrc" value="entry"/>
                    <param name="allele_text" value="HLA-A*02:01,9"/>
                </conditional>
            </conditional>
            <conditional name="sequence">
                <param name="seqsrc" value="entry"/>
                <param name="seq_text" value="SLYNTVATLYCVHQRIDV"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_text text="LYNTVATLY" />
                </assert_contents>
            </output>
        </test>
        <!-- test7: antibody epitope prediction with the Emini method on a typed-in sequence; no window size given. -->
        <test>
            <conditional name="prediction">
                <param name="tool" value="bcell"/>
                <param name="method" value="Emini"/>
            </conditional>
            <conditional name="sequence">
                <param name="seqsrc" value="entry"/>
                <param name="seq_text" value="VLSEGEWQLVLHVWAKVEADVAGHGQDILIRLFKSHPETLEKFDRFKHLKTE"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_text text="VLSEGE" />
                </assert_contents>
            </output>
        </test>
        <!-- test8: antibody epitope prediction with the Bepipred method, window size 9, sequences from a FASTA dataset (bcell.fa). -->
        <test>
            <conditional name="prediction">
                <param name="tool" value="bcell"/>
                <param name="method" value="Bepipred"/>
                <param name="window_size" value="9"/>
            </conditional>
            <conditional name="sequence">
                <param name="seqsrc" value="fasta"/>
                <param name="seq_fasta" ftype="fasta" value="bcell.fa"/>
            </conditional>
            <output name="output">
                <assert_contents>
                    <has_text text="ADVAGH" />
                </assert_contents>
            </output>
        </test>

    </tests>
    <help><![CDATA[
The IEDB is a free resource, funded by a contract from the National Institute of Allergy and Infectious Diseases. It offers easy searching of experimental data characterizing antibody and T cell epitopes studied in humans, non-human primates, and other animal species. 

This tool retrieves epitope binding information about input peptide sequences by using the RESTful web services provided by IEDB.  
The webservices are described at:  http://tools.immuneepitope.org/main/tools-api/
That page also describes how to retrieve the available HLA alleles for each class of epitope binding.

**INPUTS**

  peptide sequences from a fasta file, from a column in a tabular file, or entered as text

  HLA alleles either entered as text or one per line in a text file


**OUTPUTS**
  
  A tabular file containing the results returned from the IEDB web service

**Typical Workflow for Human MHC I Binding Prediction** 

The RNAseq data for the subject would be used for:

  - HLA prediction by seq2HLA
  - Novel Antigen Prediction by a variety of workflows to generate an Antigen peptide fasta


.. image:: $PATH_TO_IMAGES/IEDB_Workflow_QueryTabular.png
   :width: 584
   :height: 430

.. note:: The seq2HLA ClassI.HLAgenotype4digits output needs to be converted for IEDB alleles.

The seq2HLA ClassI.HLAgenotype4digits output:

.. image:: $PATH_TO_IMAGES/seq2HLA_ClassI.HLAgenotype4digits.png
   :width: 285
   :height: 77

Needs to be converted into IEDB formatted alleles:

.. image:: $PATH_TO_IMAGES/IEDB_formatted_alleles.png
   :width: 74
   :height: 81

In the workflow above, the QueryTabular tool converts the alleles:

  - Filter Dataset Input

    * skip leading lines - *skip lines:* 1
    * select columns - *columns:* 2,4
    * regex replace value in column - *column:* 1  *regex pattern:* ^(\\w+[*]\\d\\d:\\d\\d\\d?).*$  *replacement expression:* HLA-\\1
    * regex replace value in column - *column:* 2  *regex pattern:* ^(\\w+[*]\\d\\d:\\d\\d\\d?).*$  *replacement expression:* HLA-\\1

  - SQL Query to generate tabular output

    * SELECT c1 FROM t1 UNION SELECT c2 FROM t1


The IEDB formatting can also be performed by TextProcessing tools:

.. image:: $PATH_TO_IMAGES/TextProcessingConversion.png
   :width: 608
   :height: 87

The TextProcessing steps to convert the alleles:

  - Remove beginning -  removes the header line
  - Replace Text - picks Allele 1 and Allele 2 from each line and reformats each on a separate line 

    * *Find pattern:* ^.*\\t([a-zA-Z]+[*][0-9]{2}:[0-9]{2,3}).*\\t.*\\t([a-zA-Z]+[*][0-9]{2}:[0-9]{2,3}).*\\t.*$
    * *Replace with:* HLA-\\1\\nHLA-\\2

  - Unique - remove duplicates


    ]]></help>
    <!-- Publication to cite when using this tool, referenced by DOI. -->
    <citations>
       <citation type="doi">10.1093/nar/gku938</citation>
    </citations>
</tool>