<!-- Mercurial > repos > iuc > optitype -->

<tool id="optitype" name="OptiType" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@">
    <description>HLA genotyping predictions from NGS data</description>
    <!-- Version tokens: the tool's version attribute is built from both tokens,
         and the package requirement below reuses the TOOL_VERSION token, so the
         wrapper version and the installed package version stay in sync. -->
    <macros>
        <token name="@TOOL_VERSION@">1.3.5</token>
        <token name="@VERSION_SUFFIX@">0</token>
    </macros>
    <!-- Package dependency needed by the command section, pinned to the
         TOOL_VERSION token. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">optitype</requirement>
    </requirements>
    <command detect_errors="aggressive">
<![CDATA[
## Give matplotlib a writable config/cache directory. TEMP is honoured when it
## is set; otherwise fall back to TMPDIR and finally to the job working
## directory, because TEMP is not a standard variable on POSIX systems.
export MPLCONFIGDIR="\${TEMP:-\${TMPDIR:-\$PWD}}" &&
## Link the selected input dataset(s) under fixed names and remember those
## names, in order, for the --input argument below.
#set $fastqs = []
#if str( $fastq_input.fastq_input_selector ) == "paired":
    ln -s '${fastq_input.fastq_input1}' reads_1.fastq
    && ln -s '${fastq_input.fastq_input2}' reads_2.fastq
    #set $fastqs = ['reads_1.fastq','reads_2.fastq']
#elif str( $fastq_input.fastq_input_selector ) == "paired_collection":
    ln -s '${fastq_input.fastq_input1.forward}' reads_1.fastq
    && ln -s '${fastq_input.fastq_input1.reverse}' reads_2.fastq
    #set $fastqs = ['reads_1.fastq','reads_2.fastq']
#elif str( $fastq_input.fastq_input_selector ) == "single":
    ln -s '${fastq_input.fastq_input1}' reads.fastq
    #set $fastqs = ['reads.fastq']
#end if
## Finish the rendered config template: insert the resolved razers3 path and
## replace the placeholder mapping thread count (threads=16) with the number
## of slots allotted to the job, defaulting to 1 if GALAXY_SLOTS is unset.
&& RAZERS3=`which razers3`
&& sed "s#path_to_razers3#\$RAZERS3#" '$optitype_config' | sed "s/threads=16/threads=\${GALAXY_SLOTS:-1}/" > config.ini
#set $input_fq = ' '.join($fastqs)
&& OptiTypePipeline.py
$read_type --input $input_fq
## Optional arguments are only passed when the user supplied a value.
#if str($beta) != '':
 --beta $beta
#end if
#if str($enumerations) != '':
 --enumerate $enumerations
#end if
--config "`pwd`/config.ini"
--outdir outdir
## Copy the result files out of the subdirectory created under outdir.
&& cp outdir/*/*_coverage_plot.pdf '$coverage_plot'
&& cp outdir/*/*_result.tsv '$result'
]]>
    </command>
    <configfiles>
        <!-- Template for the OptiType config.ini. The $solver, $unpaired_weight
             and $use_discordant placeholders are filled from the tool inputs of
             the same name. The command section post-processes the rendered file
             with sed, replacing the literal strings "path_to_razers3" and
             "threads=16", so those two strings must stay exactly as written. -->
        <configfile name="optitype_config"><![CDATA[
[mapping]

# Absolute path to RazerS3 binary, and number of threads to use for mapping

razers3=path_to_razers3
threads=16

[ilp]

# A Pyomo-supported ILP solver. The solver must be globally accessible in the
# environment OptiType is run, so make sure to include it in PATH.
# Note: this is NOT a path to the solver binary, but a keyword argument for
# Pyomo. Examples: glpk, cplex, cbc.

solver=$solver
threads=1

[behavior]

# tempdir=/path/to/tempdir  # we may enable this setting later. Not used now.

# Delete intermediate bam files produced by RazerS3 after OptiType finished
# loading them. If you plan to re-analyze your samples with different settings
# disabling this option can be a time-saver, as you'll be able to pass the bam
# files to OptiType directly as input and spare the expensive read mapping
# step.

deletebam=true

# In paired-end mode one might want to use reads with just one mapped end (e.g.,
# the other end falls outside the reference region). This setting allows the
# user to keep them with an optionally reduced weight. A value of 0 means they
# are discarded for typing, 0.2 means single reads are "worth" 20% of paired
# reads, and a value of 1 means they are treated as valuable as properly mapped
# read pairs. Note: unpaired reads will be reported on the result coverage plots
# for completeness, regardless of this setting.

unpaired_weight=$unpaired_weight

# We call a read pair discordant if its two ends best-map to two disjoint sets
# of alleles. Such reads can be either omitted or either of their ends treated
# as unpaired hits. Note: discordant read pairs are reported on the coverage
# plots as unpaired reads, regardless of this setting.

use_discordant=$use_discordant
        ]]></configfile>
    </configfiles>
    <inputs>
        <!-- Read layout. Every branch names its dataset (or collection)
             fastq_input1 so the command section can address it uniformly. -->
        <conditional name="fastq_input">
            <param name="fastq_input_selector" type="select" label="Single or Paired-end reads" help="Select between paired and single end data">
                <option value="paired">Paired</option>
                <option value="single">Single</option>
                <option value="paired_collection">Paired Collection</option>
            </param>
            <when value="paired">
                <param name="fastq_input1" type="data" format="fastqsanger" label="Select first set of reads" help="Specify dataset with forward reads"/>
                <param name="fastq_input2" type="data" format="fastqsanger" label="Select second set of reads" help="Specify dataset with reverse reads"/>
            </when>
            <when value="single">
                <param name="fastq_input1" type="data" format="fastqsanger" label="Select fastq dataset" help="Specify dataset with single reads"/>
            </when>
            <when value="paired_collection">
                <param name="fastq_input1" type="data_collection" format="fastqsanger" collection_type="paired" label="Select a paired collection" help="Specify a paired dataset collection containing forward and reverse reads"/>
            </when>
        </conditional>
        <!-- The option values are the literal command-line flags passed to
             OptiTypePipeline.py. -->
        <param name="read_type" type="select" label="Nucleotide Type" help="Select RNA for RNA sequencing data or DNA for DNA sequencing data (e.g. exome or whole genome)">
            <option value="--rna">RNA</option>
            <option value="--dna">DNA</option>
        </param>
        <!-- Optional values: when left blank the corresponding flag is omitted
             and OptiType uses its own default. -->
        <param name="beta" type="float" value="" min="0.0" max="0.1" optional="true" label="homozygosity beta" help="The beta value for homozygosity detection (Leave blank for default: 0.009)"/>
        <param name="enumerations" type="integer" value="" min="1" max="5" optional="true" label="Enumerations" help="The number of enumerations (Leave blank for default: 1)"/>
        <!-- The remaining parameters are written into the OptiType config file. -->
        <param name="solver" type="select" label="ILP solver" help="Solver used for the integer linear program; glpk is the only solver currently available">
            <option value="glpk">glpk</option>
            <!-- Not currently in OptiType conda package
            <option value="cbc">cbc</option>
            <option value="cplex">cplex</option>
            -->
        </param>
        <param name="unpaired_weight" type="float" value="0" min="0.0" max="1.0" label="unpaired_weight">
            <help><![CDATA[
In paired-end mode one might want to use reads with just one mapped end (e.g.,
the other end falls outside the reference region). This setting allows the
user to keep them with an optionally reduced weight. A value of 0 means they
are discarded for typing, 0.2 means single reads are "worth" 20% of paired
reads, and a value of 1 means they are treated as valuable as properly mapped
read pairs. Note: unpaired reads will be reported on the result coverage plots
for completeness, regardless of this setting. ]]>
            </help>
        </param>
        <param name="use_discordant" type="boolean" truevalue="true" falsevalue="false" checked="false" label="use_discordant">
            <help><![CDATA[
We call a read pair discordant if its two ends best-map to two disjoint sets
of alleles. Such reads can be either omitted or either of their ends treated
as unpaired hits. Note: discordant read pairs are reported on the coverage
plots as unpaired reads, regardless of this setting. ]]>
            </help>
        </param>
    </inputs>
    <outputs>
        <!-- Coverage plot PDF; the command section copies it here from the
             OptiType output directory. -->
        <data name="coverage_plot"
              format="pdf"
              label="${tool.name} on ${on_string} coverage_plot.pdf"/>
        <!-- Tabular genotyping result; the column names are attached to the
             dataset as metadata. -->
        <data name="result"
              format="tabular"
              label="${tool.name} on ${on_string} result.tsv">
            <actions>
                <action name="column_names"
                        type="metadata"
                        default="Solution,A1,A2,B1,B2,C1,C2,Reads,Objective"/>
            </actions>
        </data>
    </outputs>
    <tests>
        <!-- Paired-end RNA reads with otherwise default settings; checks the
             solution row (index 0) of the result table by regular expression. -->
        <test>
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="paired"/>
                <param name="fastq_input1" ftype="fastqsanger" value="rna/CRC_81_N_1_fished.fastq"/>
                <param name="fastq_input2" ftype="fastqsanger" value="rna/CRC_81_N_2_fished.fastq"/>
            </conditional>
            <param name="read_type" value="--rna"/>
            <param name="solver" value="glpk"/>
            <output name="result">
                <assert_contents>
                    <has_text_matching expression="0\tA\*31:01\tA\*68:01\tB\*40:01\tB\*51:01\tC\*03:04\tC\*15:02\t13\d+.\d+\t12\d+.\d+"/>
                </assert_contents>
            </output>
        </test>
        <!-- Paired-end exome (DNA) reads with two enumerations and an unpaired
             weight of 0.2; expects two solution rows (indices 0 and 1). -->
        <test>
            <conditional name="fastq_input">
                <param name="fastq_input_selector" value="paired"/>
                <param name="fastq_input1" ftype="fastqsanger" value="exome/NA11995_SRR766010_1_fished.fastq"/>
                <param name="fastq_input2" ftype="fastqsanger" value="exome/NA11995_SRR766010_2_fished.fastq"/>
            </conditional>
            <param name="read_type" value="--dna"/>
            <param name="solver" value="glpk"/>
            <param name="enumerations" value="2"/>
            <param name="unpaired_weight" value="0.2"/>
            <output name="result">
                <assert_contents>
                    <has_text_matching expression="0\tA\*01:01\tA\*01:01\tB\*08:01\tB\*57:01\tC\*06:02\tC\*07:01\t3\d+.\d+\t3\d+.\d+"/>
                    <has_text_matching expression="1\tA\*01:01\tA\*01:01\tB\*08:01\tB\*08:01\tC\*06:02\tC\*07:01\t3\d+.\d+\t3\d+.\d+"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
<![CDATA[
**OptiType**
============

OptiType_ is a novel HLA genotyping algorithm based on integer linear programming, capable of producing accurate 4-digit HLA genotyping predictions from NGS data by simultaneously selecting all major and minor HLA Class I alleles.

**INPUTS**

     RNA or DNA sequences in fastq format.

**OUTPUTS**

    result.tsv     A TAB-separated file of HLA genotyping predictions:

    ::

		A1	A2	B1	B2	C1	C2	Reads	Objective
	0	A*31:01	A*68:01	B*40:01	B*51:01	C*15:02	C*03:04	132	128.43599999999998


    coverage_plot.pdf    Plots of coverage of HLA genotyping predictions

.. _OptiType: https://github.com/FRED-2/OptiType
]]>
    </help>
    <!-- Publication to cite when using this tool, referenced by DOI. -->
    <citations>
        <citation type="doi">10.1093/bioinformatics/btu548</citation>
    </citations>
</tool>