view tb_profiler_profile.xml @ 19:51dd22484644 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/tb-profiler commit 0d6df32b6249e9f0ac4bb2bbd006c6215bbddfc8
author iuc
date Thu, 20 Jun 2024 06:40:19 +0000
parents 4839e590cb26
children 8cb134b8c1a5
line wrap: on
line source

<tool id="tb_profiler_profile" name="TB-Profiler Profile" version="@TOOL_VERSION@+galaxy1" profile="20.09" license="AGPL-3.0-or-later">
    <!-- One-line summary of the tool. -->
    <description>Infer strain types and drug resistance markers from sequences</description>
    <macros>
        <!-- Single source for the wrapped version: referenced by the tool's
             version attribute and by the package requirement below. -->
        <token name="@TOOL_VERSION@">6.2.1</token>
    </macros>
    <xrefs>
        <!-- Cross-reference to the bio.tools registry entry. -->
        <xref type="bio.tools">tb-profiler</xref>
    </xrefs>
    <requirements>
        <!-- Only tb-profiler is declared. NOTE(review): the command also calls
             bcftools, which is presumably pulled in as a dependency of this
             package; confirm before changing the requirement list. -->
        <requirement type="package" version="@TOOL_VERSION@">tb-profiler</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage the selected inputs under fixed names in the working directory.
        ## Each link keeps the dataset's Galaxy extension as its file suffix.
        ## r1_ext / r2_ext stay None when the corresponding file is not staged.
        #set $input_type = str($fastq_or_bam.input_select)
        #set $r1_ext = None
        #set $r2_ext = None
        #if $input_type == "paired_fastq"
            #set $r1_ext = $fastq_or_bam.read1.extension
            ln -s '$fastq_or_bam.read1' fastq_r1.'$r1_ext' &&
            ## read2 is declared optional: only stage it when a dataset was supplied,
            ## otherwise the run falls back to single-end mode.
            #if $fastq_or_bam.read2
                #set $r2_ext = $fastq_or_bam.read2.extension
                ln -s '$fastq_or_bam.read2' fastq_r2.'$r2_ext' &&
            #end if
        #else if $input_type == "single_fastq"
            #set $r1_ext = $fastq_or_bam.fastq.extension
            ln -s '$fastq_or_bam.fastq' fastq_r1.'$r1_ext' &&
        #else if $input_type == "paired_collection_fastq"
            #set $r1_ext = $fastq_or_bam.fastq_collection.forward.extension
            #set $r2_ext = $fastq_or_bam.fastq_collection.reverse.extension
            ln -s '$fastq_or_bam.fastq_collection.forward' fastq_r1.'$r1_ext' &&
            ln -s '$fastq_or_bam.fastq_collection.reverse' fastq_r2.'$r2_ext' &&
        #else if $input_type == "bam"
            ln -s '$fastq_or_bam.bam_input' input.bam &&
        #end if

        tb-profiler profile
            --platform '${platform.value}'
            ${spoligotype}
        #if $input_type == "bam"
            --bam input.bam
        #else
            -1 fastq_r1.'$r1_ext'
            ## The mate file carries its own extension, which may differ from read 1.
            #if $r2_ext
                -2 fastq_r2.'$r2_ext'
            #end if
        #end if

            --threads "\${GALAXY_SLOTS:-1}"
        #if $advanced.options == 'yes'
            --mapper '${advanced.mapper}'
            --caller '${advanced.caller}'
            ## Each filter is passed as a 'hard,soft' pair.
            --depth '${advanced.vf.min_depth_hard},${advanced.vf.min_depth_soft}'
            --af '${advanced.vf.min_af_hard},${advanced.vf.min_af_soft}'
            --strand '${advanced.vf.min_read_hard},${advanced.vf.min_read_soft}'
            --sv-depth '${advanced.vf.min_sv_depth_hard},${advanced.vf.min_sv_depth_soft}'
            --sv-af '${advanced.vf.min_sv_af_hard},${advanced.vf.min_sv_af_soft}'
            --sv-len '${advanced.vf.max_sv_len_hard},${advanced.vf.max_sv_len_soft}'
            ${advanced.suspect}
            ${advanced.no_trim}
            ${advanced.no_coverage_qc}
            ${advanced.no_samclip}
            ${advanced.no_delly}
        #end if

        #if $output_format == "pdf"
            --pdf
        #else if $output_format == "txt"
            --txt
        #end if

        ## Move the results to the Galaxy output datasets.
        && mv results/tbprofiler.results.json '${results_json}'
        ## The BAM output only exists as a Galaxy dataset when the input was not a BAM.
        #if $input_type != "bam"
            && mv bam/tbprofiler.bam '${output_bam}'
        #end if
        ## Write the compressed targets VCF out as uncompressed VCF.
        && bcftools view -Ov -o '${output_vcf}' vcf/tbprofiler.targets.vcf.gz
        #if $output_format == "pdf"
            && mv results/tbprofiler.results.pdf '${output_pdf}'
        #else if $output_format == "txt"
            && mv results/tbprofiler.results.txt '${output_txt}'
        #end if

    ]]>    </command>
    <inputs>
        <!-- Sequencing platform, passed to tb-profiler as the platform option. -->
        <param name="platform" type="select" label="Platform">
            <option value="illumina" selected="true">Illumina</option>
            <option value="nanopore">Nanopore</option>
            <option value="pacbio">PacBio</option>
        </param>
        <!-- Input selection: the chosen value decides which files the command stages. -->
        <conditional name="fastq_or_bam">
            <param name="input_select" type="select" label="Input File Type">
                <option value="paired_fastq">Paired Fastq</option>
                <option value="paired_collection_fastq">Paired Collection Fastq</option>
                <option value="single_fastq">Single Fastq</option>
                <option value="bam">BAM</option>
            </param>
            <when value="paired_fastq">
                <!-- Accepts the same formats as the single and collection inputs. -->
                <param name="read1" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" label="Read1" help="First read file (default: None)"/>
                <param name="read2" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" optional="true" label="Read2" help="Second read file (default: None)"/>
            </when>
            <when value="paired_collection_fastq">
                <param label="Reads (collection)" name="fastq_collection" type="data_collection" collection_type="paired" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" />
            </when>
            <when value="single_fastq">
                <param label="Reads" name="fastq" type="data" format="fastq,fastq.gz,fastqsanger,fastqsanger.gz" />
            </when>
            <when value="bam">
                <param name="bam_input" type="data" format="bam" label="Bam" help="Warning!!!: The BAM files must have been created using the ensembl version of the genome."/>
            </when>
        </conditional>
        <param argument="--spoligotype" type="boolean" label="Perform in-silico spoligotyping" checked="false" truevalue="--spoligotype" falsevalue=""
            help="Uses spacers of CRISPR region to call a spoligotype."/>
        <!-- Chooses which report is produced in addition to the JSON results. -->
        <param name="output_format" label="Output format" type="select">
            <option value="txt" selected="true">Text</option>
            <option value="pdf">PDF</option>
        </param>
        <!-- Advanced options are only passed to tb-profiler when "Yes" is selected. -->
        <conditional name="advanced">
            <param label="Select advanced options" type="select" name="options">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="no">
            </when>
            <when value="yes">
                <param argument="--mapper" label="Mapper" type="select" help="Mapping tools to use (default: bwa)">
                    <option value="bwa" selected="true">bwa</option>
                    <option value="minimap2">minimap2</option>
                    <option value="bowtie2">bowtie2</option>
                    <option value="bwa-mem2">bwa-mem2</option>
                </param>
                <param argument="--caller" label="Variant caller" help="Variant calling tool to use" type="select">
                    <option value="freebayes" selected="true">freebayes</option>
                    <option value="bcftools">bcftools</option>
                    <option value="gatk">gatk</option>
                    <option value="pilon">pilon</option>
                    <option value="lofreq">lofreq</option>
                </param>
                <!-- Each filter has a hard and a soft value; the command joins them as 'hard,soft'. -->
                <section name="vf" title="Variant Filters">
                    <param name="min_depth_hard" argument="--depth" label="Min Depth (hard cutoff)" type="integer" value="0" min="0"
                        help="Minimum depth required to call variant. Bases with depth below this cutoff will be marked as missing (default: 0)"/>
                    <param name="min_depth_soft" argument="--depth" label="Min Depth (soft cutoff)" type="integer" value="10" min="0"
                        help="Minimum depth required to call variant. Variants at positions lower than this depth and above the hard cutoff will be marked as QC fail (default: 10)"/>
                    <param name="min_af_hard" argument="--af" type="float" value="0" label="Minimum allele frequency to call variants (hard cutoff)" min="0.0" max="1.0"
                        help="Minimum allele frequency to call variants (default: 0)" />
                    <param name="min_af_soft" argument="--af" type="float" value="0.1" label="Minimum allele frequency to call variants (soft cutoff)" min="0.0" max="1.0"
                        help="Variants with an allele frequency lower than this (and higher than the hard cutoff) will be marked as QC fail (default 0.1)" />
                    <param name="min_read_hard" argument="--strand" type="integer" value="0" label="Minimum read number per strand (hard cutoff)" min="0"
                        help="Minimum read number per strand to call variants (default: 0)" />
                    <param name="min_read_soft" argument="--strand" type="integer" value="3" label="Minimum read number per strand (soft cutoff)" min="0"
                        help="Minimum read number per strand to report variants (default: 3). Variants with number of reads on each strand below this (and above the hard cutoff) will be marked as QC fail" />
                    <param name="min_sv_depth_hard" argument="--sv-depth" type="integer" value="0" label="Min Depth for SV (hard cutoff)" min="0"
                        help="Minimum depth required to call structural variants (default: 0)" />
                    <param name="min_sv_depth_soft" argument="--sv-depth" type="integer" value="10" label="Min Depth for SV (soft cutoff)" min="0"
                        help="Minimum depth required to call structural variants. Variants at positions lower than this depth and above the hard cutoff will be marked as QC fail" />
                    <param name="min_sv_af_hard" argument="--sv-af" type="float" value="0.5" label="Min SV allele frequency (hard cutoff)" min="0.0" max="1.0"
                        help="Minimum allele frequency to call structural variants (default: 0.5)" />
                    <param name="min_sv_af_soft" argument="--sv-af" type="float" value="0.9" label="Min SV allele frequency (soft cutoff)" min="0.0" max="1.0"
                        help="Variants with an allele frequency lower than this (and higher than the hard cutoff) will be marked as QC fail (default 0.9)" />
                    <param name="max_sv_len_hard" argument="--sv-len" type="integer" value="100000" label="Max SV length (hard cutoff)" min="0"
                        help="Maximum length of structural variants to call (default: 100000)" />
                    <param name="max_sv_len_soft" argument="--sv-len" type="integer" value="50000" label="Max SV length (soft cutoff)" min="0"
                        help="Structural variants with length higher than this (and lower than the hard cutoff) will be marked as QC fail (default: 50000)" />
                </section>
                <param argument="--suspect" label="Use ML predictions of resistance from the SUSPECT tool suite" type="boolean" checked="false" truevalue="--suspect" falsevalue=""
                    help="The SUSPECT tools offer ML-based predictions of RIF, PZA and BDQ resistance. Note that this uses a web service thus requires an Internet connection."/>
                <param argument="--no_trim" label="Do not trim reads using trimmomatic" checked="false" type="boolean" truevalue="--no_trim" falsevalue="" />
                <param argument="--no_coverage_qc" label="Don't collect flagstats" type="boolean" truevalue="--no_coverage_qc" falsevalue="" />
                <param argument="--no_samclip" label="Don't remove clipped reads from variant calling" type="boolean" truevalue="--no_samclip" falsevalue="" />
                <param argument="--no_delly" label="Don't run delly for SV calling" type="boolean" truevalue="--no_delly" falsevalue="" />
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- JSON results; always produced. -->
        <data name="results_json" format="json" from_work_dir="results/tbprofiler.results.json" label="${tool.name} on ${on_string}: Results.json"/>
        <!-- Targets VCF written by the bcftools step of the command; always produced. -->
        <data name="output_vcf" format="vcf" label="${tool.name} VCF on ${on_string}" />
        <!-- BAM output; omitted when the input itself was a BAM. -->
        <data name="output_bam" format="bam" label="${tool.name} BAM on ${on_string}">
            <filter>fastq_or_bam['input_select'] != 'bam'</filter>
        </data>
        <!-- Exactly one of the two reports below is kept, depending on the chosen output format. -->
        <data name="output_pdf" format="pdf" label="${tool.name} PDF report on ${on_string}">
            <filter>output_format == 'pdf'</filter>
        </data>
        <data name="output_txt" format="txt" label="${tool.name} report on ${on_string}">
            <filter>output_format == 'txt'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Single-end gzipped FASTQ, advanced options off, text report. -->
        <test expect_num_outputs="4">
            <param name="platform" value="illumina" />
            <conditional name="fastq_or_bam">
                <param name="input_select" value="single_fastq"/>
                <param name="fastq" ftype="fastq.gz" value="rif_resistant.fastq.gz" />
            </conditional>
            <param name="output_format" value="txt" />
            <conditional name="advanced">
                <param name="options" value="no" />
            </conditional>
            <output name="output_txt">
                <assert_contents>
                    <has_line line="Drug-resistance: RR-TB" />
                    <has_line line="lineage2.2.2&#009;1.000&#009;East-Asian (Beijing)&#009;RD105;RD207" />
                    <has_line line="Rifampicin&#009;R&#009;rpoB p.Asp435Val (1.00)" />
                    <has_line line="761110&#009;Rv0667&#009;rpoB&#009;missense_variant&#009;p.Asp435Val&#009;1.000&#009;rifampicin&#009;Assoc w R" />
                </assert_contents>
            </output>
        </test>
        <!-- BAM input: one output fewer, because the BAM output is filtered out. -->
        <test expect_num_outputs="3">
            <param name="platform" value="illumina" />
            <conditional name="fastq_or_bam">
                <param name="input_select" value="bam"/>
                <param name="bam_input" ftype="bam" value="rif_resistant.bam" />
            </conditional>
            <param name="output_format" value="txt" />
            <conditional name="advanced">
                <param name="options" value="no" />
            </conditional>
            <output name="output_txt">
                <assert_contents>
                    <has_line line="Drug-resistance: RR-TB" />
                    <has_line line="lineage2.2.2&#009;1.000&#009;East-Asian (Beijing)&#009;RD105;RD207" />
                    <has_line line="Rifampicin&#009;R&#009;rpoB p.Asp435Val (1.00)" />
                    <has_line line="761110&#009;Rv0667&#009;rpoB&#009;missense_variant&#009;p.Asp435Val&#009;1.000&#009;rifampicin&#009;Assoc w R" />
                </assert_contents>
            </output>
        </test>
        <!-- Advanced options on, with depth cutoffs raised to 40 (hard) and 50 (soft);
             the JSON results are expected to contain a soft_fail filter entry. -->
        <test expect_num_outputs="4">
            <param name="platform" value="illumina" />
            <conditional name="fastq_or_bam">
                <param name="input_select" value="single_fastq"/>
                <param name="fastq" ftype="fastq.gz" value="rif_resistant.fastq.gz" />
            </conditional>
            <param name="output_format" value="txt" />
            <conditional name="advanced">
                <param name="options" value="yes" />
                <section name="vf">
                    <param name="min_depth_hard" value="40" />
                    <param name="min_depth_soft" value="50" />
                </section>
            </conditional>
            <output name="output_txt">
                <assert_contents>
                    <has_line line="lineage2.2.2&#009;1.000&#009;East-Asian (Beijing)&#009;RD105;RD207" />
                    <has_line line="761110&#009;Rv0667&#009;rpoB&#009;missense_variant&#009;p.Asp435Val&#009;1.000&#009;rifampicin&#009;Assoc w R" />
                </assert_contents>
            </output>
            <output name="results_json">
                <assert_contents>
                    <has_text text='"filter": "soft_fail",' />
                </assert_contents>
            </output>
        </test>
        <!-- Spoligotyping enabled: the text report must include the octal spoligotype. -->
        <test expect_num_outputs="4">
            <param name="platform" value="illumina" />
            <conditional name="fastq_or_bam">
                <param name="input_select" value="single_fastq"/>
                <param name="fastq" ftype="fastq.gz" value="rpoB_and_crispr.fastq.gz" />
            </conditional>
            <param name="spoligotype" value="true" />
            <param name="output_format" value="txt" />
            <conditional name="advanced">
                <param name="options" value="no" />
            </conditional>
            <output name="output_txt">
                <assert_contents>
                    <has_line line="Octal: 000000000000771" />
                    <has_line line="Drug-resistance: RR-TB" />
                    <has_line line="Rifampicin&#009;R&#009;rpoB p.Asp435Val (1.00)" />
                    <has_line line="761110&#009;Rv0667&#009;rpoB&#009;missense_variant&#009;p.Asp435Val&#009;1.000&#009;rifampicin&#009;Assoc w R" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help><![CDATA[    
Summary
=======

The pipeline aligns reads to the H37Rv reference using BWA, BWA-MEM2, bowtie2 or minimap2 and then calls variants
(using freebayes, bcftools, GATK4, pilon or lofreq). These variants are then compared to a drug-resistance database.
TB-Profiler also predicts the number of reads supporting drug resistance variants as an insight into
hetero-resistance (not applicable for MinION data).

Produces a JSON output file by default.

In the Advanced options, you can select the mapper and variant caller to use, as well as set a number of filtering parameters.
Each of these typically has a "hard" and a "soft" cutoff. A variant whose value is below the "hard" cutoff is discarded;
a variant whose value lies between the "hard" and "soft" cutoffs is removed from the final predictions but added to the
"qc_fail_variants" section of the output.

One of the advanced options is to use the SUSPECT tool suite to predict resistance to `rifampicin (RIF)`_, `pyrazinamide (PZA)`_
and `bedaquiline (BDQ)`_.

.. _rifampicin (RIF): https://www.nature.com/articles/s41598-020-74648-y
.. _pyrazinamide (PZA): https://www.nature.com/articles/s41598-020-58635-x
.. _bedaquiline (BDQ): https://journals.plos.org/plosone/article?id=10.1371/journal.pone.0217169
    ]]>    </help>
    <citations>
        <!-- Publication to cite when using this tool, identified by DOI. -->
        <citation type="doi">10.1186/s13073-019-0650-x</citation>
    </citations>
</tool>