view ivar_trim.xml @ 19:a250929be21b draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ivar/ commit 32fbe8a14173afe0b39f1483afaba958dc6cd027
author iuc
date Fri, 21 Jun 2024 15:21:34 +0000
parents 28e4bcbc86e7
children
line wrap: on
line source

<tool id="ivar_trim" name="ivar trim" version="@TOOL_VERSION@+galaxy0" profile="@PROFILE@">
    <description>Trim reads in aligned BAM</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_command"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Let a failure in any stage of the final "ivar trim | samtools sort"
        ## pipeline reach the exit code checked by detect_errors; by default a
        ## shell pipeline only reports the status of its last command.
        ## pipefail is switched on only when the executing shell offers it.
        set -o | grep -q pipefail && set -o pipefail;

        ## Stage the primer BED under a fixed name, taken either from the
        ## history or from the selected built-in primer scheme.
        #if $primer.source == 'history':
            cp '$primer.input_bed' bed.bed &&
        #else:
            cp '$primer.cached_bed.fields.path' bed.bed &&
        #end if
        ## NOTE(review): sanitize_bed.py receives only the staged copy as its
        ## argument, so it presumably rewrites bed.bed in place - confirm in
        ## the script.
        python '$__tool_directory__/sanitize_bed.py' bed.bed &&

        ## Amplicon filtering: obtain a raw amplicon info file (computed from
        ## the primer BED or supplied by the user), then derive from it the
        ## amplicon_info.tsv that is handed to ivar trim through -f below.
        #if $amplicons.filter_by == 'yes' or $amplicons.filter_by == 'yes_compute':
            #if $amplicons.filter_by == 'yes_compute':
                python '$__tool_directory__/write_amplicon_info_file.py' bed.bed amplicon_info_raw.tsv &&
            #else:
                ln -s '$amplicons.amplicon_info' amplicon_info_raw.tsv &&
            #end if
            python '$__tool_directory__/prepare_amplicon_info.py' bed.bed amplicon_info_raw.tsv amplicon_info.tsv &&
        #end if

        ## Link the input BAM and its index side by side under fixed names.
        ln -s '$input_bam' sorted.bam &&
        ln -s '${input_bam.metadata.bam_index}' sorted.bam.bai &&

        ivar trim
        -i sorted.bam
        -b bed.bed
        #if $amplicons.filter_by == 'yes' or $amplicons.filter_by == 'yes_compute':
            -f amplicon_info.tsv
        #end if
        -x $primer_pos_wiggle
        $inc_primers
        ## Minimum trimmed read length: 0 keeps all reads, -1 asks ivar to
        ## determine the threshold from the input, otherwise the user value.
        #if $trimmed_length.filter == 'off':
            -m 0
        #elif $trimmed_length.filter == 'auto':
            -m -1
        #else:
            -m $trimmed_length.min_len
        #end if
        -q $min_qual
        -s $window_width
        | samtools sort -@ \${GALAXY_SLOTS:-1} -T "\${TMPDIR:-.}" -o trimmed.sorted.bam -
    ]]></command>
    <inputs>
        <!-- Aligned reads to be trimmed; passed to ivar trim as -i. -->
        <param name="input_bam" argument="-i" type="data" format="bam"
               label="Bam file"
               help="Aligned reads, to trim primers and quality"/>
        <!-- Origin of the primer BED: a dataset from the history or a built-in primer scheme. -->
        <conditional name="primer">
            <param name="source" type="select" label="Source of primer information">
                <option value="history" selected="true">History</option>
                <option value="cached">Built-in</option>
            </param>
            <when value="history">
                <param name="input_bed" argument="-b" type="data" format="bed"
                       label="BED file with primer sequences and positions"/>
            </when>
            <when value="cached">
                <param name="cached_bed" type="select"
                       label="Primer scheme name"
                       help="Select primer scheme bed file from a list">
                    <!-- Choices are read from the primer_scheme_bedfiles data table and sorted by its column 1;
                         the command template takes the file location from the chosen entry's "path" field. -->
                    <options from_data_table="primer_scheme_bedfiles">
                        <filter type="sort_by" column="1"/>
                        <validator type="no_options" message="No primer schemes are available"/>
                    </options>
                </param>
            </when>
        </conditional>
        <!-- Optional amplicon-based read filtering. An empty value disables it, "yes_compute" derives the
             amplicon info from the primer BED, "yes" uses an amplicon info dataset supplied by the user. -->
        <conditional name="amplicons">
            <param name="filter_by" type="select"
            label="Filter reads based on amplicon info"
            help="When you select Yes, reads that are not fully contained in any amplicon will be dropped before primer trimming. This option is currently marked as [Experimental] in ivar, but nevertheless recommended here. Info on amplicons can be computed from suitable primer BED files (see tool help below) or provided by the user. ">
                <option value="">No, allow reads to extend beyond amplicon boundaries</option>
                <option value="yes_compute">Yes, drop reads that extend beyond amplicon boundaries</option>
                <option value="yes">Yes, drop reads that extend beyond amplicon boundaries and use my amplicon info file</option>
            </param>
            <when value="yes_compute" />
            <when value="yes">
                <!-- This dataset input previously had neither label nor help, leaving the form field undescribed. -->
                <param name="amplicon_info" argument="-f" type="data" format="tabular"
                label="Amplicon info dataset"
                help="Tabular dataset with one line per amplicon, listing the TAB-separated names of the primers that together form that amplicon. Primer names need to match those in the primer information dataset exactly (see tool help below)." />
            </when>
            <when value="" />
        </conditional>
        <!-- Plain ivar trim options; each value is passed through unchanged as -x, -e, -q and -s. -->
        <param name="primer_pos_wiggle" argument="-x" type="integer" value="0" min="0"
               label="Wiggling room for read ends relative to primer binding sites"
               help="Reads that occur at the specified offset positions relative to primer positions (as annotated in the primer information dataset) will also be trimmed (default: 0)"/>
        <!-- Boolean flag: contributes "-e" to the command line when checked and nothing otherwise. -->
        <param name="inc_primers" argument="-e" type="boolean" checked="false"
               truevalue="-e" falsevalue=""
               label="Include reads not ending in any primer binding sites?"/>
        <param name="min_qual" argument="-q" type="integer" value="20" min="0" max="255"
               label="Minimum quality threshold for sliding window to pass"/>
        <!-- NOTE(review): min="0" allows a zero-width window; confirm that ivar trim handles -s 0 sensibly. -->
        <param name="window_width" argument="-s" type="integer" value="4" min="0" max="255"
               label="Width of sliding window"/>
        <!-- Minimum read length after trimming. The command template maps "off" to -m 0, "auto" to -m -1
             and "custom" to -m followed by the user-supplied min_len. -->
        <conditional name="trimmed_length">
            <param name="filter" type="select"
                   label="Require a minimum length for reads to retain them after any trimming?"
                   help="The default automatic setting will determine the threshold as 50% of the mean length of the first 1000 raw input reads, which may or may not be adequate for your data. You can always make the outcome of filtering more predictable by specifying the threshold explicitly. You can also opt to retain reads independently of their trimmed length, but typically this only makes sense if you are performing additional read filtering with additional tools in your analysis pipeline.">
                <option value="off">No, keep reads independently of their trimmed length (-m 0)</option>
                <option value="auto" selected="true">Yes, and determine required length threshold automatically from input (-m -1)</option>
                <option value="custom">Yes, and provide a custom threshold</option>
            </param>
            <when value="off"/>
            <when value="auto"/>
            <when value="custom">
                <param name="min_len" argument="-m" type="integer" value="30" min="1"
                       label="Minimum trimmed length threshold"
                       help="Reads with a trimmed length shorter than this value will be discarded."/>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!-- Collected from trimmed.sorted.bam, the file written by the samtools sort step of the command. -->
        <data name="output_bam" format="bam"
              from_work_dir="trimmed.sorted.bam"
              label="${tool.name} on ${on_string} Trimmed bam"/>
    </outputs>
    <tests>
        <!-- covid19 data, primer BED supplied from the history -->
        <!-- NOTE(review): in both tests below the expected file is the untrimmed input BAM itself, compared
             only by size within the given delta, so they mainly show that a BAM of plausible size is produced. -->
        <test>
            <param name="input_bam" value="covid19/PC00101P_sub.sorted.bam" />
            <!-- input_bed lives inside the "primer" conditional, so it is addressed through it here,
                 the same way the other tests of this tool do. -->
            <conditional name="primer">
                <param name="source" value="history" />
                <param name="input_bed" value="covid19/ARTIC-V1.bed" />
            </conditional>
            <param name="inc_primers" value="true" />
            <conditional name="trimmed_length">
                <param name="filter" value="custom" />
                <param name="min_len" value="30" />
            </conditional>
            <output name="output_bam" file="covid19/PC00101P_sub.sorted.bam" compare="sim_size" delta="300000"/>
        </test>
        <test>
            <!-- Test with primer bed file that needs to be sanitized -->
            <param name="input_bam" value="covid19/PC00101P_sub.sorted.bam" />
            <conditional name="primer">
                <param name="source" value="history" />
                <param name="input_bed" value="covid19/ARTIC-V1-bad.bed" />
            </conditional>
            <param name="inc_primers" value="true" />
            <conditional name="trimmed_length">
                <param name="filter" value="custom" />
                <param name="min_len" value="30" />
            </conditional>
            <output name="output_bam" file="covid19/PC00101P_sub.sorted.bam" compare="sim_size" delta="300000"/>
        </test>
        <!-- Zika data, primer BED supplied from the history -->
        <test>
            <!-- Explicit minimum trimmed length of 30 -->
            <param name="input_bam" value="zika/Z52_a.sorted.bam" />
            <conditional name="primer">
                <param name="source" value="history" />
                <param name="input_bed" value="zika/db/zika_primers.bed" />
            </conditional>
            <conditional name="trimmed_length">
                <param name="filter" value="custom" />
                <param name="min_len" value="30" />
            </conditional>
            <output name="output_bam" file="zika/Z52_a.trimmed.sorted.bam" compare="sim_size" delta="100000"/>
        </test>
        <test>
            <!-- Automatic minimum trimmed length; the threshold ivar settles on is checked through stderr -->
            <param name="input_bam" value="zika/Z52_b.sorted.bam" />
            <conditional name="primer">
                <param name="source" value="history" />
                <param name="input_bed" value="zika/db/zika_primers.bed" />
            </conditional>
            <conditional name="trimmed_length">
                <param name="filter" value="auto" />
            </conditional>
            <output name="output_bam" ftype="bam">
                <assert_contents>
                    <has_size value="3373924" delta="1000"/>
                </assert_contents>
            </output>
            <assert_stderr>
                <has_text text="Found 68 primers in BED file"/>
                <has_text text="reads were quality trimmed below the minimum length of 125 bp and were not written to file"/>
                <has_text text="reads that started outside of primer regions were not written to file"/>
            </assert_stderr>
        </test>
        <test>
            <!-- Built-in primer scheme selected from the data table, no amplicon filtering -->
            <param name="input_bam" value="sars-cov-2/sars_cov2_untrimmed.bam" ftype="bam" />
            <conditional name="primer">
                <param name="source" value="cached" />
                <param name="cached_bed" value="SARS-CoV-2-ARTICv1" />
            </conditional>
            <conditional name="trimmed_length">
                <param name="filter" value="custom" />
                <param name="min_len" value="30" />
            </conditional>
            <output name="output_bam" file="sars-cov-2/sars_cov2_trimmed.bam" compare="sim_size" delta="100000"/>
        </test>
        <test>
            <!-- Built-in primer scheme with amplicon info computed from the primer BED -->
            <param name="input_bam" value="sars-cov-2/sars_cov2_untrimmed.bam" ftype="bam" />
            <conditional name="primer">
                <param name="source" value="cached" />
                <param name="cached_bed" value="SARS-CoV-2-ARTICv1" />
            </conditional>
            <conditional name="amplicons">
                <param name="filter_by" value="yes_compute" />
            </conditional>
            <conditional name="trimmed_length">
                <param name="filter" value="custom" />
                <param name="min_len" value="30" />
            </conditional>
            <!-- The generated command line must invoke the amplicon info helper script -->
            <assert_command>
                <has_text text="write_amplicon_info_file" />
            </assert_command>
            <output name="output_bam" file="sars-cov-2/sars_cov2_trimmed.bam" compare="sim_size" delta="100000"/>
        </test>
    </tests>
    <help><![CDATA[
iVar uses primer positions supplied in a BED file to soft clip primer
sequences from an aligned and sorted BAM file. Following this, the reads are
trimmed further based on a quality threshold.

**Primer and Amplicon info**

The tool requires information about primers and their binding sites in 6-column
BED format. The information from this file is used to decide whether any mapped
read in the BAM input ends with a primer sequence and should, thus, be
soft-clipped.

Optionally, the tool can also discard reads that are not fully contained within
any amplicon. Such reads are likely to be wet-lab or mapping artefacts, and
removing them can increase variant calling precision. To calculate the extent of
expected amplicons the tool needs to know which primers work together to form
an amplicon. The tool can try to deduce this info from the names of the primers
found in the primer info dataset. This will require a primer naming scheme
following the regex pattern::

  .*_(?P<amplicon_number>\d+).*_(?P<primer_orientation>L(?:EFT)?|R(?:IGHT)?)

*i.e.*, the following schemes will work (and get parsed as):

- ``nCoV-2019_1_LEFT`` (forward primer of amplicon 1)

- ``400_2_out_R`` (reverse primer of amplicon 2)

- ``QIAseq_163-2_LEFT`` (forward primer of amplicon 163)

Alternatively, you can specify the amplicon information explicitly through a
dataset that lists the names of primers that together form any given amplicon.
In it, primer names (exactly matching those in the primer info dataset) need to
be TAB-separated with one line per amplicon.
If the primer scheme has more than two primers contributing to a given amplicon
(in schemes using alternate primers), you can (in this Galaxy tool only)
specify all of them on one line and the tool will calculate the maximum extent
of the amplicon.

**Quality trimming details and final length filtering**

To do the quality trimming, iVar uses a sliding window approach. The window
slides from the 5' end to the 3' end and if at any point the average base
quality in the window falls below the threshold, the remaining read is soft
clipped.

Finally, the minimum trimmed length threshold gets applied if specified, and
the reads that survive all trimming and filtering steps are written to the BAM
output.

Documentation can be found at `<https://andersen-lab.github.io/ivar/html/manualpage.html>`_.
    ]]></help>
    <expand macro="citations" />
</tool>