view sambamba.xml @ 3:de833cc76a8e draft default tip

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sambamba commit 1b6bff4ab8aaac8a247f603648f63bbadb02e1fe"
author artbio
date Thu, 30 Dec 2021 17:44:31 +0000
parents 7ad3484aa5db
children
line wrap: on
line source

<tool id="sambamba_sample_or_filter" name="Sample, Slice or Filter BAM" version="0.7.1+galaxy1">
    <description>
        on flags, fields, and tags using Sambamba
    </description>
    <requirements>
        <requirement type="package" version="0.7.1">sambamba</requirement>
    </requirements>
    <stdio>
        <!-- Any exit status of 1 or above is reported as a fatal job error. -->
        <exit_code range="1:" level="fatal" description="Error occurred" />
    </stdio>
    <!-- <version_command>sambamba 2>&amp;1 | grep "sambamba v" | sed 's/^sambamba v\(.*\)/\1/'</version_command> -->
    <!--
        Shell command template, rendered with Cheetah before execution.
        The selected operation (filter, sample or slice) picks exactly one
        sambamba invocation; every user-supplied value is passed to the shell
        inside single quotes.
    -->
    <command detect_errors="exit_code"><![CDATA[
        ## Give the input and its index predictable names in the job directory.
        ln -s '$input' input.bam &&
        ln -s '$input.metadata.bam_index' input.bai &&

        #if $sambamba_options.selector == 'filter'
            ## The query sanitizer lets single quotes through (sambamba string
            ## literals need them), so each one is rewritten as '"'"' to keep the
            ## expression inside one single-quoted shell word.
            #set $filter_expr = str($sambamba_options.query).replace("'", "'\"'\"'")
            sambamba view -h -t \${GALAXY_SLOTS:-4}
            #if $filter_expr != ''
                --filter='$filter_expr'
            #end if
                -f '$sambamba_options.format'
                -o '$outfile' input.bam
            ## Regions are whitespace-separated; quote each one individually.
            ## An empty field adds no argument at all.
            #for $view_region in str($sambamba_options.region).split()
                '$view_region'
            #end for

        #else if $sambamba_options.selector == 'sample'
            sambamba view -h -t \${GALAXY_SLOTS:-4} -f '$sambamba_options.format'
                 --subsampling-seed='$sambamba_options.seed'
                 -s '$sambamba_options.fraction'
                 -o '$outfile' input.bam

        #else if $sambamba_options.selector == 'slice'
            #if $sambamba_options.slice.slice_format == 'text'
                sambamba slice -o '$outfile' input.bam
                ## Same per-region quoting as in the filter branch.
                #for $slice_region in str($sambamba_options.slice.region).split()
                    '$slice_region'
                #end for
            #else
                sambamba slice --regions '$sambamba_options.slice.bed_file' -o '$outfile' input.bam
            #end if
        #end if
    ]]></command>
    <inputs>
        <param name="input" type="data" format="bam" label="BAM or SAM input file(s)"/>
        <!-- The chosen operation decides which group of parameters below applies. -->
        <conditional name="sambamba_options">
            <param name="selector" type="select" label="Filter, Down-sample or slice alignments">
                <option value="sample">Down-sample bam or sam alignments</option>
                <option value="filter" selected="true">Filter bam or sam alignments</option>
                <option value="slice">Slice bam or sam alignments by chromosome or chromosome region</option>
            </param>
            <!-- Filter: optional filter expression plus optional region, output as BAM or SAM. -->
            <when value="filter">
                <param name="format" type="select" label="format of the tool output">
                    <option value="bam">BAM</option>
                    <option value="sam">SAM</option>
                </param>
                <param name="query" type="text" size="80">
                    <!--
                        Letters, digits, all ASCII punctuation and the space character
                        are accepted; anything else is replaced by "X". Quote characters
                        are therefore passed through, so the command template has to
                        shell-escape this value.
                    -->
                    <sanitizer invalid_char="X">
                        <valid initial="string.ascii_letters,string.digits, string.punctuation">
                            <add value=" " />
                        </valid>
                    </sanitizer>
                    <label>Filter expression</label>
                    <help>See below for query syntax.</help>
                </param>
                <param name="region" type="text" size="40" label="Region in format chr:beg-end">
                    <help>
                    Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000'
                    (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000'
                    (region between 1,000,000 and 2,000,000bp including the end points).
                    The coordinates are 1-based.
                    </help>
                </param>
            </when>

            <!-- Slice: regions come either from a BED dataset or from a free-text field. -->
            <when value="slice">
                <conditional name="slice">
                    <param name="slice_format" type="select" label="specify in text field or a bed file">
                        <option value="text">Specify in the region field</option>
                        <option value="bed" selected="true">BED file</option>
                    </param>
                    <when value="bed">
                        <param name="bed_file" type="data" format="bed" label="BED file"
                               help="The coordinates are 0-based. NOTE that sambamba slice only outputs BAM-format datasets."/>
                    </when>
                    <when value="text">
                        <param name="region" type="text" size="40" label="Region(s) in space-separated format chr:start-end">
                            <help>
                            Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000'
                            (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000'
                            (region between 1,000,000 and 2,000,000bp including the end points).
                            The coordinates are 1-based. NOTE that sambamba slice only outputs
                            BAM-format datasets.
                            </help>
                        </param>
                    </when>
                </conditional>
            </when>

            <!-- Sample: random down-sampling controlled by a seed and a fraction. -->
            <when value="sample">
                <param name="format" type="select" label="format of the tool output">
                    <option value="bam">BAM</option>
                    <option value="sam">SAM</option>
                </param>
                <param name="seed" type="integer" value="123" size="10">
                    <label>Seed value for randomisation</label>
                    <help>
                    Be careful at selecting different seed values if you
                    re-subsample a subsample output of this tool
                    </help>
                </param>
                <!-- Bounded on both sides so the form enforces the 0 to 1 range stated in the help. -->
                <param name="fraction" type="float" value="0.1" min="0" max="1" size="10" label="fraction to retrieve after subsampling">
                    <help>
                    Use a real number between 0 and 1 to indicate the relative size of
                    the fraction you wish to retrieve
                    </help>
                </param>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <!--
            Single result dataset. It is declared as BAM; the change_format rule
            switches it to SAM when the "format" select inside the
            sambamba_options conditional is set to "sam".
            NOTE(review): the "slice" branch defines no "format" parameter, so the
            output presumably stays BAM there; confirm against Galaxy's
            change_format handling.
        -->
        <data name="outfile" format="bam" label="Filter, slice or sample on ${on_string}">
            <change_format>
                <when input="sambamba_options['format']" value="sam" format="sam" />
            </change_format>
        </data>
    </outputs>
    <tests>
        <!-- Filter on a tag and a read-name regex, no region, BAM output. -->
        <test>
            <param name="input" value="ex1_header.sam" ftype="sam" />
            <param name="selector" value="filter" />
            <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/" />
            <param name="format" value="bam" />
            <param name="region" value="" />
            <output name="outfile" file="ex1_header_filtered.bam" ftype="bam" lines_diff="2"/>
        </test>
        <!-- Filter on a tag regex and a flag, restricted to a region, SAM output. -->
        <test>
            <param name="input" value="c1215_fixmate.bam" ftype="bam" />
            <param name="selector" value="filter" />
            <param name="query" value="[MD] =~ /^\d+T\d+A\d+/ and first_of_pair" />
            <param name="format" value="sam" />
            <param name="region" value="AL096846:1000-5000" />
            <output name="outfile" file="c1215_fixmate_filtered.sam" ftype="sam" lines_diff="2"/>
        </test>
        <!-- Region only: empty filter expression, SAM output. -->
        <test>
            <param name="input" value="c1215_fixmate.bam" ftype="bam" />
            <param name="selector" value="filter" />
            <param name="query" value='' />
            <param name="format" value="sam" />
            <param name="region" value="AL096846:1000-5000" />
            <output name="outfile" file="c1215_fixmate_region-filtered.sam" ftype="sam" lines_diff="2"/>
        </test>
        <!-- Slice with two space-separated regions typed in the text field. -->
        <test>
            <param name="input" value="c1215_fixmate.bam" ftype="bam" />
            <param name="selector" value="slice" />
            <param name="slice_format" value="text" />
            <param name="region" value="AL096846:1000-2000 AL096846:4000-5000" />
            <output name="outfile" file="c1215_fixmate_region-sliced.bam" ftype="bam"/>
        </test>
        <!-- Slice with regions taken from a BED file; same expected output as the text variant. -->
        <test>
            <param name="input" value="c1215_fixmate.bam" ftype="bam" />
            <param name="selector" value="slice" />
            <param name="slice_format" value="bed" />
            <param name="bed_file" value="regions.bed" />
            <output name="outfile" file="c1215_fixmate_region-sliced.bam" ftype="bam"/>
        </test>
        <!-- Down-sample to a 0.1 fraction with a fixed seed, BAM output. -->
        <test>
            <param name="input" value="ex1_header.sam" ftype="sam" />
            <param name="selector" value="sample" />
            <param name="seed" value="123" />
            <param name="fraction" value="0.1" />
            <param name="format" value="bam" />
            <output name="outfile" file="ex1_header_sampled.bam" ftype="bam" lines_diff="2"/>
        </test>
        <!-- Down-sample to a 0.1 fraction with a fixed seed, SAM output. -->
        <test>
            <param name="input" value="c1215_fixmate.bam" ftype="bam" />
            <param name="selector" value="sample" />
            <param name="seed" value="123" />
            <param name="fraction" value="0.1" />
            <param name="format" value="sam" />
            <output name="outfile" file="c1215_fixmate_sampled.sam" ftype="sam" lines_diff="2"/>
        </test>
    </tests>
    <help>
Sambamba Filter Overview
========================

This tool uses the sambamba_ ``view`` command to filter BAM/SAM files on flags, fields, tags, and region. The input is a SAM or BAM file.


Filter Syntax 
=============

A *filter expression* is a number of *basic conditions* linked by ``and``, ``or``, ``not`` logical operators, and enclosed in parentheses where needed.

A *basic condition* is a condition on a single record field, tag, or flag.

You can use the ``==``, ``!=``, ``&gt;``, ``&lt;``, ``&gt;=``, ``&lt;=`` comparison operators for both integers and strings.

Strings are delimited by single quotes. If you need a single quote inside a string, escape it with a backslash (``\'``).

Examples of filter expressions
------------------------------

::

    mapping_quality >= 30 and ([RG] =~ /^abcd/ or [NM] == 7)
    read_name == 'abc\'def'

Basic conditions for flags
--------------------------

The following flag names are recognized:
  * paired
  * proper_pair
  * unmapped
  * mate_is_unmapped
  * reverse_strand
  * mate_is_reverse_strand
  * first_of_pair
  * second_of_pair
  * secondary_alignment
  * failed_quality_control
  * duplicate

Example
~~~~~~~

::

    not (unmapped or mate_is_unmapped) and first_of_pair

Basic conditions for fields
---------------------------

Conditions for integer and string fields are supported.

List of integer fields:
  * ref_id
  * position
  * mapping_quality
  * sequence_length
  * mate_ref_id
  * mate_position
  * template_length


List of string fields:
  * read_name
  * sequence
  * cigar


Example
~~~~~~~

::

    ref_id == 3 and mapping_quality >= 50 and sequence_length >= 80

Basic conditions for tags
-------------------------

Tags are denoted by their names in square brackets, for instance, ``[RG]`` or ``[Q2]``. They support conditions for both integers and strings, i.e. the tag must also hold a value of the corresponding type.

To filter on the presence of a particular tag, you can use the special ``null`` value.

Example
~~~~~~~

::

    [RG] != null and [AM] == 37

Down-sampling 
=============

The tool uses the following sambamba command line for sampling:

::

    sambamba view -h -t &lt;number of Galaxy threads configured in job_conf.xml&gt; -f &lt;bam or sam&gt;
    --subsampling-seed=&lt;an integer&gt;
    -s &lt;a real number between 0 and 1&gt; -o &lt;bam or sam output&gt; input_file
    
Warnings
--------

The tool does not down-sample to a user-given **number of lines**, because sambamba does not
expose this functionality. For performance reasons, we decided not to add it in this wrapper.

If you down-sample a dataset that has *already been down-sampled* with this tool, it is
important that you choose **another seed** for randomisation. Otherwise, the new subsample
has been reported not to match the requested fraction.


.. _sambamba: http://github.com/lomereiter/sambamba

    </help>
    <citations>
        <citation type="doi">10.1093/bioinformatics/btv098</citation>
    </citations>
</tool>