diff sambamba.xml @ 1:6195f15d4541 draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/sambamba commit 516e8d55d6d45e6f2266805b78eb25a711621321"
author artbio
date Mon, 25 May 2020 17:10:17 -0400
parents
children 7ad3484aa5db
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sambamba.xml	Mon May 25 17:10:17 2020 -0400
@@ -0,0 +1,242 @@
+<tool id="sambamba_sample_or_filter" name="Sample or Filter BAM" version="0.6">
+    <!-- Short text shown next to the tool name. -->
+    <description>
+        on flags, fields, and tags using Sambamba
+    </description>
+    <!-- Package providing the sambamba executable called in the command section. -->
+    <requirements>
+        <requirement type="package" version="0.7.1">sambamba</requirement>
+    </requirements>
+    <!-- Any exit code of 1 or above is reported as a fatal error. -->
+    <stdio>
+        <exit_code range="1:" level="fatal" description="Error occurred" />
+    </stdio>
+    <!-- <version_command>sambamba 2>&amp;1 | grep "sambamba v" | sed 's/^sambamba v\(.*\)/\1/'</version_command> -->
+    <command detect_errors="exit_code"><![CDATA[
+        ## Link the dataset and its index under fixed names in the working directory.
+        ## Paths are single-quoted so the shell never word-splits or globs them.
+        ln -s '$input' input.bam &&
+        ln -s '$input.metadata.bam_index' input.bai &&
+        #if $sambamba_options.selector == 'filter'
+            ## The query sanitizer accepts all punctuation, and sambamba string
+            ## literals are written with single quotes, so every single quote in the
+            ## expression is rewritten as '\'' to keep it inside the quoted shell word.
+            #set $query = str($sambamba_options.query).strip().replace("'", "'\\''")
+            #set $region = str($sambamba_options.region).strip()
+            sambamba view -h -t \${GALAXY_SLOTS:-4}
+            #if $query != ''
+                --filter='$query'
+            #end if
+            -f '$format' -o '$outfile' input.bam
+            ## Only pass a region when one was entered, so an empty value never
+            ## reaches sambamba as an (empty) argument.
+            #if $region != ''
+                '$region'
+            #end if
+        #else
+            ## Down-sampling branch: keep the requested fraction, seeded for reproducibility.
+            sambamba view -h -t \${GALAXY_SLOTS:-4} -f '$format'
+            --subsampling-seed='$sambamba_options.seed'
+            -s '$sambamba_options.fraction' -o '$outfile' input.bam
+        #end if
+    ]]></command>
+    <inputs>
+        <!-- Alignment dataset; the command symlinks it as input.bam together with its BAM index. -->
+        <param name="input" type="data" format="bam" label="BAM or SAM file to filter"/>
+        <!-- Passed to "sambamba view -f"; the outputs section switches the datatype on this value. -->
+        <param name="format" type="select" label="format of the tool output">
+            <option value="bam">BAM</option>
+            <option value="sam">SAM</option>
+        </param>
+        <!-- Selects which of the two command-line branches is rendered. -->
+        <conditional name="sambamba_options">
+            <param name="selector" type="select" label="Filter or Down-sample alignments">
+                <option value="sample">Down-sample bam or sam alignments</option>
+                <option value="filter" selected="true">Filter bam or sam alignments</option>
+            </param>
+            <when value="filter">
+                <!-- The sanitizer keeps letters, digits, punctuation and spaces; any other
+                     character is replaced by "X". An empty expression disables filtering. -->
+                <param name="query" type="text" size="80">
+                    <sanitizer invalid_char="X">
+                        <valid initial="string.ascii_letters,string.digits, string.punctuation">
+                            <add value=" " />
+                        </valid>
+                    </sanitizer>
+                    <label>Filter expression</label>
+                    <help>See below for query syntax.</help>
+                </param>
+                <!-- Optional region appended after the input file on the command line. -->
+                <param name="region" type="text" size="40" label="Region in format chr:beg-end">
+                    <help>
+                    Regions can be specified as 'chr2' (the whole chr2), 'chr2:1000000'
+                    (region starting from 1,000,000bp) or 'chr2:1,000,000-2,000,000'
+                    (region between 1,000,000 and 2,000,000bp including the end points).
+                    The coordinates are 1-based.
+                    </help>
+                </param>
+            </when>
+            <when value="sample">
+                <!-- Passed to the subsampling seed option of sambamba view. -->
+                <param name="seed" type="integer" value="123" size="10">
+                    <label>Seed value for randomisation</label>
+                    <help>
+                    Be sure to select a different seed value if you
+                    re-subsample an output of this tool
+                    </help>
+                </param>
+                <!-- Passed to "sambamba view -s"; bounded to the 0..1 range described in the help. -->
+                <param name="fraction" type="float" value="0.1" min="0" max="1" size="10" label="fraction to retrieve after subsampling">
+                    <help>
+                    Use a real number between 0 and 1 to indicate the relative size of
+                    the fraction you wish to retrieve
+                    </help>
+                </param>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <!-- Single result dataset: BAM by default, SAM when the "format" select is set to "sam". -->
+        <data format="bam" name="outfile">
+            <change_format>
+                <when format="sam" input="format" value="sam"/>
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <!-- Filter on a tag value and a read-name regex, no region, BAM output. -->
+        <test>
+            <param name="input" ftype="sam" value="ex1_header.sam"/>
+            <param name="format" value="bam"/>
+            <param name="selector" value="filter"/>
+            <param name="query" value="[H0] == 1 and read_name =~ /^EAS51_62/"/>
+            <param name="region" value=""/>
+            <output name="outfile" ftype="bam" file="ex1_header_filtered.bam"/>
+        </test>
+        <!-- Filter on a tag regex and a flag inside a region, SAM output. -->
+        <test>
+            <param name="input" ftype="bam" value="c1215_fixmate.bam"/>
+            <param name="format" value="sam"/>
+            <param name="selector" value="filter"/>
+            <param name="query" value="[MD] =~ /^\d+T\d+A\d+/ and first_of_pair"/>
+            <param name="region" value="AL096846:1000-5000"/>
+            <output name="outfile" ftype="sam" file="c1215_fixmate_filtered.sam" lines_diff="2"/>
+        </test>
+        <!-- Empty filter expression: only the region restricts the output. -->
+        <test>
+            <param name="input" ftype="bam" value="c1215_fixmate.bam"/>
+            <param name="format" value="sam"/>
+            <param name="selector" value="filter"/>
+            <param name="query" value=""/>
+            <param name="region" value="AL096846:1000-5000"/>
+            <output name="outfile" ftype="sam" file="c1215_fixmate_region-filtered.sam" lines_diff="2"/>
+        </test>
+        <!-- Down-sample to a fraction of 0.1 with seed 123, BAM output. -->
+        <test>
+            <param name="input" ftype="sam" value="ex1_header.sam"/>
+            <param name="format" value="bam"/>
+            <param name="selector" value="sample"/>
+            <param name="seed" value="123"/>
+            <param name="fraction" value="0.1"/>
+            <output name="outfile" ftype="bam" file="ex1_header_sampled.bam"/>
+        </test>
+        <!-- Down-sample to a fraction of 0.1 with seed 123, SAM output. -->
+        <test>
+            <param name="input" ftype="bam" value="c1215_fixmate.bam"/>
+            <param name="format" value="sam"/>
+            <param name="selector" value="sample"/>
+            <param name="seed" value="123"/>
+            <param name="fraction" value="0.1"/>
+            <output name="outfile" ftype="sam" file="c1215_fixmate_sampled.sam" lines_diff="2"/>
+        </test>
+    </tests>
+    <help>
+Sambamba Filter Overview
+========================
+
+This tool uses the sambamba_ ``view`` command to filter BAM/SAM alignments on flags, fields, tags, and region. The input is a SAM or BAM file.
+
+
+Filter Syntax 
+=============
+
+A *filter expression* is a number of *basic conditions* linked by ``and``, ``or``, ``not`` logical operators, and enclosed in parentheses where needed.
+
+A *basic condition* is a condition on a single record field, tag, or flag.
+
+You can use the ``==``, ``!=``, ``&gt;``, ``&lt;``, ``&gt;=``, and ``&lt;=`` comparison operators for both integers and strings.
+
+Strings are delimited by single quotes; if you need a single quote inside a string, escape it with a backslash (``\'``).
+
+Examples of filter expressions
+------------------------------
+
+::
+
+    mapping_quality >= 30 and ([RG] =~ /^abcd/ or [NM] == 7)
+    read_name == 'abc\'def'
+
+Basic conditions for flags
+--------------------------
+
+The following flag names are recognized:
+  * paired
+  * proper_pair
+  * unmapped
+  * mate_is_unmapped
+  * reverse_strand
+  * mate_is_reverse_strand
+  * first_of_pair
+  * second_of_pair
+  * secondary_alignment
+  * failed_quality_control
+  * duplicate
+
+Example
+~~~~~~~
+
+::
+
+    not (unmapped or mate_is_unmapped) and first_of_pair
+
+Basic conditions for fields
+---------------------------
+
+Conditions for integer and string fields are supported.
+
+List of integer fields:
+  * ref_id
+  * position
+  * mapping_quality
+  * sequence_length
+  * mate_ref_id
+  * mate_position
+  * template_length
+
+
+List of string fields:
+  * read_name
+  * sequence
+  * cigar
+
+
+Example
+~~~~~~~
+
+::
+
+    ref_id == 3 and mapping_quality >= 50 and sequence_length >= 80
+
+Basic conditions for tags
+-------------------------
+
+Tags are denoted by their names in square brackets, for instance, ``[RG]`` or ``[Q2]``. They support conditions for both integers and strings, i.e. the tag must also hold a value of the corresponding type.
+
+In order to do filtering based on the presence of a particular tag, you can use special ``null`` value.
+
+Example
+~~~~~~~
+
+::
+
+    [RG] != null and [AM] == 37
+
+Down-sampling 
+=============
+
+The tool uses the following sambamba command line for sampling:
+
+::
+
+    sambamba view -h -t &lt;number of Galaxy threads configured in job_conf.xml&gt; -f &lt;bam or sam&gt;
+    --subsampling-seed=&lt;an integer&gt;
+    -s &lt;a real number between 0 and 1&gt; -o &lt;bam or sam output&gt; input_file
+    
+Warnings
+--------
+
+The tool does not down-sample to a user-given **number of lines**, because sambamba does not
+expose this functionality. For performance reasons, we decided not to add it in this wrapper.
+
+If you down-sample a dataset that has *already been down-sampled* with this tool, it is
+important that you choose **another seed** for randomisation. Otherwise, the new subsample
+has been reported not to match the requested fraction.
+
+
+.. _sambamba: http://github.com/lomereiter/sambamba
+
+    </help>
+    <citations>
+        <!-- DOI of the publication to cite when this tool is used. -->
+        <citation type="doi">10.1093/bioinformatics/btv098</citation>
+    </citations>
+</tool>