changeset 0:62e370d81cd5 draft default tip

Uploaded
author anton
date Tue, 26 Aug 2014 13:14:33 -0400
parents
children
files bamtools-filter.xml complex-filters.png multiple-filters.png rule.png single-filter.png test-data/bamtools-input1.bam test-data/bamtools-test1.bam tool_dependencies.xml
diffstat 8 files changed, 334 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamtools-filter.xml	Tue Aug 26 13:14:33 2014 -0400
@@ -0,0 +1,328 @@
+<tool id="bamFilter" name="bamFilter" version="0.0.1">
+  <description>Filter BAM files on a variety of attributes</description>
+  <requirements>
+    <requirement type="package" version="2.3.0_2d7685d2ae">bamtools</requirement>
+  </requirements>
+  <command>
+
+    cat $script_file > $out_file2;
+    
+    #for $bam_count, $input_bam in enumerate( $input_bams ):
+        ln -s "${input_bam.input_bam}" "localbam_${bam_count}.bam" &amp;&amp;
+        ln -s "${input_bam.input_bam.metadata.bam_index}" "localbam_${bam_count}.bam.bai" &amp;&amp;
+    #end for
+    
+    bamtools
+    filter
+    -script $script_file
+    
+    #for $bam_count, $input_bam in enumerate( $input_bams ):
+        -in "localbam_${bam_count}.bam"
+    #end for
+    -out $out_file1
+    
+  </command>
+
+  <inputs>
+    <repeat name="input_bams" title="BAM dataset(s) to filter" min="1">
+          <param name="input_bam" type="data" format="bam" label="BAM dataset" />
+    </repeat>
+    <repeat name="conditions" title="Condition" min="1">
+      <repeat name="filters" title="Filter" min="1">
+        <conditional name="bam_property">
+          <param name="bam_property_selector" type="select" label="Select BAM property to filter on">
+            <option value="alignmentFlag"/>
+            <option value="cigar"/>
+            <option value="insertSize"/>
+            <option value="isDuplicate"/>
+            <option value="isFailedQC"/>
+            <option value="isFirstMate"/>
+            <option value="isMapped"/>
+            <option value="isMateMapped"/>
+            <option value="isMateReverseStrand"/>
+            <option value="isPaired"/>
+            <option value="isPrimaryAlignment"/>
+            <option value="isProperPair"/>
+            <option value="isReverseStrand"/>
+            <option value="isSecondMate"/>
+            <option selected="True" value="mapQuality"/>
+            <option value="matePosition"/>
+            <option value="mateReference"/>
+            <option value="name"/>
+            <option value="position"/>
+            <option value="queryBases"/>
+            <option value="reference"/>
+            <option value="tag"/>
+          </param>
+          <!-- would be fanstastic to have AND and OR constructs in when statements -->
+          <when value="alignmentFlag">
+            <param name="bam_property_value" type="integer" value="3" label="Filter on this alignment flag" help="Default (3) is for a paired read mapped in a proper pair"/>
+          </when>
+          <when value="cigar">
+            <param name="bam_property_value" type="text" size="10" value="101M" label="Filter on this CIGAR string" help="Default (101M) is for 101 continuously matched bases"/>
+          </when>
+          <when value="insertSize">
+            <param name="bam_property_value" type="text" size="10" value=">=250" label="Filter on inster size" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select pairs with inster size above 250 nt use &quot;>=250&quot;">
+              <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
+              </sanitizer>
+            </param>
+          </when>
+          <when value="isDuplicate">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads makwed as duplicates" help="Checked = Read IS Duplicate, Empty = Read is NOT Duplicate" />
+          </when>
+          <when value="isFailedQC">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads failing QC" help="Checked = Failed QC, Empty = Passed QC"/>
+          </when>
+          <when value="isFirstMate">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select first mate in a read pair" help="Checked = is first mate, Empty = is NOT first mate"/>
+          </when>
+          <when value="isMapped">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Selected mapped reads" help="Checked = Mapped, Empty = NOT mapped"/>
+          </when>
+          <when value="isMateMapped">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mapped mate" help="Checked = Mate IS mapped Empty = Mate is NOT mapped"/>
+          </when>
+          <when value="isMateReverseStrand">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads with mate on the reverse strand" help="Checked = Mate IS on reverse strand, Empty = Mate is NOT on the reverse strand"/>
+          </when>
+          <when value="isPaired">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select paired reads" help="Checked = Read IS paired, Empty = Read is NOT paired"/>
+          </when>
+          <when value="isPrimaryAlignment">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select BAM records for primary alignments" help="Checked = Alignment IS primary, Empty = Alignment is NOT primary"/>
+          </when>
+          <when value="isProperPair">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select properly paired reads" help="Checked = Read IS in proper pair, Empty = Read is NOT in the proper pair"/>
+          </when>
+          <when value="isReverseStrand">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select reads in the reverse strand only" help="Checked = Read IS on the reverse strand, Empty = Read is NOT on the reverse strand"/>
+          </when>
+          <when value="isSecondMate">
+            <param name="bam_property_value" type="boolean" truevalue="true" falsevalue="false" label="Select second mate in a read pair" help="Checked = Read IS second mate, Empty = Read is NOT second mate"/>
+          </when>
+          <when value="mapQuality">
+            <param name="bam_property_value" type="text" value="20" label="Filter on read mapping quality (phred scale)" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mapping quality of at least 30 use &quot;>=30&quot;">
+              <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
+              </sanitizer>
+            </param>
+          </when>
+          <when value="matePosition">
+            <param name="bam_property_value" type="text" value="1000000" label="Filter on the position of the mate" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads with mate (second end) mapping after position 1,000,000 use &quot;&gt;1000000&quot;">
+              <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
+              </sanitizer>
+            </param>
+          </when>
+          <when value="mateReference">
+            <param name="bam_property_value" type="text" value="chr22" label="Filter on reference name for the mate" help="You can use = and ! (not) in your expression. E.g., to select reads with mates mapping to chrM use &quot;chr22&quot;">
+              <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
+              </sanitizer>
+            </param>
+          </when>
+          <when value="name">
+            <param name="bam_property_value" type="text" label="Filter on read name" help="You can use = and ! (not) in your expression.">
+              <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
+              </sanitizer>
+            </param>
+          </when>
+          <when value="position">
+            <param name="bam_property_value" type="text" value="500000" label="Filter on the position of the read" help="You can use &gt;, &lt;, =, and ! (not) in your expression. E.g., to select reads mapping after position 5,000 use &quot;&gt;5000&quot;">
+              <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
+              </sanitizer>
+            </param>
+          </when>
+          <when value="queryBases">
+            <param name="bam_property_value" type="text" value="ttagggttagg" label="Filter on a sequence motif" help="You can use ! (not) in your expression">
+              <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
+              </sanitizer>
+            </param>
+          </when>
+          <when value="reference">
+            <param name="bam_property_value" type="text" value="chr22" label="Filter on the reference name for the read" help="You can use ! (not) in your expression">
+              <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value="!="/></valid>
+              </sanitizer>
+            </param>
+          </when>
+          <when value="tag">
+            <param name="bam_property_value" type="text" value="NM:&gt;1" label="Filter on a particular tag" help="You can use &gt;, &lt;, =, and ! (not).
+            Tag name and its value must be separated by &quot;:&quot;. E.g., to obtain reads with at least one mismatch use &quot;NM:&gt;1&quot;">
+              <sanitizer invalid_char="">
+                <valid initial="string.letters,string.digits"><add value="&gt;"/><add value="&lt;"/><add value=":!="/></valid>
+              </sanitizer>
+            </param>
+          </when>
+        </conditional>
+      </repeat>
+    </repeat>
+    <conditional name="rule_configuration">
+      <param name="rules_selector" type="boolean" truevalue="true" falsevalue="false" label="Would you like to set rules?" help="Allows complex logical constructs. See Example 4 below." />
+      <when value="true">
+        <param name="rules" type="text" size="20" label="Enter rules here" help="This option can only be used with at least two conditions. Read help below (Example 4) to understand how it works." >
+          <sanitizer invalid_char="">
+            <valid initial="string.printable"/>
+          </sanitizer>
+        </param>
+      </when>
+    </conditional>
+  </inputs>
+
+  <configfiles>
+    <configfile name="script_file">
+      ##Sets up a json configfile for bamtools filter
+      ##If there is more than one condition prints brackets and "filters:"
+      #if len( $conditions ) > 1
+      {
+        "filters":
+        [
+      #end if
+      #for $i, $c in enumerate( $conditions, start=1 )
+          { "id": "$i",
+          #for $j, $s in enumerate( $c.filters, start=1 )
+          ##The if below takes care of the comma at the end of last condition within group       
+            #if $j != len( $c.filters)
+            "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}",
+            #else
+            "${s.bam_property.bam_property_selector}":"${s.bam_property.bam_property_value}"
+            #end if
+          #end for
+          ##The if below takes care of the comma at the end of last condition within group 
+          #if $i != len( $conditions )
+          },
+          #else
+          }
+          #end if      
+      #end for   
+      #if len( $conditions ) > 1
+      #if str( $rule_configuration.rules_selector ) == "True":
+        ],
+          "rule" : "${rule_configuration.rules}"
+        #else
+        ]
+        #end if
+      }
+      #end if
+    </configfile>
+  </configfiles>
+
+  <outputs>
+    <data format="txt" name="out_file2" />
+    <data format="bam" name="out_file1" />
+  </outputs>
+    <tests>
+        <test>
+            <param name="input_bam" ftype="bam" value="bamtools-input1.bam"/>
+            <param name="bam_property_selector" value="mapQuality"/>
+            <param name="bam_property_value" value=">20"/>
+            <output name="out_file1" file="bamtools-test1.bam" ftype="bam"/>        
+        </test>
+    </tests>
+<help>
+**What is does**
+
+BAMTools filter is a very powerful utility to perform complex filtering of BAM files. It is based on BAMtools suite of tools by Derek Barnett (https://github.com/pezmaster31/bamtools).
+
+-----
+
+**How it works**
+
+The tool use logic relies on the three concepts: (1) input BAM, (2) groups, and (3) filters.
+
+*Input BAM(s)*
+
+The input BAM is self-explanatory. This is the dataset you will be filtering. The tool can accept just one or multiple BAM files. To filter on multiple BAMs just add them by clicking **Add new BAM dataset(s) to filter**
+
+*Conditions and Filters*
+
+Conditions for filtering BAM files can be arranged in **Groups and Filters**. While it can be confusing at first this is what gives ultimate power to this tools. So try to look at the examples we are supplying below.
+
+-----
+
+**Example 1. Using a single filter**
+
+When filtering on a single condition there is no need to worry about filters and conditions. Just choose a filter from the **Select BAM property to filter on:** dropdown and enter a value (or click a checkbox for binary filters).
+For example, for retaining reads with mapping quality of at least 20 one would set the tool interface as shown below:
+
+.. image:: images/simple-filter.png
+
+-----
+
+**Example 2. Using multiple filters**
+
+Now suppose one needs to extract reads that (1) have mapping quality of at least 20, (2) contain at least 1 mismatch, and (3) are mapping onto forward strand only.
+To do so we will use three filters as shown below (multiple filters are added to the interface by clicking on the **Add new Filter** button):
+
+.. image:: images/multiple-filters.png
+
+In this case (you can see that the three filters are grouped within a single Condition - **Condition 1**) the filter too use logical **AND** to perform filtering.
+In other words only reads that (1) have mapping quality of at least 20 **AND** (2) contain at least 1 mismatch **AND** are mapping onto forward strand will be returned in this example.
+
+-----
+
+**Example 3. Complex filtering with multiple conditions**
+
+Suppose now you would like to select **either** reads that (**1**) have (*1.1*) no mismatches and (*1.2*) are on the forward strand **OR** (**2**) reads that have (*2.1*) 
+at least one mismatch and (*2.2*) are on the reverse strand. In this scenario we have to set up two conditions: (**1**) and (**2**) each with two filters: *1.1* and *1.2* as well as *2.1* and *2.2*. 
+The following screenshot expalins how this can be done:
+
+.. image:: images/complex-filters.png
+
+-----
+
+**Example 4. Even more complex filtering with Rules**
+
+In the above example we have used two conditions (Condition 1 and Condition 2). Using multiple conditions allows to combine them and a variety of ways to enable even more powerful filtering.
+For example, suppose get all reads that (**1**) do NOT map to mitochondria and either (**2**) have mapping quality over 20, or (**3**) are in properly mapped pairs. The logical rule to enable such
+filtering will look like this::
+
+ !(1) &amp; (2 | 3)
+ 
+Here, numbers 1, 2, and 3 represent conditions. The following screenshot illustrates how to do this in Galaxy:
+
+.. image:: images/rule.png
+
+There are three conditions here, each with a single filter. A text entry area that can be opened by clicking on the **Would you like to set rules?** checkbox enables you to enter a rule.
+Here numbers correspond to numbers of conditions as they are shown in the interface. E.g., 1 corresponds to condition 1, 2 to condition 2 and so on... In human language this means::
+
+ NOT condition 1 AND (condition 2 OR condition 3)
+
+-----
+
+**JSON script file**
+
+This tool produces two outputs. One of the them is a BAM file containing filtered reads. The other is a JSONified script. It can help you to see how your instructions are sent to BAMTools.
+For instance, the example 4 looks like this in the JSON form::
+
+       {
+        "filters":
+        [
+          { "id": "1",
+            "tag":"NM:=0",
+            "isReverseStrand":"false"
+          },
+          { "id": "2",
+            "tag":"NM:>0",
+            "isReverseStrand":"true"
+          }
+        ]
+      }
+    
+    
+
+-----
+
+**More information**
+
+.. class:: infomark
+
+Additional information about BAMtools can be found at https://github.com/pezmaster31/bamtools/wiki
+
+
+</help>
+</tool>
Binary file complex-filters.png has changed
Binary file multiple-filters.png has changed
Binary file rule.png has changed
Binary file single-filter.png has changed
Binary file test-data/bamtools-input1.bam has changed
Binary file test-data/bamtools-test1.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Aug 26 13:14:33 2014 -0400
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+    <package name="bamtools" version="2.3.0_2d7685d2ae">
+        <repository changeset_revision="9e50b79bc32b" name="package_bamtools" owner="anton" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>
\ No newline at end of file