diff bam_filter.xml @ 0:4e4e4093d65d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 09194687c74a424732f8b0c017cbb942aad89068
author iuc
date Wed, 11 Nov 2015 13:04:07 -0500
parents
children 8187a729d9f4
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bam_filter.xml	Wed Nov 11 13:04:07 2015 -0500
@@ -0,0 +1,231 @@
+<tool id="ngsutils_bam_filter" name="BAM filter" version="@WRAPPER_VERSION@.0">
+    <description>Removes reads from a BAM file based on criteria</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <expand macro="stdio" />
+    <expand macro="version" />
+    <command><![CDATA[
+    ## When no filter option is set, the rendered parameters file contains no
+    ## word characters and the input is simply copied to the output.
+    if grep -q "\w" '${parameters}';
+    then
+        '$__tool_directory__/filter.py'
+        '$infile'
+        '$outfile'
+        `cat '${parameters}'`;
+    else
+        cp '$infile' '$outfile';
+    fi
+]]>
+    </command>
+    <configfiles>
+        <!--
+            Command-line arguments for filter.py, rendered from the tool form.
+            Optional numeric parameters are tested with str(...) != '' instead
+            of by truthiness, so that an explicit 0 (e.g. "-mismatch 0") is
+            passed on rather than being treated as "not set", and so that a
+            cleared "uniq" field does not break the int() conversion.
+            NOTE(review): the command expands this file through an unquoted
+            `cat`, so the double quotes written below presumably reach
+            filter.py as literal characters. TODO confirm filter.py copes.
+        -->
+        <configfile name="parameters">
+<![CDATA[
+        #if str($minlen) != '':
+            -minlen $minlen
+        #end if
+        #if str($maxlen) != '':
+            -maxlen $maxlen
+        #end if
+        $mapped
+        $unmapped
+        $properpair
+        $noproperpair
+        #if $mask:
+            -mask "${mask}"
+        #end if
+        #if str($uniq) != '' and int(str($uniq)) > -1:
+            -uniq
+            #if int(str($uniq)) > 0:
+                $uniq
+            #end if
+        #end if
+        $uniq_start
+        #if str($mismatch) != '':
+            -mismatch $mismatch
+        #end if
+        $nosecondary
+        $noqcfail
+        $nopcrdup
+        #if $excludebed:
+            -excludebed "${excludebed}" $ignore_strand
+        #end if
+        #if $includebed:
+            -includebed "${includebed}" $ignore_strand
+        #end if
+        #if $includeref:
+            -includeref "${includeref}"
+        #end if
+        #if $excluderef:
+            -excluderef "${excluderef}"
+        #end if
+        #if str($maximum_mismatch_ratio) != '':
+            -maximum_mismatch_ratio $maximum_mismatch_ratio
+        #end if
+    ]]>
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="infile" type="data" format="bam" label="Select BAM dataset" />
+
+        <!-- Read length -->
+        <param argument="-minlen" type="integer" value="" optional="True" min="0"
+            label="Remove reads that are smaller than"
+            help="in bp"/>
+        <param argument="-maxlen" type="integer" value="" optional="True" min="0"
+            label="Remove reads that are larger than"
+            help="in bp"/>
+
+        <!-- Mapping and pairing state -->
+        <param argument="-mapped" truevalue="-mapped" type="boolean" falsevalue=""
+            label="Keep only mapped reads"
+            help="" />
+        <param argument="-unmapped" truevalue="-unmapped" type="boolean" falsevalue=""
+            label="Keep only unmapped reads"
+            help="" />
+        <param argument="-properpair" truevalue="-properpair" type="boolean" falsevalue=""
+            label="Keep only properly paired reads"
+            help="both mapped, correct orientation, flag set in BAM" />
+        <param argument="-noproperpair" truevalue="-noproperpair" type="boolean" falsevalue=""
+            label="Discard properly paired reads"
+            help="" />
+        <param argument="-mask" type="text" value="" optional="True" label="Remove reads that match the mask"
+            help="e.g. 0x400, 0x2" />
+
+        <!-- Duplicate removal; the default of -1 leaves the -uniq filter switched off -->
+        <param argument="-uniq" type="integer" value="-1" optional="True" min="-1"
+            label="Remove reads that have the same sequence"
+            help="up to the Nth nucleotide starting from the 5prime end. -1 means do not filter, 0 means check along the entire read, 10 means compare only the first 10 nucleotides."/>
+
+        <param argument="-uniq_start" truevalue="-uniq_start" type="boolean" falsevalue=""
+            label="Remove reads that start at the same position"
+            help="Use only for low-coverage samples" />
+
+        <param argument="-mismatch" type="integer" value="" optional="True" min="0"
+            label="Remove reads with that many mismatches"
+            help="An indel always counts as 1 regardless of length. Requires NM tag to be set."/>
+
+        <!-- SAM flag based filters -->
+        <param argument="-nosecondary" truevalue="-nosecondary" type="boolean" falsevalue=""
+            label="Remove secondary alignment reads"
+            help="Remove reads flagged with 0x100" />
+        <param argument="-noqcfail" truevalue="-noqcfail" type="boolean" falsevalue=""
+            label="Remove reads that do not pass the quality control"
+            help="Remove reads flagged with 0x200" />
+        <param argument="-nopcrdup" truevalue="-nopcrdup" type="boolean" falsevalue=""
+            label="Remove reads that are marked as PCR duplicates"
+            help="Remove reads flagged with 0x400" />
+
+        <!-- Region and reference based filters -->
+        <param argument="-excludebed" type="data" optional="True" format="bed" label="Remove reads that are in any of the regions" />
+        <param argument="-includebed" type="data" optional="True" format="bed" label="Remove reads that are NOT in any of the regions" />
+        <param name="ignore_strand" truevalue="nostrand" type="boolean" falsevalue=""
+            label="Strand information from BED file is ignored. Affects -excludebed and -includebed."
+            help="" />
+
+        <param argument="-includeref" type="text" value="" optional="True" label="Exclude reads NOT mapped to a reference"
+            help="" />
+        <param argument="-excluderef" type="text" value="" optional="True" label="Exclude reads mapped to a particular reference"
+            help="e.g. chrM, or _dup chromosomes" />
+
+        <param argument="-maximum_mismatch_ratio" type="float" value="" optional="True" min="0.0" max="1.0"
+            label="Filter by maximum mismatch ratio"
+            help="fraction of length"/>
+
+    </inputs>
+    <outputs>
+        <!-- BAM dataset that the command writes to $outfile -->
+        <data name="outfile" format="bam"/>
+    </outputs>
+    <tests>
+        <!-- minimum read length combined with a maximum mismatch ratio -->
+        <test>
+            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
+            <param name="minlen" value="100"/>
+            <param name="maximum_mismatch_ratio" value="0.02"/>
+            <output name="outfile" ftype="bam" file="ngsutils_bam_filter_result1.bam"/>
+        </test>
+        <!-- minimum read length, keeping only properly paired reads -->
+        <test>
+            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
+            <param name="minlen" value="250"/>
+            <param name="properpair" value="True"/>
+            <output name="outfile" ftype="bam" file="ngsutils_bam_filter_result2.bam"/>
+        </test>
+        <!-- minimum read length plus the three flag-based filters -->
+        <test>
+            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
+            <param name="minlen" value="100"/>
+            <param name="nosecondary" value="True"/>
+            <param name="noqcfail" value="True"/>
+            <param name="nopcrdup" value="True"/>
+            <output name="outfile" ftype="bam" file="ngsutils_bam_filter_result3.bam"/>
+        </test>
+        <!-- no filter selected: the expected output is the input file itself -->
+        <test>
+            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
+            <output name="outfile" ftype="bam" file="ngsutils_bam_filter_input1.bam"/>
+        </test>
+        <!-- flag mask only -->
+        <test>
+            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
+            <param name="mask" value="0x40"/>
+            <output name="outfile" ftype="bam" file="ngsutils_bam_filter_result4.bam"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Removes reads from a BAM file based on criteria.
+
+Given a BAM file, this tool discards reads that do not meet the selected filtering criteria.
+The output is another BAM file with the reads not matching the criteria removed.
+
+Note: this does not adjust tag values reflecting any filtering. (for example:
+      if a read mapped to two locations (IH:i:2), and one was removed by
+      filtering, the IH:i tag would still read IH:i:2).
+
+Currently, the available filters are:
+
++--------------------------------+-------------------------------------------------+
+| Argument                       | Description                                     |
++================================+=================================================+
+| -minlen val                    | Remove reads that are smaller than {val}        |
++--------------------------------+-------------------------------------------------+
+| -maxlen val                    | Remove reads that are larger than {val}         |
++--------------------------------+-------------------------------------------------+
+| -mapped                        | Keep only mapped reads                          |
++--------------------------------+-------------------------------------------------+
+| -unmapped                      | Keep only unmapped reads                        |
++--------------------------------+-------------------------------------------------+
+| -properpair                    | Keep only properly paired reads (both mapped,   |
+|                                | correct orientation, flag set in BAM)           |
++--------------------------------+-------------------------------------------------+
+| -noproperpair                  | Keep only not-properly paired reads             |
++--------------------------------+-------------------------------------------------+
+| -mask bitmask                  | Remove reads that match the mask (e.g. 0x400)   |
++--------------------------------+-------------------------------------------------+
+| -uniq {length}                 | Remove reads that have the same sequence        |
+|                                | Note: BAM file should be sorted                 |
+|                                | (up to an optional length)                      |
++--------------------------------+-------------------------------------------------+
+| -uniq_start                    | Remove reads that start at the same position    |
+|                                | Note: BAM file should be sorted                 |
+|                                | (Use only for low-coverage samples)             |
++--------------------------------+-------------------------------------------------+
+|-mismatch num                   | Number of mismatches or indels                  |
+|                                | indel always counts as 1 regardless of length   |
+|                                | (requires NM tag in reads)                      |
++--------------------------------+-------------------------------------------------+
+|-nosecondary                    | Remove reads that have the 0x100 flag set       |
++--------------------------------+-------------------------------------------------+
+|-noqcfail                       | Remove reads that have the 0x200 flag set       |
++--------------------------------+-------------------------------------------------+
+|-nopcrdup                       | Remove reads that have the 0x400 flag set       |
++--------------------------------+-------------------------------------------------+
+|-excludebed file.bed {nostrand} | Remove reads that are in any of the regions     |
+|                                | from the given BED file. If 'nostrand' is given,|
+|                                | strand information from the BED file is ignored.|
++--------------------------------+-------------------------------------------------+
+|-includebed file.bed {nostrand} | Remove reads that are NOT in any of the regions |
+|                                | from the given BED file. If 'nostrand' is given,|
+|                                | strand information from the BED file is ignored.|
+|                                | Note: If this is a large dataset, use           |
+|                                | "bamutils extract" instead.                     |
++--------------------------------+-------------------------------------------------+
+| -includeref refname            | Exclude reads NOT mapped to a reference         |
++--------------------------------+-------------------------------------------------+
+| -excluderef refname            | Exclude reads mapped to a particular reference  |
+|                                | (e.g. chrM, or _dup chromosomes)                |
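++--------------------------------+-------------------------------------------------+
+| -maximum_mismatch_ratio val    | Filter by maximum mismatch ratio                |
+|                                | (fraction of length)                            |
++--------------------------------+-------------------------------------------------+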
+]]>
+    </help>
+</tool>