view bam_filter.xml @ 0:4e4e4093d65d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 09194687c74a424732f8b0c017cbb942aad89068
author iuc
date Wed, 11 Nov 2015 13:04:07 -0500
parents
children 8187a729d9f4
line wrap: on
line source

<tool id="ngsutils_bam_filter" name="BAM filter" version="@WRAPPER_VERSION@.0">
    <description>Removes reads from a BAM file based on criteria</description>
    <!-- Shared definitions are imported from macros.xml, which is not part of
         this file. The "requirements", "stdio" and "version" macros expanded
         below are expected to be defined there (presumably together with the
         @WRAPPER_VERSION@ token used in the tool version; confirm in macros.xml). -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements" />
    <expand macro="stdio" />
    <expand macro="version" />
    <!-- Job command line. The "parameters" configfile is rendered to a file
         holding the filter.py arguments; when that file contains no word
         characters (no filter selected) the input is simply copied to the
         output instead of running filter.py. -->
    <command><![CDATA[
    ## If the tool is executed with no filtering option,
    ## the default parameters simply copy over the input file.
    ## Paths are single-quoted so that directories or dataset paths containing
    ## spaces or shell metacharacters are passed to the shell as a single word.
    if grep -q "\w" '${parameters}';
    then
        '$__tool_directory__/filter.py'
        '$infile'
        '$outfile'
        ## The command substitution itself is deliberately left unquoted: the
        ## shell has to split the rendered file into separate arguments.
        `cat '${parameters}'`;
    else
        cp '$infile' '$outfile';
    fi
]]>
    </command>
    <!-- Cheetah template rendered to a file of filter.py arguments. The command
         block expands it with `cat`, so the shell splits the content on
         whitespace only: quote characters written here are NOT interpreted by
         the shell and would reach filter.py literally, which is why values are
         emitted unquoted. The rendered file must stay free of word characters
         when no filter is selected, because the command block uses that to
         decide between filtering and a plain copy. -->
    <configfiles>
        <configfile name="parameters">
<![CDATA[
        ## Optional numeric parameters are tested on their string form so that
        ## an explicit 0 (or 0.0) is still passed on; a plain truth test would
        ## silently drop it.
        #if str($minlen) not in ('', 'None'):
            -minlen $minlen
        #end if
        #if str($maxlen) not in ('', 'None'):
            -maxlen $maxlen
        #end if
        $mapped
        $unmapped
        $properpair
        $noproperpair
        #if $mask:
            -mask $mask
        #end if
        ## -1 disables the filter, 0 emits a bare -uniq, N > 0 emits -uniq N.
        ## The emptiness check avoids a template error when the optional
        ## field has been cleared.
        #if str($uniq) not in ('', 'None') and int(str($uniq)) > -1:
            -uniq
            #if int(str($uniq)) > 0:
                $uniq
            #end if
        #end if
        $uniq_start
        #if str($mismatch) not in ('', 'None'):
            -mismatch $mismatch
        #end if
        $nosecondary
        $noqcfail
        $nopcrdup
        #if $excludebed:
            -excludebed $excludebed $ignore_strand
        #end if
        #if $includebed:
            -includebed $includebed $ignore_strand
        #end if
        #if $includeref:
            -includeref $includeref
        #end if
        #if $excluderef:
            -excluderef $excluderef
        #end if
        #if str($maximum_mismatch_ratio) not in ('', 'None'):
            -maximum_mismatch_ratio $maximum_mismatch_ratio
        #end if
    ]]>
        </configfile>
    </configfiles>
    <!-- Tool form. Boolean parameters render directly to their command-line
         flag (truevalue) or to nothing (falsevalue); all other parameters are
         turned into arguments by the "parameters" configfile template. -->
    <inputs>
        <param name="infile" type="data" format="bam" label="Select BAM dataset" />

        <!-- Read length limits -->
        <param argument="-minlen" type="integer" value="" optional="True" min="0"
            label="Remove reads that are smaller than"
            help="in bp"/>
        <param argument="-maxlen" type="integer" value="" optional="True" min="0"
            label="Remove reads that are larger than"
            help="in bp"/>

        <!-- Mapping and pairing status -->
        <param argument="-mapped" truevalue="-mapped" type="boolean" falsevalue=""
            label="Keep only mapped reads"
            help="" />
        <param argument="-unmapped" truevalue="-unmapped" type="boolean" falsevalue=""
            label="Keep only unmapped reads"
            help="" />
        <param argument="-properpair" truevalue="-properpair" type="boolean" falsevalue=""
            label="Keep only properly paired reads"
            help="both mapped, correct orientation, flag set in BAM" />
        <param argument="-noproperpair" truevalue="-noproperpair" type="boolean" falsevalue=""
            label="Discard properly paired reads"
            help="" />
        <param argument="-mask" type="text" value="" optional="True" label="Remove reads that match the mask"
            help="e.g. 0x400, 0x2" />

        <!-- Duplicate removal. For -uniq, -1 disables the filter (see the
             configfile template for how 0 and positive values are rendered). -->
        <param argument="-uniq" type="integer" value="-1" optional="True" min="-1"
            label="Remove reads that have the same sequence"
            help="up to the Nth nucleotide starting from the 5prime end. -1 means do not filter, 0 means check along the entire read, 10 means compare only the first 10 nucleotides."/>

        <param argument="-uniq_start" truevalue="-uniq_start" type="boolean" falsevalue=""
            label="Remove reads that start at the same position"
            help="Use only for low-coverage samples" />

        <param argument="-mismatch" type="integer" value="" optional="True" min="0"
            label="Remove reads with that many mismatches"
            help="Indels always counts as 1 regardless of length. Requires NM tag to be set."/>

        <!-- SAM flag based filters -->
        <param argument="-nosecondary" truevalue="-nosecondary" type="boolean" falsevalue=""
            label="Remove secondary alignment reads"
            help="Remove reads flagged with 0x100" />
        <param argument="-noqcfail" truevalue="-noqcfail" type="boolean" falsevalue=""
            label="Remove reads that do not pass the quality control"
            help="Remove reads flagged with 0x200" />
        <param argument="-nopcrdup" truevalue="-nopcrdup" type="boolean" falsevalue=""
            label="Remove reads that are marked as PCR duplicates"
            help="Remove reads flagged with 0x400" />

        <!-- Region based filters; ignore_strand is appended to whichever of
             -excludebed / -includebed is set. -->
        <param argument="-excludebed" type="data" optional="True" format="bed" label="Remove reads that are in any of the regions" />
        <param argument="-includebed" type="data" optional="True" format="bed" label="Remove reads that are NOT in any of the regions" />
        <param name="ignore_strand" truevalue="nostrand" type="boolean" falsevalue=""
            label="Strand information from BED file is ignored. Affects -excludebed and -includebed."
            help="" />

        <!-- Reference name based filters -->
        <param argument="-includeref" type="text" value="" optional="True" label="Exclude reads NOT mapped to a reference"
            help="" />
        <param argument="-excluderef" type="text" value="" optional="True" label="Exclude reads mapped to a particular reference"
            help="e.g. chrM, or _dup chromosomes" />

        <param argument="-maximum_mismatch_ratio" type="float" value="" optional="True" min="0.0" max="1.0"
            label="Filter by maximum mismatch ratio"
            help="fraction of length"/>

    </inputs>
    <!-- Single BAM output: the filtered reads, or a copy of the input when the
         command block finds no filter arguments. -->
    <outputs>
        <data name="outfile" format="bam" />
    </outputs>
    <!-- Functional tests; every case filters the same input BAM. -->
    <tests>
        <!-- Minimum read length combined with a maximum mismatch ratio. -->
        <test>
            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
            <param name="minlen" value="100"/>
            <param name="maximum_mismatch_ratio" value="0.02"/>
            <output name="outfile" file="ngsutils_bam_filter_result1.bam" ftype="bam"/>
        </test>
        <!-- Minimum read length, keeping only properly paired reads. -->
        <test>
            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
            <param name="minlen" value="250"/>
            <param name="properpair" value="True"/>
            <output name="outfile" file="ngsutils_bam_filter_result2.bam" ftype="bam"/>
        </test>
        <!-- Minimum read length plus the three flag-based filters. -->
        <test>
            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
            <param name="minlen" value="100"/>
            <param name="nosecondary" value="True"/>
            <param name="noqcfail" value="True"/>
            <param name="nopcrdup" value="True"/>
            <output name="outfile" file="ngsutils_bam_filter_result3.bam" ftype="bam"/>
        </test>
        <!-- No filter selected: the expected output is the input file itself. -->
        <test>
            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
            <output name="outfile" file="ngsutils_bam_filter_input1.bam" ftype="bam"/>
        </test>
        <!-- Bitmask filter only. -->
        <test>
            <param name="infile" value="ngsutils_bam_filter_input1.bam" ftype="bam"/>
            <param name="mask" value="0x40"/>
            <output name="outfile" file="ngsutils_bam_filter_result4.bam" ftype="bam"/>
        </test>
    </tests>
    <help><![CDATA[
Removes reads from a BAM file based on criteria.

Given a BAM file, this tool will discard reads that do not meet the selected filtering criteria.
The output is another BAM file with the reads not matching the criteria removed.

Note: this does not adjust tag values reflecting any filtering. (for example:
      if a read mapped to two locations (IH:i:2), and one was removed by
      filtering, the IH:i tag would still read IH:i:2).

Currently, the available filters are:

+--------------------------------+-------------------------------------------------+
| Argument                       | Description                                     |
+================================+=================================================+
| -minlen val                    | Remove reads that are smaller than {val}        |
+--------------------------------+-------------------------------------------------+
| -maxlen val                    | Remove reads that are larger than {val}         |
+--------------------------------+-------------------------------------------------+
| -mapped                        | Keep only mapped reads                          |
+--------------------------------+-------------------------------------------------+
| -unmapped                      | Keep only unmapped reads                        |
+--------------------------------+-------------------------------------------------+
| -properpair                    | Keep only properly paired reads (both mapped,   |
|                                | correct orientation, flag set in BAM)           |
+--------------------------------+-------------------------------------------------+
| -noproperpair                  | Keep only not-properly paired reads             |
+--------------------------------+-------------------------------------------------+
| -mask bitmask                  | Remove reads that match the mask (e.g. 0x400)   |
+--------------------------------+-------------------------------------------------+
| -uniq {length}                 | Remove reads that have the same sequence        |
|                                | Note: BAM file should be sorted                 |
|                                | (up to an optional length)                      |
+--------------------------------+-------------------------------------------------+
| -uniq_start                    | Remove reads that start at the same position    |
|                                | Note: BAM file should be sorted                 |
|                                | (Use only for low-coverage samples)             |
+--------------------------------+-------------------------------------------------+
|-mismatch num                   | Number of mismatches or indels                  |
|                                | indel always counts as 1 regardless of length   |
|                                | (requires NM tag in reads)                      |
+--------------------------------+-------------------------------------------------+
|-nosecondary                    | Remove reads that have the 0x100 flag set       |
+--------------------------------+-------------------------------------------------+
|-noqcfail                       | Remove reads that have the 0x200 flag set       |
+--------------------------------+-------------------------------------------------+
|-nopcrdup                       | Remove reads that have the 0x400 flag set       |
+--------------------------------+-------------------------------------------------+
|-excludebed file.bed {nostrand} | Remove reads that are in any of the regions     |
|                                | from the given BED file. If 'nostrand' is given,|
|                                | strand information from the BED file is ignored.|
+--------------------------------+-------------------------------------------------+
|-includebed file.bed {nostrand} | Remove reads that are NOT in any of the regions |
|                                | from the given BED file. If 'nostrand' is given,|
|                                | strand information from the BED file is ignored.|
|                                | Note: If this is a large dataset, use           |
|                                | "bamutils extract" instead.                     |
+--------------------------------+-------------------------------------------------+
| -includeref refname            | Exclude reads NOT mapped to a reference         |
+--------------------------------+-------------------------------------------------+
| -excluderef refname            | Exclude reads mapped to a particular reference  |
|                                | (e.g. chrM, or _dup chromosomes)                |
+--------------------------------+-------------------------------------------------+
]]>
    </help>
</tool>