Mercurial > repos > jjohnson > samtools_filter
changeset 0:5dcbce4f5971
Uploaded
author | jjohnson |
---|---|
date | Fri, 30 Mar 2012 16:36:54 -0400 |
parents | |
children | f2e4e81f3639 |
files | samtools_filter.xml |
diffstat | 1 files changed, 216 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/samtools_filter.xml Fri Mar 30 16:36:54 2012 -0400 @@ -0,0 +1,216 @@ +<tool id="samtools_filter" name="Filter SAM or BAM" version="1.1.0"> + <description>files on FLAG MAPQ RG LN or by region</description> + <requirements> + <requirement type="package">samtools</requirement> + </requirements> + <!-- + samtools view [-bchuHS] [-t in.refList] [-o output] [-f reqFlag] [-F skipFlag] [-q minMapQ] [-l library] [-r readGroup] [-R rgFile] <in.bam>|<in.sam> [region1 [...]] + Usage: samtools view [options] <in.bam>|<in.sam> [region1 [...]] + + Options: -b output BAM + -h print header for the SAM output + -H print header only (no alignments) + -S input is SAM + -u uncompressed BAM output (force -b) + -1 fast compression (force -b) + -x output FLAG in HEX (samtools-C specific) + -X output FLAG in string (samtools-C specific) + -c print only the count of matching records + -L FILE output alignments overlapping the input BED FILE [null] + -t FILE list of reference names and lengths (force -S) [null] + -T FILE reference sequence file (force -S) [null] + -o FILE output file name [stdout] + -R FILE list of read groups to be outputted [null] + -f INT required flag, 0 for unset [0] + -F INT filtering flag, 0 for unset [0] + -q INT minimum mapping quality [0] + -l STR only output reads in library STR [null] + -r STR only output reads in read group STR [null] + -? longer help + --> + <command> +##set up input files, regions requires input.bam and input.bai +#set $input = None +#if isinstance($input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('bam').__class__): + #set $input = 'input.bam' + ln -s $input1 $input && + ln -s $input1.metadata.bam_index input.bai && +#elif isinstance($input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('sam').__class__): + #set $input = 'input.sam' + ln -s $input1 $input && +#end if +samtools view -o "$output1" $header + #if $input1.datatype.file_ext == 'sam': + -S + #else + -b + #end if + #if $mapq.__str__ != '': + -q $mapq + #end if + #if $flag.filter.__str__ == 'yes': + #if $flag.reqBits.__str__ != 'None': + #set $reqs = $flag.reqBits.__str__.split(',') + #set $reqFlag = 0 + #for $xn in $reqs: + #set $reqFlag += int(xn,16) + #end for + -f $hex($reqFlag) + #end if + #if $flag.skipBits.__str__ != 'None': + #set $skips = $flag.skipBits.__str__.split(',') + #set $skipFlag = 0 + #for $xn in $skips: + #set $skipFlag += int(xn,16) + #end for + -F $hex($skipFlag) + #end if + #end if + #if $read_group.__str__.strip() != '': + -r $read_group + #end if + #if $library.__str__.strip() != '': + -l $library + #end if + #if $bed_file.__str__ != "None" and len($bed_file.__str__) > 0: + -L $bed_file + #end if + $input + #if $regions.__str__.strip() != '' and $input1.datatype.file_ext == 'bam': + $regions.__str__.strip() + #end if + ## need to redirect stderr message so galaxy does not think this failed + 2>&1 + </command> + <inputs> + <param name="input1" type="data" format="sam,bam" label="SAM or BAM File to Filter" /> + <param name="header" type="select" label="Header in output"> + <option value="-h">Include Header</option> + <option value="">Exclude Header</option> + <option value="-H">Only the Header</option> + </param> + <param name="mapq" type="integer" value="" optional="true" label="Minimum MAPQ quality score"> + <validator type="in_range" message="The MAPQ quality score can't be negative" min="0"/> + </param> + <conditional name="flag"> + <param name="filter" type="select" label="Filter on bitwise flag"> + <option value="no">no</option> + <option value="yes">yes</option> + </param> + <when value="no"/> + <when value="yes"> + <param name="reqBits" type="select" multiple="true" display="checkboxes" label="Only output alignments with all of these flag bits set" > + <option value="0x0001">Read is paired</option> + <option value="0x0002">Read is mapped in a proper pair</option> + <option value="0x0004">The read is unmapped</option> + <option value="0x0008">The mate is unmapped</option> + <option value="0x0010">Read strand</option> + <option value="0x0020">Mate strand</option> + <option value="0x0040">Read is the first in a pair</option> + <option value="0x0080">Read is the second in a pair</option> + <option value="0x0100">The alignment or this read is not primary</option> + <option value="0x0200">The read fails platform/vendor quality checks</option> + <option value="0x0400">The read is a PCR or optical duplicate</option> + </param> + <param name="skipBits" type="select" multiple="true" display="checkboxes" label="Skip alignments with any of these flag bits set" > + <option value="0x0001">Read is paired</option> + <option value="0x0002">Read is mapped in a proper pair</option> + <option value="0x0004">The read is unmapped</option> + <option value="0x0008">The mate is unmapped</option> + <option value="0x0010">Read strand</option> + <option value="0x0020">Mate strand</option> + <option value="0x0040">Read is the first in a pair</option> + <option value="0x0080">Read is the second in a pair</option> + <option value="0x0100">The alignment or this read is not primary</option> + <option value="0x0200">The read fails platform/vendor quality checks</option> + <option value="0x0400">The read is a PCR or optical duplicate</option> + </param> + </when> + </conditional> + <param name="library" type="text" value="" size="20" label="Select alignments from Library" + help="Requires headers in the input SAM or BAM, otherwise no alignments will be output."/> + <param name="read_group" type="text" value="" size="20" label="Select alignments from Read Group" + help="Requires headers in the input SAM or BAM, otherwise no alignments will be output."/> + <param name="bed_file" type="data" format="bed" optional="true" label="Output alignments overlapping the regions in the BED FILE"/> + <param name="regions" type="text" value="" size="180" label="Select regions (only used when the input is in BAM format)" + help="region should be presented in one of the following formats: `chr1', `chr2:1,000' and `chr3:1000-2,000'"/> + </inputs> + <outputs> + <data name="output1" format_source="input1" label="${tool.name} on ${on_string}: ${input1.datatype.file_ext}"/> + </outputs> + <tests> + <test> + <param name="input1" value="bam_to_sam_in2.sam" ftype="sam" /> + <param name="header" value=""/> + <param name="filter" value="yes"/> + <param name="reqBits" value="0x0080"/> + <output name="output1" > + <assert_contents> + <has_text text="141" /> + <not_has_text text="77" /> + </assert_contents> + </output> + </test> + <test> + <param name="input1" value="bam_to_sam_in2.sam" ftype="sam" /> + <param name="header" value=""/> + <param name="filter" value="no"/> + <param name="read_group" value="rg1"/> + <output name="output1" > + <assert_contents> + <has_text text="rg1" /> + <not_has_text text="rg2" /> + </assert_contents> + </output> + </test> + <test> + <param name="input1" value="bam_to_sam_in1.sam" ftype="sam" /> + <param name="header" value=""/> + <param name="filter" value="yes"/> + <param name="skipBits" value="0x0008"/> + <param name="mapq" value="250"/> + <output name="output1" > + <assert_contents> + <has_text text="both_reads_align_clip_marked" /> + <not_has_text text="both_reads_present_only_first_aligns" /> + </assert_contents> + </output> + </test> + </tests> + <help> + +**What it does** + +This tool uses the samtools view command in SAMTools_ toolkit to filter a SAM or BAM file on the MAPQ (mapping quality), FLAG bits, Read Group, Library, or region. + +**Input** + +Input is either a SAM or BAM file. + +**Output** + +The output file will be of the same format a the Input file, filtered by the selected options. + +**Options** + +Filtering by read group or library requires headers in the input SAM or BAM file. + +If regions are specified, only alignments overlapping the specified regions will be output. An alignment may be given multiple times if it is overlapping several regions. +A region can be presented, for example, in the following format:: + + chr2 (the whole chr2) + chr2:1000000 (region starting from 1,000,000bp) + chr2:1,000,000-2,000,000 (region between 1,000,000 and 2,000,000bp including the end points). + +Note: The coordinate is 1-based. + +Multiple regions may be specified, separated by a space character:: + + chr2:1000000-2000000 chr2:1,000,000-2,000,000 chrX + + + +.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml + + </help> +</tool>