changeset 0:2d4ae2f8231e draft

Imported from capsule None
author devteam
date Thu, 27 Feb 2014 16:16:26 -0500
parents
children 94d5786febc4
files samtool_filter2.xml test-data/bam_to_sam_in1.sam test-data/bam_to_sam_in2.sam tool_dependencies.xml
diffstat 4 files changed, 277 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/samtool_filter2.xml	Thu Feb 27 16:16:26 2014 -0500
@@ -0,0 +1,232 @@
+<tool id="samtool_filter2" name="Filter SAM or BAM, output SAM or BAM" version="1.1.1">
+  <description>files on FLAG MAPQ RG LN or by region</description>
+  <requirements>
+    <requirement type="package" version="0.1.18">samtools</requirement>
+  </requirements>
+  <!-- 
+    samtools view [-bchuHS] [-t in.refList] [-o output] [-f reqFlag] [-F skipFlag] [-q minMapQ] [-l library] [-r readGroup] [-R rgFile] <in.bam>|<in.sam> [region1 [...]]
+     Usage:   samtools view [options] <in.bam>|<in.sam> [region1 [...]]
+
+     Options: -b       output BAM
+              -h       print header for the SAM output
+              -H       print header only (no alignments)
+              -S       input is SAM
+              -u       uncompressed BAM output (force -b)
+              -1       fast compression (force -b)
+              -x       output FLAG in HEX (samtools-C specific)
+              -X       output FLAG in string (samtools-C specific)
+              -c       print only the count of matching records
+              -L FILE  output alignments overlapping the input BED FILE [null]
+              -t FILE  list of reference names and lengths (force -S) [null]
+              -T FILE  reference sequence file (force -S) [null]
+              -o FILE  output file name [stdout]
+              -R FILE  list of read groups to be outputted [null]
+              -f INT   required flag, 0 for unset [0]
+              -F INT   filtering flag, 0 for unset [0]
+              -q INT   minimum mapping quality [0]
+              -l STR   only output reads in library STR [null]
+              -r STR   only output reads in read group STR [null]
+              -?       longer help
+  -->
+  <command>
+##set up input files, regions requires input.bam and input.bai
+#if isinstance($input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('bam').__class__):
+  #set $input = 'input.bam'
+  ln -s $input1 $input  &amp;&amp;
+  ln -s $input1.metadata.bam_index input.bai  &amp;&amp;
+#elif isinstance($input1.datatype, $__app__.datatypes_registry.get_datatype_by_extension('sam').__class__):
+  #set $input = 'input.sam'
+  ln -s $input1 $input  &amp;&amp;
+#end if
+samtools view -o "$output1" $header
+  
+  #if $input1.datatype.file_ext == 'sam':
+   -S
+  #end if
+
+  #if $outputtype.__str__ == "bam":
+      -b 
+  #end if
+  
+  
+  #if $mapq.__str__ != '':
+   -q $mapq
+  #end if
+  #if $flag.filter.__str__ == 'yes':
+   #if $flag.reqBits.__str__ != 'None':
+     #set $reqs = $flag.reqBits.__str__.split(',') 
+     #set $reqFlag = 0
+     #for $xn in $reqs:
+       #set $reqFlag += int(xn,16)
+     #end for
+     -f $hex($reqFlag)
+   #end if
+   #if $flag.skipBits.__str__ != 'None':
+     #set $skips = $flag.skipBits.__str__.split(',') 
+     #set $skipFlag = 0
+     #for $xn in $skips:
+       #set $skipFlag += int(xn,16)
+     #end for
+     -F $hex($skipFlag)
+   #end if
+  #end if
+  #if $read_group.__str__.strip() != '':
+    -r $read_group
+  #end if
+  #if $library.__str__.strip() != '':
+    -l $library
+  #end if
+  #if $bed_file.__str__ != "None" and len($bed_file.__str__) > 0:
+    -L $bed_file
+  #end if
+  $input
+  #if $regions.__str__.strip() != '' and $input1.datatype.file_ext == 'bam':
+    $regions.__str__.strip()
+  #end if
+  ## need to redirect stderr message so galaxy does not think this failed
+  2>&amp;1
+  </command>
+  <inputs>
+    <param name="input1" type="data" format="sam,bam" label="SAM or BAM File to Filter" />
+    <param name="header" type="select" label="Header in output">
+      <option value="-h">Include Header</option>
+      <option value="">Exclude Header</option>
+      <option value="-H">Only the Header</option>
+    </param>
+    <param name="mapq" type="integer" value="" optional="true" label="Minimum MAPQ quality score">
+      <validator type="in_range" message="The MAPQ quality score can't be negative" min="0"/>
+    </param>
+    <conditional name="flag">
+      <param name="filter" type="select" label="Filter on bitwise flag">
+        <option value="no">no</option>
+        <option value="yes">yes</option>
+      </param>
+      <when value="no"/>
+      <when value="yes">
+        <param name="reqBits" type="select" multiple="true" display="checkboxes" label="Only output alignments with all of these flag bits set" >
+          <option value="0x0001">Read is paired</option>
+          <option value="0x0002">Read is mapped in a proper pair</option>
+          <option value="0x0004">The read is unmapped</option>
+          <option value="0x0008">The mate is unmapped</option>
+          <option value="0x0010">Read strand</option>
+          <option value="0x0020">Mate strand</option>
+          <option value="0x0040">Read is the first in a pair</option>
+          <option value="0x0080">Read is the second in a pair</option>
+          <option value="0x0100">The alignment or this read is not primary</option>
+          <option value="0x0200">The read fails platform/vendor quality checks</option>
+          <option value="0x0400">The read is a PCR or optical duplicate</option>
+        </param>
+        <param name="skipBits" type="select" multiple="true" display="checkboxes" label="Skip alignments with any of these flag bits set" >
+          <option value="0x0001">Read is paired</option>
+          <option value="0x0002">Read is mapped in a proper pair</option>
+          <option value="0x0004">The read is unmapped</option>
+          <option value="0x0008">The mate is unmapped</option>
+          <option value="0x0010">Read strand</option>
+          <option value="0x0020">Mate strand</option>
+          <option value="0x0040">Read is the first in a pair</option>
+          <option value="0x0080">Read is the second in a pair</option>
+          <option value="0x0100">The alignment or this read is not primary</option>
+          <option value="0x0200">The read fails platform/vendor quality checks</option>
+          <option value="0x0400">The read is a PCR or optical duplicate</option>
+        </param>
+      </when>
+    </conditional>
+    <param name="library" type="text" value="" size="20" label="Select alignments from Library"
+           help="Requires headers in the input SAM or BAM, otherwise no alignments will be output."/>
+    <param name="read_group" type="text" value="" size="20" label="Select alignments from Read Group" 
+           help="Requires headers in the input SAM or BAM, otherwise no alignments will be output."/>
+    <param name="bed_file" type="data" format="bed" optional="true" label="Output alignments overlapping the regions in the BED FILE"/>
+    <param name="regions" type="text" value="" size="180" label="Select regions (only used when the input is in BAM format)" 
+           help="region should be presented in one of the following formats: `chr1', `chr2:1,000' and `chr3:1000-2,000'"/>
+    <param name="outputtype" type="select" label="Select the output format">
+        <option value="bam">bam</option>
+        <option value="sam">sam</option>
+    </param>
+  </inputs>
+  <outputs>
+    <data name="output1" format_source="input1" label="${tool.name} on ${on_string}: ${input1.datatype.file_ext}">
+      <change_format>
+        <when input="outputtype" value="bam" format="bam" />
+      </change_format>
+    </data>
+  </outputs>
+  <tests>
+    <test>
+      <param name="input1" value="bam_to_sam_in2.sam" ftype="sam" />
+      <param name="header" value=""/>
+      <param name="filter" value="yes"/>
+      <param name="reqBits" value="0x0080"/>
+      <param name="outputtype" value="sam"/>
+      <output name="output1" >
+       <assert_contents> 
+         <has_text text="141" /> 
+         <not_has_text text="77" />
+       </assert_contents> 
+      </output>
+    </test>
+    <test>
+      <param name="input1" value="bam_to_sam_in2.sam" ftype="sam" />
+      <param name="header" value=""/>
+      <param name="filter" value="no"/>
+      <param name="read_group" value="rg1"/>
+      <param name="outputtype" value="sam"/>
+      <output name="output1" >
+       <assert_contents> 
+         <has_text text="rg1" /> 
+         <not_has_text text="rg2" />
+       </assert_contents> 
+      </output>
+    </test>
+    <test>
+      <param name="input1" value="bam_to_sam_in1.sam" ftype="sam" />
+      <param name="header" value=""/>
+      <param name="filter" value="yes"/>
+      <param name="skipBits" value="0x0008"/>
+      <param name="mapq" value="250"/>
+      <param name="outputtype" value="sam"/>
+      <output name="output1" >
+       <assert_contents> 
+         <has_text text="both_reads_align_clip_marked" /> 
+         <not_has_text text="both_reads_present_only_first_aligns" />
+       </assert_contents> 
+      </output>
+    </test>
+  </tests>
+  <help>
+  
+
+**What it does**
+
+This tool uses the samtools view command in SAMTools_ toolkit to filter a SAM or BAM file on the MAPQ (mapping quality), FLAG bits, Read Group, Library, or region.
+
+**Input**
+
+Input is either a SAM or BAM file.
+
+**Output**
+
+The output file will be SAM or BAM (depending on the chosen option), filtered by the selected options.
+
+**Options**
+
+Filtering by read group or library requires headers in the input SAM or BAM file.   
+
+If regions are specified, only alignments overlapping the specified regions will be output.  An alignment may be given multiple times if it is overlapping several regions.  
+A region can be presented, for example, in the following format::
+
+  chr2	(the whole chr2)
+  chr2:1000000	 (region starting from 1,000,000bp) 
+  chr2:1,000,000-2,000,000	(region between 1,000,000 and 2,000,000bp including the end points). 
+
+Note:  The coordinate is 1-based.
+
+Multiple regions may be specified, separated by a space character::
+
+  chr2:1000000-2000000 chr2:1,000,000-2,000,000 chrX
+
+
+
+.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml
+
+  </help>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bam_to_sam_in1.sam	Thu Feb 27 16:16:26 2014 -0500
@@ -0,0 +1,14 @@
+@HD	VN:1.0	SO:coordinate
+@SQ	SN:chr1	LN:101
+@SQ	SN:chr7	LN:404
+@SQ	SN:chr8	LN:202
+@RG	ID:0	SM:Hi,Mom!
+@PG	ID:1	PN:Hey!	VN:2.0
+both_reads_align_clip_marked	83	chr7	1	255	101M	=	302	201	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:0
+both_reads_present_only_first_aligns	89	chr7	1	255	101M	*	0	0	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:0
+read_2_too_many_gaps	83	chr7	1	255	101M	=	302	201	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:0
+both_reads_align_clip_adapter	147	chr7	16	255	101M	=	21	-96	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:0
+both_reads_align_clip_adapter	99	chr7	21	255	101M	=	16	96	CAACAGAAGCNGGNATCTGTGTTTGTGTTTCGGATTTCCTGCTGAANNGNTTNTCGNNTCNNNNNNNNATCCCGATTTCNTTCCGCAGCTNACCTCCCAAN	)'.*.+2,))&&'&*/)-&*-)&.-)&)&),/-&&..)./.,.).*&&,&.&&-)&&&0*&&&&&&&&/32/,01460&&/6/*0*/2/283//36868/&	RG:Z:0
+both_reads_align_clip_marked	163	chr7	302	255	101M	=	1	-201	NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA	&/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1	RG:Z:0
+read_2_too_many_gaps	163	chr7	302	255	10M1D10M5I76M	=	1	-201	NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA	&/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1	RG:Z:0
+both_reads_present_only_first_aligns	165	*	0	0	*	chr7	1	0	NCGCGGCATCNCGATTTCTTTCCGCAGCTAACCTCCCGACAGATCGGCAGCGCGTCGTGTAGGTTATTATGGTACATCTTGTCGTGCGGCNAGAGCATACA	&/15445666651/566666553+2/14/&/555512+3/)-'/-&-'*+))*''13+3)'//++''/'))/3+&*5++)&'2+&+/*&-&&*)&-./1'1	RG:Z:0
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/bam_to_sam_in2.sam	Thu Feb 27 16:16:26 2014 -0500
@@ -0,0 +1,25 @@
+@HD	VN:1.0	SO:coordinate
+@SQ	SN:chr1	LN:10001
+@SQ	SN:chr2	LN:100001
+@SQ	SN:chr3	LN:10001
+@SQ	SN:chr4	LN:1001
+@RG	ID:rg1	SM:s1
+@RG	ID:rg2	SM:s3
+bar:record:4	77	chr1	1	0	*	*	0	0	AAAAAAAAAAAAA	1111111111111	RG:Z:rg1
+bar:record:6	77	chr1	1	0	*	*	0	0	AAAAAAAAAAAAA	1111111111111	RG:Z:rg2
+bar:record:1	77	chr1	10	0	*	*	0	0	AAAAAAAAAAAAA	1111111111111	RG:Z:rg1
+bar:record:3	77	chr1	10	0	*	*	0	0	AAAAAAAAAAAAA	1111111111111	RG:Z:rg2
+bar:record:1	141	chr1	20	0	*	*	0	0	CCCCCCCCCCCCC	2222222222222	RG:Z:rg1
+bar:record:7	77	chr1	20	0	*	*	0	0	AAAAAAAAAAAAA	1111111111111	RG:Z:rg2
+bar:record:8	77	chr1	30	0	*	*	0	0	AAAAAAAAAAAAA	1111111111111	RG:Z:rg2
+bar:record:4	141	chr1	40	0	*	*	0	0	CCCCCCCCCCCCC	2222222222222	RG:Z:rg1
+bar:record:5	77	chr1	40	0	*	*	0	0	AAAAAAAAAAAAA	1111111111111	RG:Z:rg2
+bar:record:6	141	chr1	50	0	*	*	0	0	CCCCCCCCCCCCC	2222222222222	RG:Z:rg2
+bar:record:2	77	chr2	10	0	*	*	0	0	AAAAAAAAAAAAA	1111111111111	RG:Z:rg1
+bar:record:2	141	chr2	30	0	*	*	0	0	CCCCCCCCCCCCC	2222222222222	RG:Z:rg2
+bar:record:3	141	chr3	20	0	*	*	0	0	CCCCCCCCCCCCC	2222222222222	RG:Z:rg1
+bar:record:8	141	chr3	20	0	*	*	0	0	CCCCCCCCCCCCC	2222222222222	RG:Z:rg1
+bar:record:5	141	chr3	40	0	*	*	0	0	CCCCCCCCCCCCC	2222222222222	RG:Z:rg1
+bar:record:9	77	chr4	10	0	*	*	0	0	AAAAAAAAAAAAA	1111111111111	RG:Z:rg1
+bar:record:7	141	chr4	20	0	*	*	0	0	CCCCCCCCCCCCC	2222222222222	RG:Z:rg1
+bar:record:9	141	chr4	60	0	*	*	0	0	CCCCCCCCCCCCC	2222222222222	RG:Z:rg1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Thu Feb 27 16:16:26 2014 -0500
@@ -0,0 +1,6 @@
+<?xml version="1.0"?>
+<tool_dependency>
+  <package name="samtools" version="0.1.18">
+      <repository changeset_revision="171cd8bc208d" name="package_samtools_0_1_18" owner="devteam" prior_installation_required="False" toolshed="http://toolshed.g2.bx.psu.edu" />
+    </package>
+</tool_dependency>