changeset 0:74980f9f1ccc draft default tip

Uploaded
author urgi-team
date Tue, 10 Nov 2015 08:32:58 -0500
parents
children
files mapQfilter_wrapper.xml test-data/inputMapqfilter.bam test-data/outputMapqfilter.bam tool_dependencies.xml
diffstat 4 files changed, 92 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mapQfilter_wrapper.xml	Tue Nov 10 08:32:58 2015 -0500
@@ -0,0 +1,83 @@
+<tool id="mapQfilter" name="mapQfilter" version="1.0">
+    <description>Filters reads on quality, and removes both members of the pair</description>
+    <requirements> <!-- package names and versions mirror the entries in tool_dependencies.xml -->
+        <requirement version="0.1.19" type="package">samtools</requirement>
+        <requirement version="1.136" type="package">picard</requirement>
+    </requirements>
+    <!-- 1) keep properly paired (-f 0x2), non-secondary (-F 0x100) reads passing the -q $mapQ cut-off; 2) list read names left with a single record in tmpBAM.BAM; -->
+    <!-- 3) exclude the listed reads with Picard FilterSamReads. awk reads the "uniq -c" count by field, so the list does not depend on the padding width of the count column. -->
+    <command>
+        samtools view -b -h -f 0x2 -F 0x100 -o tmpBAM.BAM -q $mapQ "$input1" &amp;&amp;
+        samtools view tmpBAM.BAM | cut -f 1 | sort | uniq -c | awk '\$1 == 1 {print \$2}' > min30.list &amp;&amp;
+        java -jar \$JAVA_JAR_PATH/picard.jar FilterSamReads I=tmpBAM.BAM FILTER=excludeReadList RLF=min30.list OUTPUT="${output1}" VALIDATION_STRINGENCY=LENIENT QUIET=true VERBOSITY=ERROR
+    </command>
+    <inputs> <!-- the "name" values are the variables referenced in the command template -->
+        <param format="bam" label="BAM File to filter" name="input1" type="data"/>
+        <param label="Remove pairs with at least one read under the mapping quality of" name="mapQ" type="integer" value="30"/>
+    </inputs>
+    <outputs> <!-- output1 is the file passed to the OUTPUT= argument of the command -->
+        <data name="output1" format="bam" label="${tool.name} on ${on_string} (bam)"/>
+    </outputs>
+    <tests> <!-- the BAM fixtures are shipped in test-data/ -->
+        <test>
+            <param value="inputMapqfilter.bam" name="input1" />
+            <param value="41" name="mapQ" />
+            <output ftype="bam" file="outputMapqfilter.bam" name="output1" />
+        </test>
+    </tests>
+    <help><![CDATA[
+**Filters paired-end reads on mapping quality, proper pairing and secondary-alignment status, and removes both members of the pair**
+
+-----
+
+**what it does :**
+
+This tool filters out secondary alignments and read pairs that are not properly paired. You can also define a mapping-quality threshold: a read pair is filtered out when at least one of its members is below the threshold.
+It uses samtools and picard tools. The aim of this program is to avoid singletons when filtering on the mapq value: with samtools alone, if only one member of a read pair is below the threshold, the other read is kept and becomes a singleton. MapQFilter provides a bam file with filtered read pairs.
+
+-----
+
+**input and output formats :**
+
+input format: bam file
+output format: bam file
+
+-----
+
+**example :**
+
+input bam (sam):
+----------------
+
+HWI-D00381:238:C5V3UANXX:4:1101:10239:22220	99	Contig_20	437303	60	100M	=	437511	308	AGTAATCCGGCTTGTCATCGAAGCGGAGGGAACGAGTGTAATTGAGGTAGATGGCGAACTCGTTGGGGAAGCCACGGCAGAGCACCTCGGTGGGCGTCGT	BCCCBGGGGGGGGGFGFGGGFGGGGGBEBGGGGGGGFGGGFGGGGGGGGGGEGGGGGDGEGGGBFBGGGGGGGEGGGGGGGGGGGEGGGGCGGGGGGGDC	NM:i:0	MD:Z:100	AS:i:100	XS:i:0
+HWI-D00381:238:C5V3UANXX:4:1101:10239:22220	147	Contig_20	437511	60	100M	=	437303	-308	CTCCATGTCGTCACGGCGGGATTGTTCTAGTCAAGTAAGCTACTGCACATCATTTGCAATCGGGCTACTTACCGACACCCAGGTGAGTGTTGATACTGGC	GCDGGGGFGGGDGAGGGGGGGGEFFGGGGGGGGGGGGGCEGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCB	NM:i:0	MD:Z:100	AS:i:100	XS:i:0
+HWI-D00381:238:C5V3UANXX:4:1101:12349:79395	401	Contig_20	731638	1	68H32M	Contig_22	594669	0	AATGTTATGGACTGGCCTTAAGGGAGAATGCA	GGGGGGGGGGGGGBGGGGGGGGGGGGGBCBBB	NM:i:0	MD:Z:32	AS:i:32	XS:i:31	SA:Z:Contig_22,287814,-,70M30S,6,0;	XA:Z:Contig_22,+275079,31M69S,0;Contig_20,+766449,31M69S,0;
+HWI-D00381:238:C5V3UANXX:4:1101:12643:50485	65	Contig_20	753418	5	73M27S	Contig_22	309172	0	TTGTAGTAGATACAATCCAATAATCTATCTCCCAAATCATTCCTAACCTTAGTGATCCAATTTTCCACAATTAAGTATTTAACTATTTAGTAGAGTAGTA	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGBGGGGDGGGG	NM:i:3	MD:Z:1C23C6T40	AS:i:61	XS:i:57	SA:Z:Contig_22,628627,+,69S31M,0,0;	XA:Z:Contig_20,+746712,69M31S,3;Contig_22,+202567,69M31S,3;Contig_22,+170906,69M31S,4;
+HWI-D00381:238:C5V3UANXX:4:1101:1493:2084	99	Contig_22	181831	38	100M	=	181974	250	TTACTTGCTTAAGCGCAAGGAGCTCTGCCTTAGTAGTTAACGTGGTAATAGTTGCTTGTTAAGCTGCCTTCTAAATAATAGGTCCTCTAAAGAGCGTAAT	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG	NM:i:0	MD:Z:100	AS:i:100	XS:i:85	XA:Z:Contig_20,-721023,100M,3;Contig_20,-731502,100M,4;
+HWI-D00381:238:C5V3UANXX:4:1101:1493:2084	147	Contig_22	181974	19	74M7D26M	=	181831	-250	AATAACGCATTACTACATATTATAAGCTTATTAGGATCCCTATTCCTATATTAGATTAATTAGTATTAAGTTTAGTATAGATAGACAATTACTTAATTAA	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGCCBB@	NM:i:7	MD:Z:74^GGTTTAG26	AS:i:87	XS:i:100
+HWI-D00381:238:C5V3UANXX:4:1101:12349:79395	145	Contig_22	287814	6	70M30S	=	594669	306787	CTAAATTATAGTCTAATACTACTAACTATAAAATTCTTATACCCTTAGTAATTCCCTAGGTATATTGCAATGTTATGGACTGGCCTTAAGGGAGAATGCA	GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBGGGGGGGGGGGGGBCBBB	NM:i:0	MD:Z:70	AS:i:70	XS:i:66	SA:Z:Contig_20,731638,-,68S32M,1,0;	XA:Z:Contig_22,+664888,34S66M,0;Contig_20,+998169,34S66M,0;Contig_20,+734056,34S66M,0;Contig_22,+501074,30S70M,2;
+HWI-D00381:238:C5V3UANXX:4:1101:12643:50485	129	Contig_22	309172	19	5S95M	Contig_20	753418	0	AACACAAGAATAATACTTGTCTTTTCTAAGCCCTGCGCGAAGCGCAGGTTTTGCACAGCTTAGGCGCCAAGACACTAAACCTAGCTAGGGATGCACCTAA	BCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG	NM:i:3	MD:Z:19C8C64G1	AS:i:83	XS:i:75	XA:Z:Contig_22,+398438,100M,5;Contig_22,+302850,100M,6;Contig_20,-126891,100M,6;Contig_22,+586639,100M,6;
+HWI-D00381:238:C5V3UANXX:4:1101:10799:65426	163	Contig_22	318408	60	100M	=	318624	316	CACTTAGCTAGGTTTTAGGTAGTTTCTTAAACTATAGCCTTAAACTACTTAATATCCTTCTAATAGGTAACTATAGCTTCCTTCTCCTTATCCCCTTTAG	BCBBCGGFGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGFGGGGGGGGGGGGGGGGCGGGGGDG	NM:i:0	MD:Z:100	AS:i:100	XS:i:50
+HWI-D00381:238:C5V3UANXX:4:1101:10799:65426	83	Contig_22	318624	40	100M	=	318408	-316	TACTTACTAGATTGCAATACTTATTCTAGGAGATACAGTACTAGCACTTCTTAGAATTCTTTAACTTAAAATTAAAATTATTAAACCTACCCCTAGCTAG	GGGGGGGFGFGGGGGGGGGGGFGCBGGGGEGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGFBGGGGGGGGGGCCCCC	NM:i:0	MD:Z:100	AS:i:100	XS:i:100
+HWI-D00381:238:C5V3UANXX:4:1101:14972:98916	99	Contig_22	349821	39	97M	=	349999	274	TATAGGCGCGCGAGTAGGTAGTAGGGGCAAGCAGAGTAATAGTATTATTAGTTTAAGCCCTTAATATAAGATCCTTAAAATAATAGTAGAGCTAGTA	ABB@AC1E/E/9B9C>1<1EFGGGG//E>GGBFFEG<F:CF1CGGD1<FFF=<B>11E@@GG1CCFCFFG@@GG0D@>EGC0FG0;=@G@@0<FGFG	NM:i:0	MD:Z:97	AS:i:97	XS:i:82	XA:Z:Contig_22,+619968,97M,3;
+HWI-D00381:238:C5V3UANXX:4:1101:14972:98916	147	Contig_22	349999	9	96M4S	=	349821	-274	AAGGTGATCCTCCTAGTAACTATAATAAAACTAAAGCTAGTTGCAGAGGGCTTAGTAGACGTTAGGATTATTAGCAAGTATAGCCTCTAACCTAGTATAG	GF0FBGF<BF0CECG>CGGCGGG>GGGF1=G>E1GGGF@GFFC@:C1BB@GFFFE>GFFGGFGCFBCCGBC@1BFEF1FGGGGGE1GGGGGGGB>ABBBA	NM:i:7	MD:Z:0G4A36A0T9G1C21A18	AS:i:65	XS:i:80
+HWI-D00381:238:C5V3UANXX:4:1101:12349:79395	97	Contig_22	594669	0	100M	=	287814	-306787	ATTAGGGTCCTTAATTTAGCAACCCTACTATAACTAAACCTAAGATAATAGAGATACAGGTAGGATATAGGCGTCTCCTGCTCTCTTATACTACTATATT	CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG	NM:i:3	MD:Z:6A1T47T43	AS:i:86	XS:i:85
+HWI-D00381:238:C5V3UANXX:4:1101:12643:50485	321	Contig_22	628627	0	69H31M	=	309172	-319456	ATTAAGTATTTAACTATTTAGTAGAGTAGTA	FGGGGGGGGGGGGGGGGGGGGBGGGGDGGGG	NM:i:0	MD:Z:31	AS:i:31	XS:i:31	SA:Z:Contig_20,753418,+,73M27S,5,3;	XA:Z:Contig_20,-60185,31M69S,0;Contig_20,-657698,31M69S,0;
+
+
+output bam (sam):
+-----------------
+
+HWI-D00381:238:C5V3UANXX:4:1101:10239:22220	99	Contig_20	437303	60	100M	=	437511	308	AGTAATCCGGCTTGTCATCGAAGCGGAGGGAACGAGTGTAATTGAGGTAGATGGCGAACTCGTTGGGGAAGCCACGGCAGAGCACCTCGGTGGGCGTCGT	BCCCBGGGGGGGGGFGFGGGFGGGGGBEBGGGGGGGFGGGFGGGGGGGGGGEGGGGGDGEGGGBFBGGGGGGGEGGGGGGGGGGGEGGGGCGGGGGGGDC	NM:i:0	MD:Z:100	AS:i:100	XS:i:0
+HWI-D00381:238:C5V3UANXX:4:1101:10239:22220	147	Contig_20	437511	60	100M	=	437303	-308	CTCCATGTCGTCACGGCGGGATTGTTCTAGTCAAGTAAGCTACTGCACATCATTTGCAATCGGGCTACTTACCGACACCCAGGTGAGTGTTGATACTGGC	GCDGGGGFGGGDGAGGGGGGGGEFFGGGGGGGGGGGGGCEGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCB	NM:i:0	MD:Z:100	AS:i:100	XS:i:0
+
+-----
+
+**reference :**
+Li H., Handsaker B., Wysoker A., Fennell T., Ruan J., Homer N., Marth G., Abecasis G., Durbin R. and 1000 Genome Project Data Processing Subgroup (2009) The Sequence alignment/map (SAM) format and SAMtools. Bioinformatics, 25, 2078-9. [PMID: 19505943]
+samtools: http://www.htslib.org/
+picard tools: http://broadinstitute.github.io/picard/
+
+]]>
+    </help>
+</tool>
Binary file test-data/inputMapqfilter.bam has changed
Binary file test-data/outputMapqfilter.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Tue Nov 10 08:32:58 2015 -0500
@@ -0,0 +1,9 @@
+<?xml version="1.0"?>
+<tool_dependency> <!-- Tool Shed repositories that provide the picard and samtools packages -->
+    <package name="picard" version="1.136">
+        <repository name="package_picard_1_136" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="3e9c24e5325b" />
+    </package>
+    <package name="samtools" version="0.1.19">
+        <repository name="package_samtools_0_1_19" owner="devteam" toolshed="https://toolshed.g2.bx.psu.edu" changeset_revision="95d2c4aefb5f" />
+    </package>
+</tool_dependency>