Mercurial > repos > urgi-team > mapqfilter
changeset 0:74980f9f1ccc draft default tip
Uploaded
author | urgi-team |
---|---|
date | Tue, 10 Nov 2015 08:32:58 -0500 |
parents | |
children | |
files | mapQfilter_wrapper.xml test-data/inputMapqfilter.bam test-data/outputMapqfilter.bam tool_dependencies.xml |
diffstat | 4 files changed, 92 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
<!--
    mapQfilter_wrapper.xml: Galaxy tool wrapper for mapQfilter.

    Keeps only properly paired, non-secondary reads whose mapping quality
    reaches the requested threshold, then removes every read whose mate did
    not survive that filter, so the output BAM contains complete pairs only.
-->
<tool id="mapQfilter" name="mapQfilter" version="1.0">
    <description>Filters reads on quality, and removes both members of the pair</description>
    <requirements>
        <requirement type="package" version="0.1.19">samtools</requirement>
        <requirement type="package" version="1.136">picard</requirement>
    </requirements>
    <!--
        The command runs three chained steps:
          1. samtools view: keep reads flagged as properly paired (-f 0x2),
             drop secondary alignments (-F 0x100) and reads whose mapping
             quality is below $mapQ (-q); write the result to tmpBAM.BAM.
          2. List the read names that occur exactly once in tmpBAM.BAM, i.e.
             reads whose mate was removed in step 1. "uniq -u" prints only the
             lines that are not repeated, which avoids parsing the padded
             output of "uniq -c". The list file is called min30.list after the
             default threshold, whatever value of mapQ is actually used.
          3. Picard FilterSamReads: exclude the reads named in min30.list and
             write the final BAM to the tool output.
        The body is wrapped in CDATA so that the shell operators && and > do
        not have to be escaped as XML entities.
    -->
    <command><![CDATA[
        samtools view -b -h -f 0x2 -F 0x100 -o tmpBAM.BAM -q $mapQ "${input1}"
        &&
        samtools view tmpBAM.BAM | cut -f 1 | sort | uniq -u > min30.list
        &&
        java -jar \$JAVA_JAR_PATH/picard.jar FilterSamReads I=tmpBAM.BAM FILTER=excludeReadList RLF=min30.list OUTPUT="${output1}" VALIDATION_STRINGENCY=LENIENT QUIET=true VERBOSITY=ERROR
    ]]></command>
    <inputs>
        <param name="input1" type="data" format="bam" label="BAM File to filter"/>
        <param name="mapQ" type="integer" value="30" label="Remove pairs with at least one read under the mapping quality of"/>
    </inputs>
    <outputs>
        <data format="bam" name="output1" label="${tool.name} on ${on_string} (bam)"/>
    </outputs>
    <tests>
        <test>
            <param name="input1" value="inputMapqfilter.bam" />
            <param name="mapQ" value="41" />
            <output name="output1" file="outputMapqfilter.bam" ftype="bam" />
        </test>
    </tests>
    <!-- The help text is reStructuredText; it is kept flush left because indentation is significant there. -->
    <help><![CDATA[
**Filters paired end reads on quality, properly paired and not secondary alignment status, and removes both members of the pair**

-----

**what it does :**

This tool filters out secondary alignments and not properly paired read pairs. You can also define a threshold to filter out read pairs in which at least one member is below the threshold.
It uses samtools and picard tools. The aim of this program is to avoid singletons when you filter with samtools on the mapq value. If only one member of the read pair is below the threshold, the non-filtered read is kept and becomes a singleton.
MapQFilter provides a bam file with filtered read pairs.

-----

**input and output formats :**

input format: bam file
output format: bam file

-----

**example :**

input bam (sam):
----------------

HWI-D00381:238:C5V3UANXX:4:1101:10239:22220 99 Contig_20 437303 60 100M = 437511 308 AGTAATCCGGCTTGTCATCGAAGCGGAGGGAACGAGTGTAATTGAGGTAGATGGCGAACTCGTTGGGGAAGCCACGGCAGAGCACCTCGGTGGGCGTCGT BCCCBGGGGGGGGGFGFGGGFGGGGGBEBGGGGGGGFGGGFGGGGGGGGGGEGGGGGDGEGGGBFBGGGGGGGEGGGGGGGGGGGEGGGGCGGGGGGGDC NM:i:0 MD:Z:100 AS:i:100 XS:i:0
HWI-D00381:238:C5V3UANXX:4:1101:10239:22220 147 Contig_20 437511 60 100M = 437303 -308 CTCCATGTCGTCACGGCGGGATTGTTCTAGTCAAGTAAGCTACTGCACATCATTTGCAATCGGGCTACTTACCGACACCCAGGTGAGTGTTGATACTGGC GCDGGGGFGGGDGAGGGGGGGGEFFGGGGGGGGGGGGGCEGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCB NM:i:0 MD:Z:100 AS:i:100 XS:i:0
HWI-D00381:238:C5V3UANXX:4:1101:12349:79395 401 Contig_20 731638 1 68H32M Contig_22 594669 0 AATGTTATGGACTGGCCTTAAGGGAGAATGCA GGGGGGGGGGGGGBGGGGGGGGGGGGGBCBBB NM:i:0 MD:Z:32 AS:i:32 XS:i:31 SA:Z:Contig_22,287814,-,70M30S,6,0; XA:Z:Contig_22,+275079,31M69S,0;Contig_20,+766449,31M69S,0;
HWI-D00381:238:C5V3UANXX:4:1101:12643:50485 65 Contig_20 753418 5 73M27S Contig_22 309172 0 TTGTAGTAGATACAATCCAATAATCTATCTCCCAAATCATTCCTAACCTTAGTGATCCAATTTTCCACAATTAAGTATTTAACTATTTAGTAGAGTAGTA CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGBGGGGDGGGG NM:i:3 MD:Z:1C23C6T40 AS:i:61 XS:i:57 SA:Z:Contig_22,628627,+,69S31M,0,0; XA:Z:Contig_20,+746712,69M31S,3;Contig_22,+202567,69M31S,3;Contig_22,+170906,69M31S,4;
HWI-D00381:238:C5V3UANXX:4:1101:1493:2084 99 Contig_22 181831 38 100M = 181974 250 TTACTTGCTTAAGCGCAAGGAGCTCTGCCTTAGTAGTTAACGTGGTAATAGTTGCTTGTTAAGCTGCCTTCTAAATAATAGGTCCTCTAAAGAGCGTAAT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG NM:i:0 MD:Z:100 AS:i:100 XS:i:85 XA:Z:Contig_20,-721023,100M,3;Contig_20,-731502,100M,4;
HWI-D00381:238:C5V3UANXX:4:1101:1493:2084 147 Contig_22 181974 19 74M7D26M = 181831 -250 AATAACGCATTACTACATATTATAAGCTTATTAGGATCCCTATTCCTATATTAGATTAATTAGTATTAAGTTTAGTATAGATAGACAATTACTTAATTAA GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGGCCBB@ NM:i:7 MD:Z:74^GGTTTAG26 AS:i:87 XS:i:100
HWI-D00381:238:C5V3UANXX:4:1101:12349:79395 145 Contig_22 287814 6 70M30S = 594669 306787 CTAAATTATAGTCTAATACTACTAACTATAAAATTCTTATACCCTTAGTAATTCCCTAGGTATATTGCAATGTTATGGACTGGCCTTAAGGGAGAATGCA GGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGBGGGGGGGGGGGGGBCBBB NM:i:0 MD:Z:70 AS:i:70 XS:i:66 SA:Z:Contig_20,731638,-,68S32M,1,0; XA:Z:Contig_22,+664888,34S66M,0;Contig_20,+998169,34S66M,0;Contig_20,+734056,34S66M,0;Contig_22,+501074,30S70M,2;
HWI-D00381:238:C5V3UANXX:4:1101:12643:50485 129 Contig_22 309172 19 5S95M Contig_20 753418 0 AACACAAGAATAATACTTGTCTTTTCTAAGCCCTGCGCGAAGCGCAGGTTTTGCACAGCTTAGGCGCCAAGACACTAAACCTAGCTAGGGATGCACCTAA BCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG NM:i:3 MD:Z:19C8C64G1 AS:i:83 XS:i:75 XA:Z:Contig_22,+398438,100M,5;Contig_22,+302850,100M,6;Contig_20,-126891,100M,6;Contig_22,+586639,100M,6;
HWI-D00381:238:C5V3UANXX:4:1101:10799:65426 163 Contig_22 318408 60 100M = 318624 316 CACTTAGCTAGGTTTTAGGTAGTTTCTTAAACTATAGCCTTAAACTACTTAATATCCTTCTAATAGGTAACTATAGCTTCCTTCTCCTTATCCCCTTTAG BCBBCGGFGGGGGGGGGGGGFGGGGGGGGGGEGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGEGGGGGGGGGGGGFGGGGGGGGGGGGGGGGCGGGGGDG NM:i:0 MD:Z:100 AS:i:100 XS:i:50
HWI-D00381:238:C5V3UANXX:4:1101:10799:65426 83 Contig_22 318624 40 100M = 318408 -316 TACTTACTAGATTGCAATACTTATTCTAGGAGATACAGTACTAGCACTTCTTAGAATTCTTTAACTTAAAATTAAAATTATTAAACCTACCCCTAGCTAG GGGGGGGFGFGGGGGGGGGGGFGCBGGGGEGGGGGGGGGGGGGFGGGGGGGGGGGGGGGGGGGFFGGGGGGGGGGGGGGGGGGFBGGGGGGGGGGCCCCC NM:i:0 MD:Z:100 AS:i:100 XS:i:100
HWI-D00381:238:C5V3UANXX:4:1101:14972:98916 99 Contig_22 349821 39 97M = 349999 274 TATAGGCGCGCGAGTAGGTAGTAGGGGCAAGCAGAGTAATAGTATTATTAGTTTAAGCCCTTAATATAAGATCCTTAAAATAATAGTAGAGCTAGTA ABB@AC1E/E/9B9C>1<1EFGGGG//E>GGBFFEG<F:CF1CGGD1<FFF=<B>11E@@GG1CCFCFFG@@GG0D@>EGC0FG0;=@G@@0<FGFG NM:i:0 MD:Z:97 AS:i:97 XS:i:82 XA:Z:Contig_22,+619968,97M,3;
HWI-D00381:238:C5V3UANXX:4:1101:14972:98916 147 Contig_22 349999 9 96M4S = 349821 -274 AAGGTGATCCTCCTAGTAACTATAATAAAACTAAAGCTAGTTGCAGAGGGCTTAGTAGACGTTAGGATTATTAGCAAGTATAGCCTCTAACCTAGTATAG GF0FBGF<BF0CECG>CGGCGGG>GGGF1=G>E1GGGF@GFFC@:C1BB@GFFFE>GFFGGFGCFBCCGBC@1BFEF1FGGGGGE1GGGGGGGB>ABBBA NM:i:7 MD:Z:0G4A36A0T9G1C21A18 AS:i:65 XS:i:80
HWI-D00381:238:C5V3UANXX:4:1101:12349:79395 97 Contig_22 594669 0 100M = 287814 -306787 ATTAGGGTCCTTAATTTAGCAACCCTACTATAACTAAACCTAAGATAATAGAGATACAGGTAGGATATAGGCGTCTCCTGCTCTCTTATACTACTATATT CCCCCGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGG NM:i:3 MD:Z:6A1T47T43 AS:i:86 XS:i:85
HWI-D00381:238:C5V3UANXX:4:1101:12643:50485 321 Contig_22 628627 0 69H31M = 309172 -319456 ATTAAGTATTTAACTATTTAGTAGAGTAGTA FGGGGGGGGGGGGGGGGGGGGBGGGGDGGGG NM:i:0 MD:Z:31 AS:i:31 XS:i:31 SA:Z:Contig_20,753418,+,73M27S,5,3; XA:Z:Contig_20,-60185,31M69S,0;Contig_20,-657698,31M69S,0;


output bam (sam):
-----------------

HWI-D00381:238:C5V3UANXX:4:1101:10239:22220 99 Contig_20 437303 60 100M = 437511 308 AGTAATCCGGCTTGTCATCGAAGCGGAGGGAACGAGTGTAATTGAGGTAGATGGCGAACTCGTTGGGGAAGCCACGGCAGAGCACCTCGGTGGGCGTCGT BCCCBGGGGGGGGGFGFGGGFGGGGGBEBGGGGGGGFGGGFGGGGGGGGGGEGGGGGDGEGGGBFBGGGGGGGEGGGGGGGGGGGEGGGGCGGGGGGGDC NM:i:0 MD:Z:100 AS:i:100 XS:i:0
HWI-D00381:238:C5V3UANXX:4:1101:10239:22220 147 Contig_20 437511 60 100M = 437303 -308 CTCCATGTCGTCACGGCGGGATTGTTCTAGTCAAGTAAGCTACTGCACATCATTTGCAATCGGGCTACTTACCGACACCCAGGTGAGTGTTGATACTGGC GCDGGGGFGGGDGAGGGGGGGGEFFGGGGGGGGGGGGGCEGGGGGGGGGGGGDGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGGCCCCB NM:i:0 MD:Z:100 AS:i:100 XS:i:0

-----

**reference :**
Li H., Handsaker B., Wysoker A., Fennell T., Ruan J., Homer N., Marth G., Abecasis G., Durbin R. and 1000 Genome Project Data Processing Subgroup (2009) The Sequence alignment/map (SAM) format and SAMtools. Bioinformatics, 25, 2078-9. [PMID: 19505943]
samtools: http://www.htslib.org/
picard tools: http://broadinstitute.github.io/picard/

]]>
    </help>
</tool>
<?xml version="1.0"?>
<!--
    tool_dependencies.xml: packages required by the mapQfilter wrapper.
    Each package entry points at the tool shed repository, pinned to a
    changeset revision, that provides it.
-->
<tool_dependency>
    <package name="picard" version="1.136">
        <repository
            name="package_picard_1_136"
            owner="devteam"
            changeset_revision="3e9c24e5325b"
            toolshed="https://toolshed.g2.bx.psu.edu"/>
    </package>
    <package name="samtools" version="0.1.19">
        <repository
            name="package_samtools_0_1_19"
            owner="devteam"
            changeset_revision="95d2c4aefb5f"
            toolshed="https://toolshed.g2.bx.psu.edu"/>
    </package>
</tool_dependency>