comparison VCFFiltering_wrapper.xml @ 0:3552a8d9f51c draft

Uploaded
author urgi-team
date Tue, 10 Nov 2015 08:30:56 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3552a8d9f51c
1 <tool id="VCFFiltering" name="VCFFiltering" version="0.01">
2 <description>Filters SNP on a VCF depending on depth, allele number and allele frequency</description>
3 <requirements> <!-- External package this tool declares as a dependency. -->
4 <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>
5 </requirements>
6 <version_command>
7 VCFFiltering.py --version
8 </version_command> <!-- NOTE(review): the version is queried from VCFFiltering.py while the command runs VCFFiltering_wrapper.py; confirm both ship together. -->
9 <command interpreter="python">
10 ## Arguments shared by every run: input/output VCF, minimum allele frequency (-F) and maximum allele number (-N).
11 VCFFiltering_wrapper.py -f "$inputVCF" -o "$outputVCF" -F $AF -N $AN
12 ## Explicit depth bounds are passed only when automatic depth detection is switched off.
13 #if str($DP_auto.is_DP_auto) == "no"
14 -m $DP_auto.DPmin -M $DP_auto.DPmax
15 #end if
16 ## Bed files: one -b option per entry of the BedFiles repeat.
17 #if str($BedFile_List.is_BedFile) == "yes"
18 #for $bed in $BedFile_List.BedFiles
19 -b "$bed.inputBed"
20 #end for
21 #end if
22 --graphHTML "$output_html" --dirGraphs "$output_html.files_path"
23 </command>
24 <inputs> <!-- User-facing parameters; their names are the variables used in the command template. -->
25 <param name="inputVCF" type="data" format="vcf" label="Input VCF File name (from FreeBayes)"/>
26 <conditional name="DP_auto"> <!-- Manual depth bounds are only offered when automatic detection is off. -->
27 <param name="is_DP_auto" type="boolean" label="Calculate optimal depth range automatically" truevalue="yes" falsevalue="no" checked="true" />
28 <when value="yes"/>
29 <when value="no">
30 <param name="DPmin" type="integer" label="minimum Depth" value="1" help="default = 1">
31 <validator type="in_range" min="0" message="DP can't be negative" />
32 </param>
33 <param name="DPmax" type="integer" label="maximum Depth" value="200" help="default = 200">
34 <validator type="in_range" min="0" message="DP can't be negative" />
35 </param>
36 </when>
37 </conditional>
38 <param name="AF" type="float" value="0.9" label="minimum allele frequency" help="default = 0.9">
39 <validator type="in_range" min="0.0" max="1.0" message="Allele frequency must be between 0 and 1"/>
40 </param>
41 <param name="AN" type="integer" value="2" label="maximum allele number" help="default = 2">
42 <validator type="in_range" min="1" message="Allele number must be at least 1" />
43 </param>
44 <conditional name="BedFile_List"> <!-- Optional bed files; at least one is required once the option is enabled (min="1"). -->
45 <param name="is_BedFile" type="boolean" label="bed files : list of coordinates to filter, multiple beds allowed" truevalue="yes" falsevalue="no" checked="false" />
46 <when value="no"/>
47 <when value="yes">
48 <repeat name="BedFiles" title="bed files : list of coordinates to filter, multiple beds allowed" min="1">
49 <param name="inputBed" type="data" format="bed" label="Select Bed file "/>
50 </repeat>
51 </when>
52 </conditional>
53 </inputs>
54 <outputs> <!-- Filtered VCF plus an HTML report; the command passes output_html's files_path as the graph directory. -->
55 <data name="outputVCF" format="vcf" label="${tool.name} on ${on_string} (vcf)"/>
56 <data name="output_html" format="html" label="${tool.name} graphs on ${on_string} (html)">
57 </data>
58 </outputs>
59 <tests> <!-- Both cases filter the same input VCF; only the depth-range mode differs. -->
60 <test> <!-- Automatic depth range. -->
61 <param name="inputVCF" value="VCFFiltering_input.vcf" ftype="vcf"/>
62 <param name="is_DP_auto" value="yes"/>
63 <param name="AF" value="0.9"/>
64 <param name="AN" value="2"/>
65 <output name="outputVCF" file="VCFFiltering_DPauto_output.vcf"/>
66 </test>
67 <test> <!-- Manual depth range: DPmin=4, DPmax=200. -->
68 <param name="inputVCF" value="VCFFiltering_input.vcf" ftype="vcf"/>
69 <param name="is_DP_auto" value="no"/>
70 <param name="DPmin" value="4"/>
71 <param name="DPmax" value="200"/>
72 <param name="AF" value="0.9"/>
73 <param name="AN" value="2"/>
74 <output name="outputVCF" file="VCFFiltering_DP_4_200_output.vcf"/>
75 </test>
76 </tests>
77 <help><![CDATA[
78 **Filters SNP on a VCF depending on depth, allele number and allele frequency**
79
80 -----
81
82 **what it does :**
83
84 VCFFiltering is a Python script that filters SNP results from freebayes on multiple criteria at once. The filters are :
85
86 - Allele number : number of possible alleles at the genomic position
87 - Allele frequency : frequency of the most represented allele ; note that if the most represented allele is the reference (a "." in the ALT column of the VCF), the filter still works, but the allele frequency should then be under 1-x (x being the minimum allele frequency)
88 - Depth : Higher and lower bound of the depth ; the depth is the number of reads mapped on the genomic positions.
89
90 The depth range can be detected automatically. In that case, the 90 % of positions whose depth is closest to the most frequent depth will pass the filter.
91
92 This script has been developed to be used with freebayes output, on haploid data.
93
94
95 .. class:: infomark
96
97 the VCF source is detected from the header. Please keep the header of your VCF file if you want to use this tool
98
99 -----
100
101 **input and output formats :**
102
103 input format is a VCF file obtained with freebayes ; headers are required
104 you can also add some bed files to filter some specific regions.
105
106 output format is a filtered VCF file.
107
108 -----
109
110 **example :**
111
112
113 VCF input file: ::
114
115 ##fileformat=VCFv4.1
116 ##fileDate=20150126
117 ##source=freeBayes v0.9.13-2-ga830efd
118 ##reference=ref.fsa
119 ##phasing=none
120 ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
121 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
122 chrom1 1 . T . . . DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
123 chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
124 chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
125 chrom1 4 . G T . . DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
126 chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0
127
128 proposed options: ::
129
130 - Calculate optimal depth range automatically = no
131 - minimum Depth = 5
132 - maximum Depth = 14
133 - minimum allele frequency = 0.9
134 - maximum allele number = 2
135
136 example result : ::
137
138 ##fileformat=VCFv4.1
139 ##fileDate=20150126
140 ##source=freeBayes v0.9.13-2-ga830efd
141 ##reference=ref.fsa
142 ##phasing=none
143 ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
144 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
145 chrom1 1 . T . . G_DP DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177;G_AN=0;G_AF=0.00;G_DP=4;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
146 chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177;G_AN=0;G_AF=0.08;G_DP=12;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
147 chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=1.00;G_DP=5;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
148 chrom1 4 . G T . G_AF DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=0.83;G_DP=6;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
149 chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177;G_AN=0;G_AF=0.92;G_DP=12;G_Base=C GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0
150
151 -----
152
153 **reference :**
154
155 ]]>
156 </help>
157 </tool>