<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper for VCFFiltering.

  Filters the SNPs of a VCF file on depth, allele number and allele frequency.
  The command template calls VCFFiltering_wrapper.py with the input/output
  datasets, the thresholds chosen in the form, optional BED files and the
  HTML report location.
-->
<tool id="VCFFiltering" name="VCFFiltering" version="0.01">
    <description>Filters SNP on a VCF depending on depth, allele number and allele frequency</description>
    <requirements>
        <requirement type="package" version="1.0">VCF_Gandalf_Tools</requirement>
    </requirements>
    <version_command>
        VCFFiltering.py --version
    </version_command>
    <!--
      NOTE(review): the "interpreter" attribute is flagged as deprecated in the
      Galaxy tool schema; it is kept so the wrapper script is still resolved
      the same way as before.
      Dataset paths are single-quoted so that unusual characters in a path
      cannot split the argument.
    -->
    <command interpreter="python"><![CDATA[
        ## Depth bounds (-m / -M) are only passed when automatic detection is off
        #if $DP_auto.is_DP_auto
            VCFFiltering_wrapper.py -f '$inputVCF' -o '$outputVCF' -F $AF -N $AN
        #else
            VCFFiltering_wrapper.py -f '$inputVCF' -o '$outputVCF' -F $AF -N $AN -m $DP_auto.DPmin -M $DP_auto.DPmax
        #end if
        ## Bed files: one -b option per selected dataset
        #if $BedFile_List.is_BedFile
            #for $bed in $BedFile_List.BedFiles
                -b '$bed.inputBed'
            #end for
        #end if
        --graphHTML '$output_html' --dirGraphs '$output_html.files_path'
    ]]></command>
    <inputs>
        <param name="inputVCF" type="data" format="vcf" label="Input VCF File name (from FreeBayes)"/>
        <!-- Depth range: computed automatically (default) or given explicitly -->
        <conditional name="DP_auto">
            <param name="is_DP_auto" type="boolean" label="Calculate optimal depth range automatically" truevalue="yes" falsevalue="no" checked="true"/>
            <when value="yes"/>
            <when value="no">
                <param name="DPmin" type="integer" label="minimum Depth" value="1" help="default = 1">
                    <validator type="in_range" min="0" message="DP can't be negative"/>
                </param>
                <param name="DPmax" type="integer" label="maximum Depth" value="200" help="default = 200">
                    <validator type="in_range" min="0" message="DP can't be negative"/>
                </param>
            </when>
        </conditional>
        <param name="AF" type="float" value="0.9" label="minimum allele frequency" help="default = 0.9">
            <validator type="in_range" min="0.0" max="1.0" message="Allele frequency must be between 0 and 1"/>
        </param>
        <param name="AN" type="integer" value="2" label="maximum allele number" help="default = 2">
            <validator type="in_range" min="1" message="Allele number must be at least 1"/>
        </param>
        <!-- Optional BED files; at least one is required once the option is enabled -->
        <conditional name="BedFile_List">
            <param name="is_BedFile" type="boolean" label="bed files : list of coordinates to filter, multiple beds allowed" truevalue="yes" falsevalue="no" checked="false"/>
            <when value="no"/>
            <when value="yes">
                <repeat name="BedFiles" title="bed files : list of coordinates to filter, multiple beds allowed" min="1">
                    <param name="inputBed" type="data" format="bed" label="Select Bed file "/>
                </repeat>
            </when>
        </conditional>
    </inputs>
    <outputs>
        <data format="vcf" name="outputVCF" label="${tool.name} on ${on_string} (vcf)"/>
        <data format="html" name="output_html" label="${tool.name} graphs on ${on_string} (html)"/>
    </outputs>
    <tests>
        <!-- Automatic depth range -->
        <test>
            <param name="is_DP_auto" value="yes"/>
            <param name="AF" value="0.9"/>
            <param name="AN" value="2"/>
            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf"/>
            <output name="outputVCF" file="VCFFiltering_DPauto_output.vcf"/>
        </test>
        <!-- Explicit depth range 4..200 -->
        <test>
            <param name="is_DP_auto" value="no"/>
            <param name="DPmin" value="4"/>
            <param name="DPmax" value="200"/>
            <param name="AF" value="0.9"/>
            <param name="AN" value="2"/>
            <param name="inputVCF" ftype="vcf" value="VCFFiltering_input.vcf"/>
            <output name="outputVCF" file="VCFFiltering_DP_4_200_output.vcf"/>
        </test>
    </tests>
    <help><![CDATA[
**Filters SNP on a VCF depending on depth, allele number and allele frequency**

-----

**what it does :**

VCFFiltering is a Python script that filters SNP results from freebayes on multiple criteria at once. The filters are:

- Allele number : number of possible alleles at the genomic position
- Allele frequency : frequency of the most represented allele; note that if the most represented allele is the reference (a "." in the ALT column, i.e. the 5th column of the VCF), the filter still works, but the allele frequency should then be under 1-x, where x is the minimum allele frequency
- Depth : higher and lower bound of the depth; the depth is the number of reads mapped on the genomic position.

Depth can be automatically detected. If you do so, the 90% of the positions with a depth closest to the most frequent depth will pass the filter.

This script has been developed to be used with freebayes output, on haploid data.


.. class:: infomark

the VCF source is detected from the header. Please keep the header of your VCF file if you want to use this tool

-----

**input and output formats :**

input format is a VCF file obtained with freebayes; headers are necessary.
You can also add some bed files to filter some specific regions.

output format is a filtered VCF file.

-----

**example :**


VCF input file: ::

    ##fileformat=VCFv4.1
    ##fileDate=20150126
    ##source=freeBayes v0.9.13-2-ga830efd
    ##reference=ref.fsa
    ##phasing=none
    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
    chrom1 1 . T . . . DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
    chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
    chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
    chrom1 4 . G T . . DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
    chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177 GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0

proposed options: ::

    - Calculate optimal depth range automatically = no
    - minimum Depth = 5
    - maximum Depth = 14
    - minimum allele frequency = 0.9
    - maximum allele number = 2

example result: ::

    ##fileformat=VCFv4.1
    ##fileDate=20150126
    ##source=freeBayes v0.9.13-2-ga830efd
    ##reference=ref.fsa
    ##phasing=none
    ##commandline="freebayes --report-monomorphic --ploidy 2 -X -u -f ref.fsa strain_1.bam"
    #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT strain_1
    chrom1 1 . T . . G_DP DP=4;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=4;RPPR=5.18177;G_AN=0;G_AF=0.00;G_DP=4;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:4:4:38:.:.:0
    chrom1 2 . A . . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;RO=11;RPPR=5.18177;G_AN=0;G_AF=0.08;G_DP=12;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:12:11:38:.:.:0
    chrom1 3 . T A . . DP=5;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=1.00;G_DP=5;G_Base=A GT:DP:RO:QR:AO:QA:GL 0/0:5:0:38:5:.:0
    chrom1 4 . G T . G_AF DP=6;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=5;RPPR=5.18177;G_AN=0;G_AF=0.83;G_DP=6;G_Base=T GT:DP:RO:QR:AO:QA:GL 0/0:6:1:38:5:.:0
    chrom1 5 . C C . . DP=12;DPB=1;EPPR=5.18177;GTI=0;MQMR=36;NS=1;NUMALT=0;ODDS=0;PAIREDR=1;PQR=0;PRO=0;QR=38;AO=11;RPPR=5.18177;G_AN=0;G_AF=0.92;G_DP=12;G_Base=C GT:DP:RO:QR:AO:QA:GL 0/0:12:1:38:11:.:0

-----

**reference :**

    ]]></help>
</tool>