0
|
1 <tool id="sniplay_vcftoolsfilter" name="VCFtools Filter" version="1.1.1">
|
|
2
|
|
3 <!-- [REQUIRED] Tool description displayed after the tool name -->
|
|
4 <description> </description>
|
|
5
|
|
6 <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work -->
|
|
7 <requirements>
|
|
8 <requirement type="binary">perl</requirement>
|
|
9 <requirement type="package" version="0.1.12b">vcftools</requirement>
|
|
10 </requirements>
|
|
11
|
|
12 <!-- [OPTIONAL] Command to be executed to get the tool's version string -->
|
|
13 <version_command>
|
|
14 <!--
|
|
15 tool_binary -v
|
|
16 -->
|
|
17 </version_command>
|
|
18
|
|
19 <!-- [REQUIRED] The command to execute: vcfToolsFilter.sh is run through bash with positional arguments; 'None' replaces an empty samples or chromosomes list, and the .map output path is only passed for plink export (an empty string otherwise) -->
|
|
20 <command interpreter="bash">
|
|
21 vcfToolsFilter.sh $filein '$fileout_label' $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end
|
|
22 #if str( $samples ) == "":
|
|
23 'None'
|
|
24 #else
|
|
25 $samples
|
|
26 #end if
|
|
27 #if str( $chromosomes ) == "":
|
|
28 'None'
|
|
29 #else
|
|
30 $chromosomes
|
|
31 #end if
|
|
32 #if str( $export ) == "plink":
|
|
33 $fileout_map
|
|
34 #else
|
|
35 ''
|
|
36 #end if
|
|
37 </command>
|
|
38
|
|
39 <!-- [REQUIRED] Input files and tool parameters -->
|
|
40 <inputs>
|
|
41 <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
|
|
42 <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>
|
|
43 <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
|
|
44 <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
|
|
45 </param>
|
|
46 <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
|
|
47 <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
|
|
48 </param>
|
|
49 <param name="export" type="select" label="Output format" >
|
|
50 <option value="VCF" selected="true">VCF</option>
|
|
51 <option value="freq">freq</option>
|
|
52 <option value="plink">plink</option>
|
|
53 </param>
|
|
54 <param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />
|
|
55 <param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />
|
|
56 <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />
|
|
57 <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
|
|
58 <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />
|
|
59 <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >
|
|
60 <option value="ALL" selected="true">All</option>
|
|
61 <option value="SNP">SNP</option>
|
|
62 <option value="INDEL">Indel</option>
|
|
63 </param>
|
|
64 <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
|
|
65 <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
|
|
66 </inputs>
|
|
67
|
|
68 <!-- [REQUIRED] Output files: fileout is declared as vcf and switches to tabular (freq export) or txt (plink export); fileout_map is only kept when export is plink; filelog has no filter and is always declared -->
|
|
69 <outputs>
|
|
70 <data name="fileout" format="vcf" label="${fileout_label}.#if str($export)=='plink' then 'ped' else '' # #if str($export)=='freq' then 'frq' else '' # #if str($export)=='VCF' then 'vcf' else '' #" >
|
|
71 <change_format>
|
|
72 <when input="export" value="freq" format="tabular" />
|
|
73 <when input="export" value="plink" format="txt" />
|
|
74 </change_format>
|
|
75 </data>
|
|
76 <data name="fileout_map" format="txt" label="${fileout_label}.map">
|
|
77 <filter>(export == 'plink')</filter>
|
|
78 </data>
|
|
79 <data name="filelog" format="txt" label="${fileout_label}.log" />
|
|
80 </outputs>
|
|
81
|
|
82 <!-- [STRONGLY RECOMMENDED] Exit code rules -->
|
|
83 <stdio>
|
|
84 <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
|
|
85 <exit_code range="1:" level="fatal" />
|
|
86 </stdio>
|
|
87
|
|
88 <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin -->
|
|
89 <tests>
|
|
90 <!-- [HELP] Test files have to be in the ~/test-data directory -->
|
|
91 <test>
|
|
92 <param name="filein" value="sample.vcf" />
|
|
93 <param name="chromosomes" value="chr1" />
|
|
94 <param name="export" value="VCF" />
|
|
95 <param name="frequency" value="0.001" />
|
|
96 <param name="max_freq" value="0.5" />
|
|
97 <param name="allow_missing" value="1" />
|
|
98 <param name="nb_alleles_min" value="2" />
|
|
99 <param name="nb_alleles_max" value="4" />
|
|
100 <param name="type_p" value="ALL" />
|
|
101 <param name="bound_start" value="1" />
|
|
102 <param name="bound_end" value="100000000" />
|
|
103 <output name="fileout" file="result.vcf" />
|
|
104 <output name="filelog" file="result.log" />
|
|
105 </test>
|
|
106 </tests>
|
|
107
|
|
108 <!-- [OPTIONAL] Help displayed in Galaxy -->
|
|
109 <help>
|
|
110
|
|
111 .. class:: infomark
|
|
112
|
1
|
113 **Authors** Adam Auton, Petr Danecek and Anthony Marcketta (C++ Module) : .. _VCFtools: http://vcftools.sourceforge.net
|
0
|
114
|
1
|
115 | **Please cite** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, **Bioinformatics**, 2011
|
0
|
116
|
|
117 .. class:: infomark
|
|
118
|
|
119 **Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique.
|
|
120
|
|
121 .. class:: infomark
|
|
122
|
|
123 **Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr
|
|
124
|
|
125 ---------------------------------------------------
|
|
126
|
|
127
|
|
128
|
|
129 ================
|
|
130 VCF tools filter
|
|
131 ================
|
|
132
|
|
133 -----------
|
|
134 Description
|
|
135 -----------
|
|
136
|
1
|
137 | Filter VCF file
|
|
138 | For further information on VCFtools, please visit the VCFtools website.
|
|
139 | .. _VCFtools: http://vcftools.sourceforge.net
|
0
|
140
|
|
141 -----------------
|
|
142 Workflow position
|
|
143 -----------------
|
|
144
|
|
145 **Upstream tools**
|
|
146
|
|
147 =========== ========================== =======
|
|
148 Name output file(s) format
|
|
149 =========== ========================== =======
|
|
150 =========== ========================== =======
|
|
151
|
|
152
|
|
153 **Downstream tools**
|
|
154
|
|
155 =========== ========================== =======
|
|
156 Name output file(s) format
|
|
157 =========== ========================== =======
|
|
158 =========== ========================== =======
|
|
159
|
|
160
|
|
161 ----------
|
|
162 Input file
|
|
163 ----------
|
|
164
|
|
165 VCF file
|
|
166 VCF file with all SNPs
|
|
167
|
|
168 ----------
|
|
169 Parameters
|
|
170 ----------
|
|
171
|
|
172 Output file basename
|
|
173 Prefix for the output VCF file
|
|
174
|
|
175 Samples
|
|
176 Samples to be analyzed. Comma separated list
|
|
177
|
|
178 Chromosomes
|
|
179 Chromosomes to be analyzed. Comma separated list
|
|
180
|
|
181 Output format
|
|
182 VCF/freq/plink
|
|
183
|
|
184 Minimum MAF
|
|
185 Minimum frequency
|
|
186
|
|
187 Maximum MAF
|
|
188 Maximum frequency
|
|
189
|
|
190 Missing data proportion
|
|
191 Allowed missing data proportion per site. Must be comprised between 0 and 1.
|
|
192
|
|
193 Number of alleles
|
|
194 Accepted number of alleles min and max.
|
|
195
|
|
196 Polymorphisms
|
|
197 Type of polymorphisms to keep (ALL/SNP/INDEL).
|
|
198 Bounds
|
|
199 Lower bound and upper bound for a range of sites to be processed.
|
|
200
|
|
201 ------------
|
|
202 Output files
|
|
203 ------------
|
|
204
|
|
205 VCF file
|
|
206 VCF file filtered
|
|
207
|
|
208 Log file
|
|
209
|
|
210 ---------------------------------------------------
|
|
211
|
|
212 ---------------
|
|
213 Working example
|
|
214 ---------------
|
|
215
|
|
216 Input files
|
|
217 ===========
|
|
218
|
|
219 VCF file
|
|
220 ---------
|
|
221
|
|
222 ::
|
|
223
|
|
224 #fileformat=VCFv4.1
|
|
225 #FILTER=<ID=LowQual,Description="Low quality">
|
|
226 #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
|
|
227 [...]
|
|
228 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
|
|
229 chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0
|
|
230
|
|
231
|
|
232 Parameters
|
|
233 ==========
|
|
234
|
|
235 Output name -> filtered_chr1
|
|
236
|
|
237 Chromosomes -> chr1
|
|
238
|
|
239 Output format -> VCF
|
|
240
|
|
241 Minimum MAF -> 0.001
|
|
242
|
|
243 Maximum MAF -> 0.5
|
|
244
|
|
245 Missing data proportion -> 1
|
|
246
|
|
247 Number of alleles min -> 2
|
|
248
|
|
249 Number of alleles max -> 4
|
|
250
|
|
251 Polymorphisms -> All
|
|
252
|
|
253 Lower bound -> 1
|
|
254
|
|
255 Upper bound -> 100000000
|
|
256
|
|
257
|
|
258 Output files
|
|
259 ============
|
|
260
|
|
261 filtered_chr1.vcf
|
|
262 ----------------------------
|
|
263
|
|
264 ::
|
|
265
|
|
266 #fileformat=VCFv4.1
|
|
267 #FILTER=<ID=LowQual,Description="Low quality">
|
|
268 #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
|
|
269 [...]
|
|
270 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
|
|
271 chr1 5059 . C G 146.84 . AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,8:8:18:175,18,0
|
|
272
|
|
273
|
|
274 </help>
|
|
275
|
|
276 </tool>
|