comparison VCFToolFilter/vcfToolsFilter.xml @ 2:ac7c9e40d601 draft

Uploaded
author gandres
date Fri, 10 Jul 2015 04:16:17 -0400
parents
children 612066e3f57d
comparison
equal deleted inserted replaced
1:0f67ed444d47 2:ac7c9e40d601
1 <tool id="sniplay_vcftoolsfilter" name="VCFtools Filter" version="1.1.1">
2
3 <!-- [REQUIRED] Tool description displayed after the tool name; must not be blank or nothing is shown next to the name -->
4 <description>Filter a VCF file</description>
5
6 <!-- [OPTIONAL] 3rd party dependencies required for the tool to work: the perl binary and the vcftools package (version 0.1.12b) -->
7 <requirements>
8 <requirement type="binary">perl</requirement>
9 <requirement type="package" version="0.1.12b">vcftools</requirement>
10 </requirements>
11
12 <!-- [OPTIONAL] Command to be executed to get the tool's version string. The body below is only a commented-out template placeholder. NOTE(review): replace it with the real version invocation once confirmed. -->
13 <version_command>
14 <!--
15 tool_binary -v
16 -->
17 </version_command>
18
19 <!-- [REQUIRED] The command to execute. The wrapper has a .sh extension, so it is launched with bash rather than perl. Dataset paths and free-text values are single-quoted so a value containing spaces stays a single positional argument. Samples and chromosomes fall back to 'None' when left empty; the .map output path is only passed for plink export. -->
20 <command interpreter="bash">
21 vcfToolsFilter.sh '$filein' '$fileout_label' '$fileout' '$filelog' $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end
22 #if str( $samples ) == "":
23 'None'
24 #else
25 '$samples'
26 #end if
27 #if str( $chromosomes ) == "":
28 'None'
29 #else
30 '$chromosomes'
31 #end if
32 #if str( $export ) == "plink":
33 '$fileout_map'
34 #else
35 ''
36 #end if
37 </command>
38
39 <!-- [REQUIRED] Input files and tool parameters: the VCF input, the output basename, optional comma separated sample and chromosome lists (regex-validated), the output format, and the MAF, missing data, allele count, polymorphism type and position range filters -->
40 <inputs>
41 <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
42 <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>
43 <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
44 <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
45 </param>
46 <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
47 <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
48 </param>
49 <param name="export" type="select" label="Output format" >
50 <option value="VCF" selected="true">VCF</option>
51 <option value="freq">freq</option>
52 <option value="plink">plink</option>
53 </param>
54 <param name="frequency" type="float" value="0.001" label="Minimum MAF." help="Minimum frequency." />
55 <param name="max_freq" type="float" value="0.5" label="Maximum MAF." help="Maximum frequency." />
56 <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />
57 <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
58 <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />
59 <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >
60 <option value="ALL" selected="true">All</option>
61 <option value="SNP">SNP</option>
62 <option value="INDEL">Indel</option>
63 </param>
64 <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
65 <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
66 </inputs>
67
68 <!-- [REQUIRED] Output files: the main result (vcf by default, switched to tabular for freq export and txt for plink export, with a label extension chosen from the export value), a .map file produced only when the export is plink, and a log file -->
69 <outputs>
70 <data name="fileout" format="vcf" label="${fileout_label}.#if str($export)=='plink' then 'ped' else '' # #if str($export)=='freq' then 'frq' else '' # #if str($export)=='VCF' then 'vcf' else '' #" >
71 <change_format>
72 <when input="export" value="freq" format="tabular" />
73 <when input="export" value="plink" format="txt" />
74 </change_format>
75 </data>
76 <data name="fileout_map" format="txt" label="${fileout_label}.map">
77 <filter>(export == 'plink')</filter>
78 </data>
79 <data name="filelog" format="txt" label="${fileout_label}.log" />
80 </outputs>
81
82 <!-- [STRONGLY RECOMMENDED] Exit code rules: any exit code of 1 or above is treated as fatal -->
83 <stdio>
84 <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
85 <exit_code range="1:" level="fatal" />
86 </stdio>
87
88 <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin: a single test filtering sample.vcf on chr1 with VCF export, compared against result.vcf and result.log -->
89 <tests>
90 <!-- [HELP] Test files have to be in the ~/test-data directory -->
91 <test>
92 <param name="filein" value="sample.vcf" />
93 <param name="chromosomes" value="chr1" />
94 <param name="export" value="VCF" />
95 <param name="frequency" value="0.001" />
96 <param name="max_freq" value="0.5" />
97 <param name="allow_missing" value="1" />
98 <param name="nb_alleles_min" value="2" />
99 <param name="nb_alleles_max" value="4" />
100 <param name="type_p" value="ALL" />
101 <param name="bound_start" value="1" />
102 <param name="bound_end" value="100000000" />
103 <output name="fileout" file="result.vcf" />
104 <output name="filelog" file="result.log" />
105 </test>
106 </tests>
107
108 <!-- [OPTIONAL] Help displayed in Galaxy -->
109 <help>
110
111 .. class:: infomark
112
113 **Authors** Adam Auton, Petr Danecek and Anthony Marcketta (C++ Module) : VCFtools_
114
115 .. _VCFtools: http://vcftools.sourceforge.net
116
117 | **Please cite** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, **Bioinformatics**, 2011
118
119 .. class:: infomark
120
121 **Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique.
122
123 .. class:: infomark
124
125 **Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr
126
127 ---------------------------------------------------
128
129
130
131 ================
132 VCF tools filter
133 ================
134
135 -----------
136 Description
137 -----------
138
139 | Filter VCF file
140 | For further information on VCFtools, please visit the VCFtools website_.
141
142 .. _website: http://vcftools.sourceforge.net
143
144 -----------------
145 Workflow position
146 -----------------
147
148 **Upstream tools**
149
150 =========== ========================== =======
151 Name output file(s) format
152 =========== ========================== =======
153 =========== ========================== =======
154
155
156 **Downstream tools**
157
158 =========== ========================== =======
159 Name output file(s) format
160 =========== ========================== =======
161 =========== ========================== =======
162
163
164 ----------
165 Input file
166 ----------
167
168 VCF file
169 VCF file with all SNPs
170
171 ----------
172 Parameters
173 ----------
174
175 Output file basename
176 Prefix for the output VCF file
177
178 Samples
179 Samples to be analyzed. Comma separated list
180
181 Chromosomes
182 Chromosomes to be analyzed. Comma separated list
183
184 Output format
185 VCF/freq/plink
186
187 Minimum MAF
188 Minimum frequency
189
190 Maximum MAF
191 Maximum frequency
192
193 Missing data proportion
194 Allowed missing data proportion per site. Must be comprised between 0 and 1.
195
196 Number of alleles
197 Accepted number of alleles min and max.
198
199 Polymorphisms
200 Type of polymorphisms to keep (ALL/SNP/INDEL).
201 Bounds
202 Lower bound and upper bound for a range of sites to be processed.
203
204 ------------
205 Output files
206 ------------
207
208 VCF file
209 VCF file filtered
210
211 Log file
212
213 ---------------------------------------------------
214
215 ---------------
216 Working example
217 ---------------
218
219 Input files
220 ===========
221
222 VCF file
223 ---------
224
225 ::
226
227 #fileformat=VCFv4.1
228 #FILTER=&lt;ID=LowQual,Description="Low quality">
229 #FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
230 [...]
231 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
232 chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0
233
234
235 Parameters
236 ==========
237
238 Output file basename -> filtered_chr1
239
240 Chromosomes -> chr1
241
242 Output format -> VCF
243
244 Minimum MAF -> 0.001
245
246 Maximum MAF -> 0.5
247
248 Missing data proportion -> 1
249
250 Number of alleles min -> 2
251
252 Number of alleles max -> 4
253
254 Polymorphisms -> All
255
256 Lower bound -> 1
257
258 Upper bound -> 100000000
259
260
261 Output files
262 ============
263
264 filtered_chr1.vcf
265 -----------------
266
267 ::
268
269 #fileformat=VCFv4.1
270 #FILTER=&lt;ID=LowQual,Description="Low quality"&gt;
271 #FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
272 [...]
273 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
274 chr1 5059 . C G 146.84 . AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,8:8:18:175,18,0
275
276
277 </help>
278 <citations>
279 <!-- [HELP] As DOI or BibTex entry; this entry cites the VCFtools paper (Bioinformatics, 2011) -->
280 <citation type="bibtex">
281 @article{Danecek01082011,
282 author = {Danecek, Petr and Auton, Adam and Abecasis, Goncalo and Albers, Cornelis A. and Banks, Eric and DePristo, Mark A. and Handsaker, Robert E. and Lunter, Gerton and Marth, Gabor T. and Sherry, Stephen T. and McVean, Gilean and Durbin, Richard and 1000 Genomes Project Analysis Group},
283 title = {The variant call format and VCFtools},
284 volume = {27},
285 number = {15},
286 pages = {2156-2158},
287 year = {2011},
288 doi = {10.1093/bioinformatics/btr330},
289 abstract ={Summary: The variant call format (VCF) is a generic format for storing DNA polymorphism data such as SNPs, insertions, deletions and structural variants, together with rich annotations. VCF is usually stored in a compressed manner and can be indexed for fast data retrieval of variants from a range of positions on the reference genome. The format was developed for the 1000 Genomes Project, and has also been adopted by other projects such as UK10K, dbSNP and the NHLBI Exome Project. VCFtools is a software suite that implements various utilities for processing VCF files, including validation, merging, comparing and also provides a general Perl API. Availability: http://vcftools.sourceforge.net Contact: rd@sanger.ac.uk},
290 URL = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.abstract},
291 eprint = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.full.pdf+html},
292 journal = {Bioinformatics}
293 }
294 </citation>
295
296 </citations>
297
298 </tool>