comparison vcftools_main/VCFToolFilter/vcfToolsFilter.xml @ 0:3b1436a9a6e5 draft

Uploaded
author gandres
date Thu, 02 Jul 2015 05:21:40 -0400
parents
children 0f67ed444d47
comparison
equal deleted inserted replaced
-1:000000000000 0:3b1436a9a6e5
1 <tool id="sniplay_vcftoolsfilter" name="VCFtools Filter" version="1.1.1">
2
3 <!-- [REQUIRED] Tool description displayed after the tool name; kept to the one-line purpose stated in the help section -->
4 <description>Filter VCF file</description>
5
6 <!-- [OPTIONAL] 3rd party tools, binaries, modules... required for the tool to work: the perl binary and the vcftools package, version 0.1.12b -->
7 <requirements>
8 <requirement type="binary">perl</requirement>
9 <requirement type="package" version="0.1.12b">vcftools</requirement>
10 </requirements>
11
12 <!-- [OPTIONAL] Command to be executed to get the tool's version string. Only a commented-out template is present, so no version command is defined yet -->
13 <version_command>
14 <!--
15 tool_binary -v
16 -->
17 </version_command>
18
19 <!-- [REQUIRED] The command to execute. The wrapper has a .sh extension, so it is launched with bash rather than perl. After the fixed arguments come the sample list and the chromosome list ('None' when left empty), then the plink .map output ('' unless the export format is plink) -->
20 <command interpreter="bash">
21 vcfToolsFilter.sh $filein $fileout_label $fileout $filelog $export $frequency $max_freq $allow_missing $nb_alleles_min $nb_alleles_max $type_p $bound_start $bound_end
22 #if str( $samples ) == "":
23 'None'
24 #else
25 $samples
26 #end if
27 #if str( $chromosomes ) == "":
28 'None'
29 #else
30 $chromosomes
31 #end if
32 #if str( $export ) == "plink":
33 $fileout_map
34 #else
35 ''
36 #end if
37 </command>
38
39 <!-- [REQUIRED] Input files and tool parameters. Both MAF thresholds are frequencies, so they are bounded to the 0..1 range like the missing data proportion -->
40 <inputs>
41 <param name="filein" type="data" format="vcf" optional="false" label="VCF input" />
42 <param name="fileout_label" type="text" value="filtered" optional="false" label="Output file basename"/>
43 <param name="samples" type="text" optional="true" label="Samples" help="Samples to be analyzed. Comma separated list">
44 <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
45 </param>
46 <param name="chromosomes" type="text" optional="true" label="Chromosomes" help="Chromosomes to be analyzed. Comma separated list">
47 <validator type="regex" message="Please enter a comma separated list.">^\w+(,\w+)*$</validator>
48 </param>
49 <param name="export" type="select" label="Output format" >
50 <option value="VCF" selected="true">VCF</option>
51 <option value="freq">freq</option>
52 <option value="plink">plink</option>
53 </param>
54 <param name="frequency" type="float" value="0.001" min="0" max="1" label="Minimum MAF." help="Minimum frequency." />
55 <param name="max_freq" type="float" value="0.5" min="0" max="1" label="Maximum MAF." help="Maximum frequency." />
56 <param name="allow_missing" type="float" value="1" min="0" max="1" label="Missing data proportion" help="Allowed missing data proportion per site. Must be comprised between 0 and 1." />
57 <param name="nb_alleles_min" type="integer" value="2" label="Minimum number of alleles" help="Minimum accepted number of alleles." min="2" max="4" />
58 <param name="nb_alleles_max" type="integer" value="2" label="Maximum number of alleles" help="Maximum accepted number of alleles." min="2" max="4" />
59 <param name="type_p" type="select" label="Polymorphisms" help="Type of polymorphisms to keep." >
60 <option value="ALL" selected="true">All</option>
61 <option value="SNP">SNP</option>
62 <option value="INDEL">Indel</option>
63 </param>
64 <param name="bound_start" type="integer" value="1" label="Lower bound" help="Lower bound for a range of sites to be processed." />
65 <param name="bound_end" type="integer" value="100000000" label="Upper bound" help="Upper bound for a range of sites to be processed." />
66 </inputs>
67
68 <!-- [REQUIRED] Output files: the main result (vcf by default, switched to tabular for freq and txt for plink), the plink .map file (only kept when the export format is plink) and the log. NOTE(review): the spaces between the inline #if directives of the first label look like they end up in the rendered label; confirm -->
69 <outputs>
70 <data name="fileout" format="vcf" label="${fileout_label}.#if str($export)=='plink' then 'ped' else '' # #if str($export)=='freq' then 'frq' else '' # #if str($export)=='VCF' then 'vcf' else '' #" >
71 <change_format>
72 <when input="export" value="freq" format="tabular" />
73 <when input="export" value="plink" format="txt" />
74 </change_format>
75 </data>
76 <data name="fileout_map" format="txt" label="${fileout_label}.map">
77 <filter>(export == 'plink')</filter>
78 </data>
79 <data name="filelog" format="txt" label="${fileout_label}.log" />
80 </outputs>
81
82 <!-- [STRONGLY RECOMMENDED] Exit code rules: any exit code of 1 or above is treated as fatal -->
83 <stdio>
84 <!-- [HELP] If no exit code rule is defined, the tool will stop if anything is written to STDERR -->
85 <exit_code range="1:" level="fatal" />
86 </stdio>
87
88 <!-- [OPTIONAL] Tests to be run manually by the Galaxy admin. The single test filters sample.vcf on chr1 with VCF export and compares the VCF and log outputs with reference files -->
89 <tests>
90 <!-- [HELP] Test files have to be in the ~/test-data directory -->
91 <test>
92 <param name="filein" value="sample.vcf" />
93 <param name="chromosomes" value="chr1" />
94 <param name="export" value="VCF" />
95 <param name="frequency" value="0.001" />
96 <param name="max_freq" value="0.5" />
97 <param name="allow_missing" value="1" />
98 <param name="nb_alleles_min" value="2" />
99 <param name="nb_alleles_max" value="4" />
100 <param name="type_p" value="ALL" />
101 <param name="bound_start" value="1" />
102 <param name="bound_end" value="100000000" />
103 <output name="fileout" file="result.vcf" />
104 <output name="filelog" file="result.log" />
105 </test>
106 </tests>
107
108 <!-- [OPTIONAL] Help displayed in Galaxy -->
109 <help>
110
111 .. class:: infomark
112
113 **Authors** Dereeper Alexis
114
115 | Dereeper et al. 2015 in prep.
116
117
118 .. class:: infomark
119
120 **Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique.
121
122 .. class:: infomark
123
124 **Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr
125
126 ---------------------------------------------------
127
128
129
130 ================
131 VCF tools filter
132 ================
133
134 -----------
135 Description
136 -----------
137
138 Filter VCF file
139
140 -----------------
141 Workflow position
142 -----------------
143
144 **Upstream tools**
145
146 =========== ========================== =======
147 Name output file(s) format
148 =========== ========================== =======
149 =========== ========================== =======
150
151
152 **Downstream tools**
153
154 =========== ========================== =======
155 Name output file(s) format
156 =========== ========================== =======
157 =========== ========================== =======
158
159
160 ----------
161 Input file
162 ----------
163
164 VCF file
165 VCF file with all SNPs
166
167 ----------
168 Parameters
169 ----------
170
171 Output file basename
172 Prefix for the output VCF file
173
174 Samples
175 Samples to be analyzed. Comma separated list
176
177 Chromosomes
178 Chromosomes to be analyzed. Comma separated list
179
180 Output format
181 VCF/freq/plink
182
183 Minimum MAF
184 Minimum frequency
185
186 Maximum MAF
187 Maximum frequency
188
189 Missing data proportion
190 Allowed missing data proportion per site. Must be between 0 and 1.
191
192 Number of alleles
193 Accepted number of alleles min and max.
194
195 Polymorphisms
196 Type of polymorphisms to keep (ALL/SNP/INDEL).
197 Bounds
198 Lower bound and upper bound for a range of sites to be processed.
199
200 ------------
201 Output files
202 ------------
203
204 VCF file
205 Filtered VCF file
206
207 Log file
208
209 ---------------------------------------------------
210
211 ---------------
212 Working example
213 ---------------
214
215 Input files
216 ===========
217
218 VCF file
219 ---------
220
221 ::
222
223 ##fileformat=VCFv4.1
224 ##FILTER=&lt;ID=LowQual,Description="Low quality">
225 ##FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
226 [...]
227 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
228 chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0
229
230
231 Parameters
232 ==========
233
234 Output file basename -> filtered_chr1
235
236 Chromosomes -> chr1
237
238 Output format -> VCF
239
240 Minimum MAF -> 0.001
241
242 Maximum MAF -> 0.5
243
244 Missing data proportion -> 1
245
246 Number of alleles min -> 2
247
248 Number of alleles max -> 4
249
250 Polymorphisms -> All
251
252 Lower bound -> 1
253
254 Upper bound -> 100000000
255
256
257 Output files
258 ============
259
260 filtered_chr1.vcf
261 -----------------
262
263 ::
264
265 ##fileformat=VCFv4.1
266 ##FILTER=&lt;ID=LowQual,Description="Low quality"&gt;
267 ##FORMAT=&lt;ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
268 [...]
269 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
270 chr1 5059 . C G 146.84 . AC=2;AF=1.00;AN=2;DP=8;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=24.14;MQ0=1;QD=18.35;EFF=INTRON(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,8:8:18:175,18,0
271
272
273 </help>
274
275 </tool>