2
|
1 <tool id="sniplay_vcftoolsstats" name="VCFtools Stats" version="1.0.0">
|
|
2
|
|
<!-- [REQUIRED] Tool description displayed after the tool name -->
<description>Compute statistics on a VCF file</description>
|
|
5
|
|
<!-- [OPTIONAL] External dependencies (binaries, packages, modules) the tool needs in order to run -->
<requirements>
    <requirement type="binary">perl</requirement>
    <requirement version="0.1.12b" type="package">vcftools</requirement>
</requirements>
|
|
11
|
|
<!-- [OPTIONAL] Command executed to obtain the tool's version string -->
<version_command>
    <!-- Placeholder only, no version command is configured yet. Example form: tool_binary -v -->
</version_command>
|
|
18
|
|
<!-- [REQUIRED] The command to execute -->
<!--
    Positional arguments, in order: input VCF, output basename, then the
    annotation, het, imiss, TsTv summary and log output datasets.
    Every argument is single-quoted so that a value containing whitespace
    (the basename is free text entered by the user) stays a single argument
    and cannot shift the positions of the output paths.
    NOTE(review): the interpreter is "perl" although the script carries a .sh
    extension; confirm against the script itself that this is intended.
-->
<command interpreter="perl">
    vcfToolsStats.sh '$filein' '$fileout_label' '$fileout_annot' '$fileout_het' '$fileout_imiss' '$fileout_sum' '$filelog'
</command>
|
|
23
|
|
<!-- [REQUIRED] Input datasets and user-settable parameters -->
<inputs>
    <!-- Mandatory input dataset in VCF format -->
    <param name="filein" type="data" format="vcf" label="VCF input" optional="false"/>
    <!-- Mandatory free-text basename; referenced by the output dataset labels -->
    <param name="fileout_label" type="text" label="Output file basename" value="vcf_stats" optional="false"/>
</inputs>
|
|
29
|
|
<!-- [REQUIRED] Output datasets; each label is the user-supplied basename followed by a fixed suffix -->
<outputs>
    <data format="txt" name="fileout_annot" label="${fileout_label}.annotation"/>
    <data format="txt" name="fileout_het" label="${fileout_label}.het"/>
    <data format="txt" name="fileout_imiss" label="${fileout_label}.imiss"/>
    <data format="txt" name="fileout_sum" label="${fileout_label}.TsTv.summary"/>
    <data format="txt" name="filelog" label="${fileout_label}.log"/>
</outputs>
|
|
38
|
|
<!-- [STRONGLY RECOMMENDED] Exit code rules -->
<stdio>
    <!-- [HELP] When no exit code rule is defined, the tool stops as soon as anything is written to STDERR -->
    <!-- Any exit code of 1 or above is treated as a fatal error -->
    <exit_code level="fatal" range="1:"/>
</stdio>
|
|
44
|
|
<!-- [OPTIONAL] Functional tests, to be run manually by the Galaxy admin -->
<tests>
    <!-- [HELP] Test files must be located in the ~/test-data directory -->
    <test>
        <!-- Input dataset; fileout_label is not set here, so its declared default value applies -->
        <param value="sample.vcf" name="filein"/>
        <!-- Expected content of each of the five output datasets -->
        <output file="result.annotation" name="fileout_annot"/>
        <output file="result.het" name="fileout_het"/>
        <output file="result.imiss" name="fileout_imiss"/>
        <output file="result.TsTv.summary" name="fileout_sum"/>
        <output file="result.log" name="filelog"/>
    </test>
</tests>
|
|
57
|
|
58 <!-- [OPTIONAL] Help displayed in Galaxy -->
|
|
59 <help>
|
|
60
|
|
61 .. class:: infomark
|
|
62
|
|
63 **Authors** Adam Auton, Petr Danecek and Anthony Marcketta (C++ Module) : VCFtools_
|
|
64
|
|
65 .. _VCFtools: http://vcftools.sourceforge.net
|
|
66
|
|
67 | **Please cite** "The Variant Call Format and VCFtools", Petr Danecek, Adam Auton, Goncalo Abecasis, Cornelis A. Albers, Eric Banks, Mark A. DePristo, Robert Handsaker, Gerton Lunter, Gabor Marth, Stephen T. Sherry, Gilean McVean, Richard Durbin and 1000 Genomes Project Analysis Group, **Bioinformatics**, 2011
|
|
68
|
|
69 .. class:: infomark
|
|
70
|
|
71 **Galaxy integration** Andres Gwendoline, Institut Français de Bioinformatique.
|
|
72
|
|
73 .. class:: infomark
|
|
74
|
|
75 **Support** For any questions about Galaxy integration, please send an e-mail to support.abims@sb-roscoff.fr
|
|
76
|
|
77 ---------------------------------------------------
|
|
78
|
|
79
|
|
80 ================
|
|
81 VCFtools Stats
|
|
82 ================
|
|
83
|
|
84 -----------
|
|
85 Description
|
|
86 -----------
|
|
87
|
|
88 | Compute statistics on VCF file
|
|
89 | For further information on VCFtools, please visit the VCFtools website_.
|
|
90
|
|
91 .. _website: http://vcftools.sourceforge.net
|
|
92
|
|
93 -----------------
|
|
94 Workflow position
|
|
95 -----------------
|
|
96
|
|
97 **Upstream tools**
|
|
98
|
|
99 =========== ========================== =======
|
|
100 Name output file(s) format
|
|
101 =========== ========================== =======
|
|
102 =========== ========================== =======
|
|
103
|
|
104
|
|
105 **Downstream tools**
|
|
106
|
|
107 =========== ========================== =======
|
|
108 Name output file(s) format
|
|
109 =========== ========================== =======
|
|
110 =========== ========================== =======
|
|
111
|
|
112
|
|
113 ----------
|
|
114 Input file
|
|
115 ----------
|
|
116
|
|
117 VCF file
|
|
118 VCF file with all SNPs
|
|
119
|
|
120 ----------
|
|
121 Parameters
|
|
122 ----------
|
|
123
|
|
124 Output file basename
|
|
125 Prefix used to label the output files
|
|
126
|
|
127 ------------
|
|
128 Output files
|
|
129 ------------
|
|
130
|
|
131 .annotation file
|
|
132 Statistics on annotation/location along genome
|
|
133
|
|
134 .het file
|
|
135 Statistics on heterozygosity of the individuals
|
|
136
|
|
137 .imiss file
|
|
138 Statistics on missing data of the individuals
|
|
139 .TsTv.summary
|
|
140 Statistics on mutation types and transition/transversion number
|
|
141
|
|
142 .log file
|
|
143
|
|
144 ---------------------------------------------------
|
|
145
|
|
146 ---------------
|
|
147 Working example
|
|
148 ---------------
|
|
149
|
|
150 Input files
|
|
151 ===========
|
|
152
|
|
153 VCF file
|
|
154 ---------
|
|
155
|
|
156 ::
|
|
157
|
|
158 #fileformat=VCFv4.1
|
|
159 #FILTER=<ID=LowQual,Description="Low quality">
|
|
160 #FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
|
|
161 [...]
|
|
162 CHROM POS ID REF ALT QUAL FILTER INFO FORMAT CATB1
|
|
163 chr1 2209 . G T 213.84 . AC=2;AF=1.00;AN=2;DP=7;Dels=0.00;FS=0.000;HaplotypeScore=0.0000;MLEAC=2;MLEAF=1.00;MQ=41.50;MQ0=0;QD=30.55;EFF=DOWNSTREAM(MODIFIER||||Cc01g00020|mRNA||GSCOCT00012438001|),UPSTREAM(MODIFIER||||Cc01g00010|mRNA||GSCOCT00012439001|) GT:AD:DP:GQ:PL 1/1:0,7:7:18:242,18,0
|
|
164
|
|
165
|
|
166 Parameters
|
|
167 ==========
|
|
168
|
|
169 Output file basename -> vcf_stat
|
|
170
|
|
171
|
|
172 Output files
|
|
173 ============
|
|
174
|
|
175 .annotation file
|
|
176 ----------------
|
|
177
|
|
178 ::
|
|
179
|
|
180 Genic 4489
|
|
181 Intergenic 466
|
|
182 ========
|
|
183 Intron 960
|
|
184 Exon 3248
|
|
185 UTR 281
|
|
186 ========
|
|
187 Non-syn 226
|
|
188 Synonym 3022
|
|
189
|
|
190 .het file
|
|
191 ---------
|
|
192
|
|
193 ::
|
|
194
|
|
195 INDV O(HOM) E(HOM) N_SITES F
|
|
196 CATB1 0 0.0 3616 0.00000
|
|
197
|
|
198 .imiss file
|
|
199 -----------
|
|
200
|
|
201 ::
|
|
202
|
|
203 INDV N_DATA N_GENOTYPES_FILTERED N_MISS F_MISS
|
|
204 CATB1 4813 0 0 0
|
|
205
|
|
206 .TsTv.summary file
|
|
207 ------------------
|
|
208
|
|
209 ::
|
|
210
|
|
211 MODEL COUNT
|
|
212 AC 371
|
|
213 AG 1467
|
|
214 AT 562
|
|
215 CG 330
|
|
216 CT 1659
|
|
217 GT 397
|
|
218 Ts 3126
|
|
219 Tv 1660
|
|
220
|
|
221
|
|
222 </help>
|
|
<!-- [OPTIONAL] Publications to cite when using this tool -->
<citations>
    <!-- [HELP] As DOI or BibTex entry -->
    <!-- The variant call format and VCFtools, Bioinformatics 2011 (doi:10.1093/bioinformatics/btr330) -->
    <citation type="bibtex">
        @article{Danecek01082011,
        author = {Danecek, Petr and Auton, Adam and Abecasis, Goncalo and Albers, Cornelis A. and Banks, Eric and DePristo, Mark A. and Handsaker, Robert E. and Lunter, Gerton and Marth, Gabor T. and Sherry, Stephen T. and McVean, Gilean and Durbin, Richard and 1000 Genomes Project Analysis Group},
        title = {The variant call format and VCFtools},
        volume = {27},
        number = {15},
        pages = {2156-2158},
        year = {2011},
        doi = {10.1093/bioinformatics/btr330},
        abstract ={Summary: The variant call format (VCF) is a generic format for storing DNA polymorphism data such as SNPs, insertions, deletions and structural variants, together with rich annotations. VCF is usually stored in a compressed manner and can be indexed for fast data retrieval of variants from a range of positions on the reference genome. The format was developed for the 1000 Genomes Project, and has also been adopted by other projects such as UK10K, dbSNP and the NHLBI Exome Project. VCFtools is a software suite that implements various utilities for processing VCF files, including validation, merging, comparing and also provides a general Perl API. Availability: http://vcftools.sourceforge.net Contact: rd@sanger.ac.uk},
        URL = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.abstract},
        eprint = {http://bioinformatics.oxfordjournals.org/content/27/15/2156.full.pdf+html},
        journal = {Bioinformatics}
        }
    </citation>
</citations>
|
|
242
|
|
243 </tool>
|