comparison STACKS_genotypes.xml @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d6ba40f6c824
1 <tool id="STACKSgenotypes" name="STACKS : genotypes" force_history_refresh="True">
2 <description>Run the STACKS genotypes program</description>
3
4 <!-- Tool dependency: the stacks package, pinned to version 1.18 -->
5 <requirements>
6 <requirement type="package" version="1.18">stacks</requirement>
7 </requirements>
8
9 <command interpreter="python">
10 ## Runs the STACKS_genotypes.py wrapper. Dataset paths are single-quoted so that a path containing spaces or shell metacharacters is passed as one argument.
11 STACKS_genotypes.py
12 -P '$STACKS_archive'
13 -b $batchid
14 -c $corrections
15 #if str( $options_output.options_output_selector ) == "1":
16 -t $options_output.map
17 -o $options_output.filetype
18 #end if
19 #if str( $options_enzyme.options_enzyme_selector ) == "1":
20 -e $options_enzyme.enzyme
21 #end if
22 --active_advanced $active_advanced
23 -r $advanced_options.minprogeny
24 -m $advanced_options.mindepth
25 #if str( $advanced_options.blacklistselect.advanced_blackoptions_selector ) == "advanced":
26 -B '$advanced_options.blacklistselect.blacklist'
27 #end if
28 #if str( $advanced_options.whitelistselect.advanced_whiteoptions_selector ) == "advanced":
29 -W '$advanced_options.whitelistselect.whitelist'
30 #end if
31 --active_autocorrect $active_autocorrect
32 --min_hom_seqs $options_autocorrect.hom
33 --min_het_seqs $options_autocorrect.het
34 --max_het_seqs $options_autocorrect.hetmax
35 --logfile '$output'
36 --compress_output $output_compress
37 ## additional outputs
38 --total_output '$total_output'
39
40
41 </command>
42
43 <inputs>
44 <param name="STACKS_archive" format="zip,tar.gz" type="data" label="Archive from STACKS pipeline regrouping all outputs" />
45 <param name="batchid" type="integer" value="1" label="Batch ID" help="Batch ID to examine when exporting from the catalog" />
46 <conditional name="options_output"> <!-- Map type (-t) and output file type (-o) are passed to the wrapper only when "Yes" is selected -->
47 <param name="options_output_selector" type="select" label="Did you want to use the file type output option?">
48 <option value="1">Yes</option>
49 <option value="2" selected="true">No</option>
50 </param>
51 <when value="1">
52 <param name="map" type="select" label="map type" help="map type to write. 'CP', 'DH', 'F2', 'BC1', and 'GEN' are the currently supported map types" >
53 <option value="CP">CP</option>
54 <option value="DH">DH</option>
55 <option value="F2">F2</option>
56 <option value="BC1">BC1</option>
57 <option value="GEN">GEN</option>
58 </param>
59 <param name="filetype" type="select" label="output file type" help="output file type to write, 'joinmap', 'onemap', 'rqtl', and 'genomic' are currently supported" >
60 <option value="joinmap">joinmap</option>
61 <option value="onemap">onemap</option>
62 <option value="rqtl">rqtl</option>
63 <option value="genomic">genomic</option>
64 </param>
65 </when>
66 <when value="2">
67 </when>
68 </conditional>
69 <conditional name="options_enzyme"> <!-- The restriction enzyme (-e) is passed to the wrapper only when "Yes" is selected -->
70 <param name="options_enzyme_selector" type="select" label="Did you want to use the genomic output option?">
71 <option value="1">Yes</option>
72 <option value="2" selected="true">No</option>
73 </param>
74 <when value="1">
75 <param name="enzyme" type="select" label="provide the restriction enzyme used" help="required if generating genomic output" >
76 <option value="apeKI">apeKI</option>
77 <option value="bamHI">bamHI</option>
78 <option value="claI">claI</option>
79 <option value="dpnII">dpnII</option>
80 <option value="eaeI">eaeI</option>
81 <option value="ecoRI">ecoRI</option>
82 <option value="ecoT22I">ecoT22I</option>
83 <option value="hindIII">hindIII</option>
84 <option value="mluCI">mluCI</option>
85 <option value="mseI">mseI</option>
86 <option value="mspI">mspI</option>
87 <option value="ndeI">ndeI</option>
88 <option value="nlaIII">nlaIII</option>
89 <option value="notI">notI</option>
90 <option value="nsiI">nsiI</option>
91 <option value="pstI">pstI</option>
92 <option value="sau3AI">sau3AI</option>
93 <option value="sbfI">sbfI</option>
94 <option value="sexAI">sexAI</option>
95 <option value="sgrAI">sgrAI</option>
96 <option value="sphI">sphI</option>
97 <option value="taqI">taqI</option>
98 <option value="xbaI">xbaI</option>
99 </param>
100 </when>
101 <when value="2">
102 </when>
103 </conditional>
104 <param name="corrections" type="boolean" checked="false" label="Activate automated corrections" />
105 <param name="active_autocorrect" type="boolean" checked="false" label="Activate automated corrections advanced options" help="autocorrect options are defined below" />
106 <section name="options_autocorrect" title="autocorrect options" expanded="False"> <!-- Thresholds forwarded as min_hom_seqs, min_het_seqs and max_het_seqs -->
107 <param name="hom" type="integer" value="5" label="min number of reads for homozygous genotype" help="minimum number of reads required at a stack to call a homozygous genotype (default 5)" />
108 <param name="het" type="float" value="0.05" label="homozygote minor minimum allele frequency" help="below this minor allele frequency a stack is called a homozygote, above it (next choice) it is called unknown (default 0.05)" />
109 <param name="hetmax" type="float" value="0.1" label="heterozygote minor minimum allele frequency" help=" minimum frequency of minor allele to call a heterozygote (default 0.1)" />
110 </section>
111 <!-- Advanced options: whitelist / blacklist files, minimum progeny count and minimum stack depth -->
112 <param name="active_advanced" type="boolean" checked="false" label="Activate advanced options" help="advanced options are defined below" />
113 <section name="advanced_options" title="advanced options" expanded="False">
114 <conditional name="whitelistselect">
115 <param name="advanced_whiteoptions_selector" type="select" label="whitelist advanced options">
116 <option value="default" selected="true">Default</option>
117 <option value="advanced">Advanced</option>
118 </param>
119 <when value="default"></when>
120 <when value="advanced">
121 <param name="whitelist" format="txt,tabular" type="data" label="specify a file containing Whitelisted markers to include in the export" />
122 </when>
123 </conditional>
124 <conditional name="blacklistselect">
125 <param name="advanced_blackoptions_selector" type="select" label="blacklist advanced options">
126 <option value="default" selected="true">Default</option>
127 <option value="advanced">Advanced</option>
128 </param>
129 <when value="default"></when>
130 <when value="advanced">
131 <param name="blacklist" format="txt,tabular" type="data" label="specify a file containing Blacklisted markers to be excluded from the export" />
132 </when>
133 </conditional>
134 <param name="minprogeny" type="integer" value="50" label="min number of progeny" help="minimum number of progeny required to print a marker" />
135 <param name="mindepth" type="integer" value="1" label="min stack depth" help="specify a minimum stack depth required for individuals at a locus" />
136 </section> <!-- Output packaging: the value "total" enables the total_output zip archive -->
137 <param name="output_compress" type="select" label="Output type" help="please see below for details">
138 <option value="default" selected="true">No compression</option>
139 <option value="total">Compressed all outputs</option>
140 </param>
141 </inputs>
142 <outputs>
143 <!-- Main result dataset; the command passes its path to the wrapper as the logfile option -->
144 <data format="tabular" name="output" label="results with ${tool.name} on ${on_string}" />
145 <!-- Hidden placeholder dataset; its discover_datasets rule picks up files from the galaxy_outputs directory with extension tabular -->
146 <data format="txt" name="additional" label="additional file with ${tool.name}" hidden="true">
147 <discover_datasets pattern="__designation__" ext="tabular" directory="galaxy_outputs" visible="true" />
148 </data>
149
150
151 <!-- additional output archives: kept only when output_compress is "total" -->
152 <data format="zip" name="total_output" label="total_output.zip with ${tool.name} on ${on_string}" >
153 <filter>output_compress == "total"</filter>
154 </data>
155
156 </outputs>
157 <!-- Exit code 1 from the wrapper is a fatal job error. NOTE(review): other non-zero exit codes are not matched by this rule; confirm that is intended -->
158 <stdio>
159 <exit_code range="1" level="fatal" description="Error in Stacks genotypes execution" />
160 </stdio>
161 <help>
162
163 .. class:: infomark
164
165 **What it does**
166
167 This program exports a Stacks data set either as a set of observed haplotypes at each locus in the population, or with the haplotypes encoded into genotypes. The -r option allows only loci that exist in a certain number of population individuals to be exported. In a mapping context, raising or lowering this limit is an effective way to control the quality level of markers exported, as genuine markers will be found in a large number of progeny. If exporting a set of observed haplotypes in a population, the "min stack depth" option can be used to restrict exported loci to those that have a minimum depth of reads.
168
169 By default, when executing the pipeline (either denovo_map or ref_map) the genotypes program will be executed last and will identify mappable markers in the population and export both a set of observed haplotypes and a set of generic genotypes with "min number of progeny" option = 1.
170
171
172 Making Corrections
173
174 If enabled with the "Activate automated corrections" option, the genotypes program will make automated corrections to the data. Since loci are matched up in the population, the script can correct false-negative heterozygote alleles since it knows the existence of alleles at a particular locus in the other individuals. For example, the program will identify loci with SNPs that didn’t have high enough coverage to be identified by the SNP caller. It will also check that homozygous tags have a minimum depth of coverage, since a low-coverage polymorphic locus may appear homozygous simply because the other allele wasn’t sequenced.
175
176
177 Correction Thresholds
178
179 The thresholds for automatic corrections can be modified by using the "Activate automated corrections advanced options" option and changing the default values for the "min number of reads for homozygous genotype", "homozygote minor minimum allele frequency" and "heterozygote minor minimum allele frequency" parameters to genotypes. "min number of reads for homozygous genotype" is the minimum number of reads required to consider a stack homozygous (default of 5). The "homozygote minor minimum allele frequency" and "heterozygote minor minimum allele frequency" variables represent fractions. If the ratio of the depth of the smaller allele to the bigger allele is greater than "heterozygote minor minimum allele frequency" (default of 1/10) a stack is called a het. If the ratio is less than "homozygote minor minimum allele frequency" (default of 1/20) a stack is called homozygous. If the ratio is in between the two values it is unknown and a genotype will not be assigned.
180
181 Automated corrections made by the program are shown in the output file in capital letters.
182
183 --------
184
185 **Created by:**
186
187 Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko
188
189 --------
190
191 **Example:**
192
193 Input files:
194
195 FASTQ, FASTA, zip, tar.gz
196
197 Output files:
198
199 - XXX.tags.tsv file::
200
201 Column Name Description
202 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
203 2 Sample ID Each sample passed through Stacks gets a unique id for that sample.
204 3 Stack ID Each stack formed gets an ID.
205 4 Chromosome If aligned to a reference genome using pstacks, otherwise it is blank.
206 5 Basepair If aligned to ref genome using pstacks.
207 6 Strand If aligned to ref genome using pstacks.
208 7 Sequence Type Either 'consensus', 'primary' or 'secondary', see the Stacks paper for definitions of these terms.
209 8 Sequence ID The individual sequence read that was merged into this stack.
210 9 Sequence The raw sequencing read.
211 10 Deleveraged Flag If "1", this stack was processed by the deleveraging algorithm and was broken down from a larger stack.
212 11 Blacklisted Flag If "1", this stack was still confounded despite processing by the deleveraging algorithm.
213 12 Lumberjackstack Flag If "1", this stack was set aside due to having an extreme depth of coverage.
214
215 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
216
217
218 - XXX.snps.tsv file::
219
220 Column Name Description
221 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
222 2 Sample ID
223 3 Stack ID
224 4 SNP Column
225 5 Likelihood ratio From the SNP-calling model.
226 6 Rank_1 Majority nucleotide.
227 7 Rank_2 Alternative nucleotide.
228
229 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
230
231
232 - XXX.alleles.tsv file::
233
234 Column Name Description
235 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
236 2 Sample ID
237 3 Stack ID
238 4 Haplotype The haplotype, as constructed from the called SNPs at each locus.
239 5 Percent Percentage of reads that have this haplotype
240 6 Count Raw number of reads that have this haplotype
241
242
243 - XXX.matches.tsv file::
244
245 Column Name Description
246 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
247 2 Batch ID
248 3 Catalog ID
249 4 Sample ID
250 5 Stack ID
251 6 Haplotype
252 7 Stack Depth
253
254 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
255
256
257 - batch_X.sumstats.tsv Summary Statistics Output::
258
259 Batch ID The batch identifier for this data set.
260 Locus ID Catalog locus identifier.
261 Chromosome If aligned to a reference genome.
262 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand).
263 Column The nucleotide site within the catalog locus.
264 Population ID The ID supplied to the populations program, as written in the population map file.
265 P Nucleotide The most frequent allele at this position in this population.
266 Q Nucleotide The alternative allele.
267 Number of Individuals Number of individuals sampled in this population at this site.
268 P Frequency of most frequent allele.
269 Observed Heterozygosity The proportion of individuals that are heterozygotes in this population.
270 Observed Homozygosity The proportion of individuals that are homozygotes in this population.
271 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium.
272 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium.
273 pi An estimate of nucleotide diversity.
274 Smoothed pi A weighted average of pi depending on the surrounding 3 sigma of sequence in both directions.
275 Smoothed pi P-value If bootstrap resampling is enabled, a p-value ranking the significance of pi within this population.
276 FIS The inbreeding coefficient of an individual (I) relative to the subpopulation (S).
277 Smoothed FIS A weighted average of FIS depending on the surrounding 3 sigma of sequence in both directions.
278 Smoothed FIS P-value If bootstrap resampling is enabled, a p-value ranking the significance of FIS within this population.
279 Private allele True (1) or false (0), depending on whether this allele occurs only in this population.
280
281 - batch_X.fst_Y-Z.tsv Pairwise FST Output::
282
283 Batch ID The batch identifier for this data set.
284 Locus ID Catalog locus identifier.
285 Population ID 1 The ID supplied to the populations program, as written in the population map file.
286 Population ID 2 The ID supplied to the populations program, as written in the population map file.
287 Chromosome If aligned to a reference genome.
288 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand).
289 Column The nucleotide site within the catalog locus.
290 Overall pi An estimate of nucleotide diversity across the two populations.
291 FST A measure of population differentiation.
292 FET p-value P-value describing if the FST measure is statistically significant according to Fisher's Exact Test.
293 Odds Ratio Fisher's Exact Test odds ratio
294 CI High Fisher's Exact Test confidence interval.
295 CI Low Fisher's Exact Test confidence interval.
296 LOD Score Logarithm of odds score.
297 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium.
298 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium.
299 Corrected FST FST with either the FET p-value, or a window-size or genome size Bonferroni correction.
300 Smoothed FST A weighted average of FST depending on the surrounding 3 sigma of sequence in both directions.
301 Smoothed FST P-value If bootstrap resampling is enabled, a p-value ranking the significance of FST within this pair of populations.
302
303
304 Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki &lt;https://www.e-biogenouest.org/wiki/ManArchiveGalaxy&gt;`_ .
305
306 --------
307
308 **Output type:**
309
310 - Output type details::
311
312 No compression All files will be added in the current history.
313 Compressed by categories Files will be compressed by categories (snps, allele, matches and tags) into 4 zip archives. These archives and batch files will be added in the current history.
314 Compressed all outputs All files will be compressed into a single zip archive. Batch files will be added in the current history with the archive.
315
316
317 --------
318
319 **Project links:**
320
321 `STACKS website &lt;http://creskolab.uoregon.edu/stacks/&gt;`_ .
322
323 `STACKS manual &lt;http://creskolab.uoregon.edu/stacks/stacks_manual.pdf&gt;`_ .
324
325 `STACKS google group &lt;https://groups.google.com/forum/#!forum/stacks-users&gt;`_ .
326
327 --------
328
329 **References:**
330
331 -J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.
332
333 -J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.
334
335 -J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.
336
337 -A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799-808, 2011.
338
339 -P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.
340
341 -K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Science, 107(37):16196-200, 2010.
342
343 --------
344
345 **Integrated by:**
346
347 Yvan Le Bras and Cyril Monjeaud
348
349 GenOuest Bio-informatics Core Facility
350
351 UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)
352
353 support@genouest.org
354
355 If you use this tool in Galaxy, please cite :
356
357 `Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. &lt;https://www.e-biogenouest.org/resources/128&gt;`_
358
359
360 </help>
361 <citations>
362 <citation type="doi">10.1111/mec.12354</citation>
363 <citation type="doi">10.1111/mec.12330</citation>
364 <citation type="doi">10.1534/g3.111.000240</citation>
365 <citation type="doi">10.1534/genetics.111.127324</citation>
366 <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>
367 <citation type="doi">10.1073/pnas.1006538107</citation>
368 <!-- BibTeX entry for the JOBIM 2013 proceedings paper, which the help text asks Galaxy users of this tool to cite -->
369 <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
370 author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
371 title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
372 booktitle = {JOBIM 2013 Proceedings},
373 year = {2013},
374 url = {https://www.e-biogenouest.org/resources/128},
375 pages = {97-106}
376 }</citation>
377 </citations>
378 </tool>
379