comparison STACKS_population.xml @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d6ba40f6c824
1 <tool id="STACKSpopulation" name="STACKS : populations" force_history_refresh="True">
2 <description>Run the STACKS populations program</description>
3
4
<!-- Tool dependency declaration: the "stacks" package, pinned to version 1.18. -->
5 <requirements>
6 <requirement type="package" version="1.18">stacks</requirement>
7 </requirements>
8
<!-- Cheetah template that assembles the STACKS_population.py command line from the tool parameters.
     The script name must stay the first token of the template; keep comments below it. -->
9 <command interpreter="python">
10
11 STACKS_population.py
12 -P $STACKS_archive
13 -b $batchid
14 -M $popmap
15
## kernel smoothing: -k and the window size are only passed when the checkbox is ticked
16 #if $options_kernel.kernel
17 -k
18 --window_size $options_kernel.window
19 #end if
20
## genomic output: enzyme and genomic switch are only passed when the selector is "1" (Yes)
21 #if str( $options_enzyme.options_enzyme_selector ) == "1":
22 -e $options_enzyme.enzyme
23 --genomic $options_enzyme.genomic
24 #end if
25
26 ## advanced options
27 --advanced_options_activate $advanced_options_activate
28 #if $advanced_options_activate
29 -r $advanced_options.minperc
30 -p $advanced_options.minpop
31 -m $advanced_options.mindepth
32 -a $advanced_options.minminor
33 #if str( $advanced_options.correction_select.correction ) != "no_corr":
34 -f $advanced_options.correction_select.correction
35 --p_value_cutoff $advanced_options.correction_select.pcutoff
36 #end if
37 #if str( $advanced_options.blacklistselect.advanced_blackoptions_selector) == "advanced"
38 -B $advanced_options.blacklistselect.blacklist
39 #end if
40 #if str( $advanced_options.whitelistselect.advanced_whiteoptions_selector) == "advanced"
41 -W $advanced_options.whitelistselect.whitelist
42 #end if
43 #if str( $advanced_options.bootstrapresampling.advanced_bootoptions_selector) == "advanced"
44 --bootstrap $advanced_options.bootstrapresampling.bootstrap
45 --bootstrap_reps $advanced_options.bootstrapresampling.bootstrapreps
46 #end if
47 #end if
48
49 ## output files
50 --ss $sumstatssum
51 --s $sumstats
52 --fst_output $outfst
53
54 ## output section
55 #if $options_output.vcf
56 --vcf
57 --ov $outvcf
58 #end if
59 #if $options_output.phylip
60 --phylip
61 --op $outphylip
62 #end if
## The phylip log dataset is created whenever "phylip" is ticked, so --ol stays tied to that checkbox.
## --phylip_var now follows its own checkbox (it used to be sent whenever "phylip" was ticked);
## it is still only sent together with --phylip, so no new flag combination reaches the wrapper
## other than "--phylip without --phylip_var".
63 #if $options_output.phylip
#if $options_output.phylip_var
64 --phylip_var
#end if
65 --ol $outphyliplog
66 #end if
67 #if $options_output.fasta
68 --fasta
69 --of $outfasta
70 #end if
71 #if $options_output.structure
72 --structure
73 --os $outstructure
74 #end if
75 #if $options_output.plink
76 --plink
77 --oe $outplinkped
78 --om=$outplinkmap
79 #end if
80 #if $options_output.phase
81 --phase
82 --phase_output $outphase
83 #end if
84 #if $options_output.beagle
85 --beagle
86 --unphased_output $outbeagle
87 #end if
## NOTE(review): --markers_output is passed unconditionally; confirm this is intended.
88 --markers_output $outmarkers
89 #if $options_output.genepop
90 --genepop
91 --og=$outgenepop
92 #end if
93 #if $options_output.write_single_snp
94 --write_single_snp
95 #end if
96 --logfile $output
97
98 </command>
99
100 <inputs>
<!-- Mandatory inputs; the command template forwards them to the wrapper as -P (archive), -b (batch id) and -M (population map). -->
101 <param name="STACKS_archive" format="zip,tar.gz" type="data" label="Archive from STACKS pipeline regrouping all outputs" />
102 <param name="batchid" type="integer" value="1" label="Batch ID" help="Batch ID to examine when exporting from the catalog" />
103 <param name="popmap" type="data" format="tabular,txt" label="Specify a population map" help="specify a population map" />
<!-- Export-format switches. The command template and the output filters read these booleans as options_output.<name>
     to decide which export flags are passed and which datasets are created. All are unchecked by default.
     NOTE(review): "default" does not look like a documented attribute of boolean params ("checked" sets the initial state) - confirm. -->
104 <section name="options_output" title="Output options" expanded="False">
105 <param name="vcf" type="boolean" checked="false" default="false" label="output results in Variant Call Format (VCF)" />
106 <param name="genepop" type="boolean" checked="false" default="false" label="output results in GenePop Format" />
107 <param name="structure" type="boolean" checked="false" default="false" label="output results in Structure Format" />
108 <param name="fasta" type="boolean" checked="false" default="false" label="output full sequence for each allele, from each sample locus in FASTA format" />
109 <param name="phase" type="boolean" checked="false" default="false" label="output genotypes in PHASE/fastPHASE format" />
110 <param name="beagle" type="boolean" checked="false" default="false" label="output genotypes in Beagle format" />
111 <param name="plink" type="boolean" checked="false" default="false" label="output genotypes in PLINK format" />
112 <param name="phylip" type="boolean" checked="false" default="false" label="output nucleotides that are fixed-within, and variant among populations in Phylip format for phylogenetic tree construction" />
113 <param name="phylip_var" type="boolean" checked="false" default="false" label="include variable sites in the phylip output" />
114 <param name="write_single_snp" type="boolean" checked="false" default="false" label="write only the first SNP per locus in Genepop and Structure outputs" />
115 </section>
<!-- Kernel-smoothing options. When "kernel" is checked the command template passes -k and "window" as the window_size value.
     NOTE(review): the help text speaks of a 150Kb default while the value is the bare number 150 - confirm which unit the wrapper expects. -->
116 <section name="options_kernel" title="Kernel options" expanded="False">
117 <param name="kernel" type="boolean" checked="false" default="false" label="enable kernel-smoothed FIS, π, and FST calculations" />
118 <param name="window" type="integer" value="150" label="window size" help="distance over which to average values (sigma, default 150Kb)" />
119 </section>
120
<!-- Genomic-output switch. Only when the selector is "1" (Yes) are the enzyme list and the "genomic" boolean shown,
     and only then does the command template pass -e <enzyme> and the genomic value. "No" (value "2") is the default
     and exposes no further parameters. -->
121 <conditional name="options_enzyme">
122 <param name="options_enzyme_selector" type="select" label="Did you want to use the genomic output option?">
123 <option value="1">Yes</option>
124 <option value="2" selected="true">No</option>
125 </param>
126 <when value="1">
127 <param name="enzyme" type="select" format="text" label="provide the restriction enzyme used" help="required if generating genomic output" >
128 <option value="apeKI">apeKI</option>
129 <option value="bamHI">bamHI</option>
130 <option value="claI">claI</option>
131 <option value="dpnII">dpnII</option>
132 <option value="eaeI">eaeI</option>
133 <option value="ecoRI">ecoRI</option>
134 <option value="ecoT22I">ecoT22I</option>
135 <option value="hindIII">hindIII</option>
136 <option value="mluCI">mluCI</option>
137 <option value="mseI">mseI</option>
138 <option value="mspI">mspI</option>
139 <option value="ndeI">ndeI</option>
140 <option value="nlaIII">nlaIII</option>
141 <option value="notI">notI</option>
142 <option value="nsiI">nsiI</option>
143 <option value="pstI">pstI</option>
144 <option value="sau3AI">sau3AI</option>
145 <option value="sbfI">sbfI</option>
146 <option value="sexAI">sexAI</option>
147 <option value="sgrAI">sgrAI</option>
148 <option value="sphI">sphI</option>
149 <option value="taqI">taqI</option>
150 <option value="xbaI">xbaI</option>
151 </param>
152 <param name="genomic" type="boolean" checked="false" default="false" label="output each nucleotide position (fixed or polymorphic) in all population members to a file" />
153 </when>
154 <when value="2">
155 </when>
156 </conditional>
<!-- Advanced filtering options. The values in the section below are only forwarded to the wrapper
     when "advanced_options_activate" is checked (the command template guards them with that boolean). -->
157 <param name="advanced_options_activate" type="boolean" label="Activate advanced options" help="advanced options are defined below" />
158 <section name="advanced_options" title="Advanced options">
<!-- Optional whitelist file; passed as -W only when the selector is "advanced". -->
159 <conditional name="whitelistselect">
160 <param name="advanced_whiteoptions_selector" type="select" label="whitelist advanced options">
161 <option value="default" selected="true">Default</option>
162 <option value="advanced">Advanced</option>
163 </param>
164 <when value="default"></when>
165 <when value="advanced">
166 <param name="whitelist" format="txt, tabular" type="data" label="specify a file containing Whitelisted markers to include in the export" />
167 </when>
168 </conditional>
<!-- Optional blacklist file; passed as -B only when the selector is "advanced". -->
169 <conditional name="blacklistselect">
170 <param name="advanced_blackoptions_selector" type="select" label="blacklist advanced options">
171 <option value="default" selected="true">Default</option>
172 <option value="advanced">Advanced</option>
173 </param>
174 <when value="default"></when>
175 <when value="advanced">
176 <param name="blacklist" format="txt, tabular" type="data" label="specify a file containing Blacklisted markers to be excluded from the export" />
177 </when>
178 </conditional>
<!-- Numeric thresholds, passed as -r, -p, -m and -a respectively. -->
179 <param name="minperc" type="float" value="0.5" min="0" max="1" label="min percentage of individuals by population" help="minimum percentage of individuals in a population required to process a locus for that population" />
180 <param name="minpop" type="integer" value="2" label="min number of populations" help="minimum number of populations a locus must be present in to process a locus" />
181 <param name="mindepth" type="integer" value="1" label="min stack depth" help="specify a minimum stack depth required for individuals at a locus" />
<!-- min/max enforce the 0 to 0.5 range that the help text already states, mirroring the bounds declared on minperc. -->
182 <param name="minminor" type="float" value="0.25" min="0" max="0.5" label="min minor allele frequency" help="specify a minimum minor allele frequency required before calculating Fst at a locus (between 0 and 0.5)" />
<!-- Fst correction: every choice other than "no_corr" exposes the same pcutoff parameter, which the command template passes with the correction name. -->
183 <conditional name="correction_select">
184 <param name="correction" type="select" format="text" label="Correction type" help="specify a correction to be applied to Fst values: 'p_value', 'bonferroni_win', or 'bonferroni_gen'" >
185 <option value="no_corr">No correction</option>
186 <option value="p_value">p_value</option>
187 <option value="bonferroni_win">bonferroni_win</option>
188 <option value="bonferroni_gen">bonferroni_gen</option>
189 </param>
190 <when value="no_corr"></when>
191 <when value="p_value">
192 <param name="pcutoff" type="float" value="0.05" label="p-value" help="required p-value to keep an Fst measurement (0.05 by default). Also used as base for Bonferroni correction" />
193 </when>
194 <when value="bonferroni_win">
195 <param name="pcutoff" type="float" value="0.05" label="p-value" help="required p-value to keep an Fst measurement (0.05 by default). Also used as base for Bonferroni correction" />
196 </when>
197 <when value="bonferroni_gen">
198 <param name="pcutoff" type="float" value="0.05" label="p-value" help="required p-value to keep an Fst measurement (0.05 by default). Also used as base for Bonferroni correction" />
199 </when>
200 </conditional>
<!-- Bootstrap resampling: mode and repetition count are only passed when the selector is "advanced". -->
201 <conditional name="bootstrapresampling">
202 <param name="advanced_bootoptions_selector" type="select" label="bootstrap resampling advanced options">
203 <option value="default" selected="true">Default</option>
204 <option value="advanced">Advanced</option>
205 </param>
206 <when value="default"></when>
207 <when value="advanced">
208 <param name="bootstrap" type="select" format="text" label="Bootstrap resampling" help="enable bootstrap resampling for population statistics (reference genome required)" >
209 <option value="exact">exact</option>
210 <option value="approx">approx</option>
211 </param>
212 <param name="bootstrapreps" type="integer" value="100" label="number of resampling" help="number of bootstrap resamplings to calculate" />
213 </when>
214 </conditional>
215 </section>
216 </inputs>
<!-- Output datasets.
     The log, the two summary-statistics tables and the Fst archive are always created. Every other dataset
     is created only when the matching checkbox of the "options_output" section is ticked.
     Each optional dataset used to carry a second filter on options_output['options_output_selector'];
     no parameter of that name is declared in this tool, so that expression could only raise when
     evaluated. Those filters were removed. -->
217 <outputs>
218 <data format="txt" name="output" label="result.log with ${tool.name} on ${on_string}" />
<!-- Hidden placeholder whose only role is to let Galaxy discover extra files written to galaxy_outputs/. -->
219 <data format="txt" name="additional" label="additional file with ${tool.name}" hidden="true">
220 <discover_datasets pattern="__designation_and_ext__" directory="galaxy_outputs" visible="true" />
221 </data>
222
223 <data format="tabular" name="sumstatssum" label="sumstats_summary.tsv with ${tool.name} on ${on_string}" />
224 <data format="tabular" name="sumstats" label="sumstats.tsv with ${tool.name} on ${on_string}" />
225 <data format="zip" name="outfst" label="fst.zip with ${tool.name} on ${on_string}" />
226
227 <data format="vcf" name="outvcf" label="vcf file with ${tool.name} on ${on_string}">
228 <filter>options_output['vcf']</filter>
230 </data>
231 <data format="phylip" name="outphylip" label="phylip file with ${tool.name} on ${on_string}">
232 <filter>options_output['phylip']</filter>
234 </data>
235 <data format="txt" name="outphyliplog" label="phylip.log file with ${tool.name} on ${on_string}">
236 <filter>options_output['phylip']</filter>
238 </data>
<!-- NOTE(review): outunphasedlog is not referenced anywhere in the command template, so nothing appears to write it - confirm it is still needed. -->
239 <data format="txt" name="outunphasedlog" label="unphased.log file with ${tool.name} on ${on_string}">
240 <filter>options_output['beagle']</filter>
242 </data>
243 <data format="fasta" name="outfasta" label="fasta file with ${tool.name} on ${on_string}">
244 <filter>options_output['fasta']</filter>
246 </data>
247 <data format="tabular" name="outstructure" label="structure file with ${tool.name} on ${on_string}">
248 <filter>options_output['structure']</filter>
250 </data>
<!-- Label corrected from "plink.bed": this dataset is the PED file that accompanies the MAP file below. -->
251 <data format="txt" name="outplinkped" label="plink.ped file with ${tool.name} on ${on_string}">
252 <filter>options_output['plink']</filter>
254 </data>
255 <data format="txt" name="outplinkmap" label="plink.map file with ${tool.name} on ${on_string}">
256 <filter>options_output['plink']</filter>
258 </data>
259 <data format="txt" name="outgenepop" label="genepop file with ${tool.name} on ${on_string}">
260 <filter>options_output['genepop']</filter>
262 </data>
263 <data format="zip" name="outphase" label="phased.zip PHASE/fastPHASE genotype files with ${tool.name} on ${on_string}">
264 <filter>options_output['phase']</filter>
266 </data>
267 <data format="zip" name="outbeagle" label="unphase.zip Beagle genotype files with ${tool.name} on ${on_string}">
268 <filter>options_output['beagle']</filter>
270 </data>
<!-- NOTE(review): the command template passes the markers output path unconditionally, yet this dataset only exists when "beagle" is ticked - confirm which of the two is intended. -->
271 <data format="zip" name="outmarkers" label="markers.zip Genotype files with ${tool.name} on ${on_string}">
272 <filter>options_output['beagle']</filter>
274 </data>
275
276
277 </outputs>
278
<!-- Any exit status of 1 or above from the wrapper marks the job as failed.
     The range used to be the single value "1", so other non-zero statuses were reported as success. -->
279 <stdio>
280 <exit_code range="1:" level="fatal" description="Error in Stacks population execution" />
281 </stdio>
282
283 <help>
284
285 .. class:: infomark
286
287 **What it does**
288
289 This program will be executed in place of the genotypes program when a population is being processed through the pipeline. A map specifying which individuals belong to which population is submitted to the program and the program will then calculate population genetics statistics, expected/observed heterozygosity, π, and FIS at each nucleotide position. The populations program will compare all populations pairwise to compute FST. If a set of data is reference aligned, then a kernel-smoothed FST will also be calculated.
290
291 --------
292
293 **Created by:**
294
295 Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko
296
297 --------
298
299 **Example:**
300
301 Input files:
302
303 FASTQ, FASTA, zip, tar.gz
304
305 - Population map::
306
307 indv_01 1
308 indv_02 1
309 indv_03 1
310 indv_04 2
311 indv_05 2
312 indv_06 2
313
314
315 Output files:
316
317 - XXX.tags.tsv file::
318
319 Column Name Description
320 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
321 2 Sample ID Each sample passed through Stacks gets a unique id for that sample.
322 3 Stack ID Each stack formed gets an ID.
323 4 Chromosome If aligned to a reference genome using pstacks, otherwise it is blank.
324 5 Basepair If aligned to ref genome using pstacks.
325 6 Strand If aligned to ref genome using pstacks.
326 7 Sequence Type Either 'consensus', 'primary' or 'secondary', see the Stacks paper for definitions of these terms.
327 8 Sequence ID The individual sequence read that was merged into this stack.
328 9 Sequence The raw sequencing read.
329 10 Deleveraged Flag If "1", this stack was processed by the deleveraging algorithm and was broken down from a larger stack.
330 11 Blacklisted Flag If "1", this stack was still confounded despite processing by the deleveraging algorithm.
331 12 Lumberjackstack Flag If "1", this stack was set aside due to having an extreme depth of coverage.
332
333 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
334
335
336 - XXX.snps.tsv file::
337
338 Column Name Description
339 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
340 2 Sample ID
341 3 Stack ID
342 4 SNP Column
343 5 Likelihood ratio From the SNP-calling model.
344 6 Rank_1 Majority nucleotide.
345 7 Rank_2 Alternative nucleotide.
346
347 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
348
349
350 - XXX.alleles.tsv file::
351
352 Column Name Description
353 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
354 2 Sample ID
355 3 Stack ID
356 4 Haplotype The haplotype, as constructed from the called SNPs at each locus.
357 5 Percent Percentage of reads that have this haplotype
358 6 Count Raw number of reads that have this haplotype
359
360
361 - XXX.matches.tsv file::
362
363 Column Name Description
364 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
365 2 Batch ID
366 3 Catalog ID
367 4 Sample ID
368 5 Stack ID
369 6 Haplotype
370 7 Stack Depth
371
372 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
373
374
375 - batch_X.sumstats.tsv Summary Statistics Output::
376
377 Batch ID The batch identifier for this data set.
378 Locus ID Catalog locus identifier.
379 Chromosome If aligned to a reference genome.
380 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand).
381 Column The nucleotide site within the catalog locus.
382 Population ID The ID supplied to the populations program, as written in the population map file.
383 P Nucleotide The most frequent allele at this position in this population.
384 Q Nucleotide The alternative allele.
385 Number of Individuals Number of individuals sampled in this population at this site.
386 P Frequency of most frequent allele.
387 Observed Heterozygosity The proportion of individuals that are heterozygotes in this population.
388 Observed Homozygosity The proportion of individuals that are homozygotes in this population.
389 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium.
390 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium.
391 pi An estimate of nucleotide diversity.
392 Smoothed pi A weighted average of π depending on the surrounding 3σ of sequence in both directions.
393 Smoothed pi P-value If bootstrap resampling is enabled, a p-value ranking the significance of π within this population.
394 FIS The inbreeding coefficient of an individual (I) relative to the subpopulation (S).
395 Smoothed FIS A weighted average of FIS depending on the surrounding 3σ of sequence in both directions.
396 Smoothed FIS P-value If bootstrap resampling is enabled, a p-value ranking the significance of FIS within this population.
397 Private allele True (1) or false (0), depending on whether this allele occurs only in this population.
398
399 - batch_X.fst_Y-Z.tsv Pairwise FST Output::
400
401 Batch ID The batch identifier for this data set.
402 Locus ID Catalog locus identifier.
403 Population ID 1 The ID supplied to the populations program, as written in the population map file.
404 Population ID 2 The ID supplied to the populations program, as written in the population map file.
405 Chromosome If aligned to a reference genome.
406 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand).
407 Column The nucleotide site within the catalog locus.
408 Overall pi An estimate of nucleotide diversity across the two populations.
409 FST A measure of population differentiation.
410 FET p-value P-value describing if the FST measure is statistically significant according to Fisher's Exact Test.
411 Odds Ratio Fisher's Exact Test odds ratio
412 CI High Fisher's Exact Test confidence interval.
413 CI Low Fisher's Exact Test confidence interval.
414 LOD Score Logarithm of odds score.
415 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium.
416 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium.
417 Corrected FST FST with either the FET p-value, or a window-size or genome size Bonferroni correction.
418 Smoothed FST A weighted average of FST depending on the surrounding 3σ of sequence in both directions.
419 Smoothed FST P-value If bootstrap resampling is enabled, a p-value ranking the significance of FST within this pair of populations.
420
421
422 Instructions to add the functionality of archives management in Galaxy on the `eBiogenouest HUB wiki &lt;https://www.e-biogenouest.org/wiki/ManArchiveGalaxy&gt;`_ .
423
424 --------
425
426 **Output type:**
427
428 - Output type details::
429
430 No compression All files will be added in the current history.
431 Compressed by categories Files will be compressed by categories (snps, allele, matches and tags) into 4 zip archives. These archives and batch files will be added in the current history.
432 Compressed all outputs All files will be compressed in a single zip archive. Batch files will be added in the current history with the archive.
433
434
435 --------
436
437 **Project links:**
438
439 `STACKS website &lt;http://creskolab.uoregon.edu/stacks/&gt;`_ .
440
441 `STACKS manual &lt;http://creskolab.uoregon.edu/stacks/stacks_manual.pdf&gt;`_ .
442
443 `STACKS google group &lt;https://groups.google.com/forum/#!forum/stacks-users&gt;`_ .
444
445 --------
446
447 **References:**
448
449 -J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.
450
451 -J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.
452
453 -J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.
454
455 -A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799-808, 2011.
456
457 -P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.
458
459 -K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Sciences, 107(37):16196-200, 2010.
460
461 --------
462
463 **Integrated by:**
464
465 Yvan Le Bras and Cyril Monjeaud
466
467 GenOuest Bio-informatics Core Facility
468
469 UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)
470
471 support@genouest.org
472
473 If you use this tool in Galaxy, please cite :
474
475 `Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. &lt;https://www.e-biogenouest.org/resources/128&gt;`_
476
477
478 </help>
<!-- Machine-readable citations: six DOIs followed by one BibTeX entry for the JOBIM 2013 paper.
     NOTE(review): the DOIs presumably correspond, in order, to the six papers listed under "References" in the help text - confirm. -->
479 <citations>
480 <citation type="doi">10.1111/mec.12354</citation>
481 <citation type="doi">10.1111/mec.12330</citation>
482 <citation type="doi">10.1534/g3.111.000240</citation>
483 <citation type="doi">10.1534/genetics.111.127324</citation>
484 <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>
485 <citation type="doi">10.1073/pnas.1006538107</citation>
486
487 <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
488 author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
489 title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
490 booktitle = {JOBIM 2013 Proceedings},
491 year = {2013},
492 url = {https://www.e-biogenouest.org/resources/128},
493 pages = {97-106}
494 }</citation>
495 </citations>
496 </tool>
497