comparison STACKS_denovomap.xml @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d6ba40f6c824
1 <tool id="STACKSdenovomap" name="STACKS : De novo map" force_history_refresh="True">
2 <description>Run the STACKS denovo_map.pl wrapper</description>
3
4 <configfiles>
5 <configfile name="parent_sequences">
6 #if str($options_usage.options_usage_selector) == "genetic"
7 #for $parent_file in $options_usage.parent_sequence
8 ${parent_file.display_name}::${parent_file}
9 #end for
10 #end if
11 </configfile>
12 <configfile name="progeny_sequences">
13 #if str($options_usage.options_usage_selector) == "genetic" and str($options_usage.options_progeny.options_progeny_selector) == "yes"
14 #for $progeny_file in $options_usage.options_progeny.progeny_sequence
15 ${progeny_file.display_name}::${progeny_file}
16 #end for
17 #end if
18 </configfile>
19 <configfile name="individual_samples">
20 #if str($options_usage.options_usage_selector) == "population"
21 #for $sample_file in $options_usage.individual_sample
22 ${sample_file.display_name}::${sample_file}
23 #end for
24 #end if
25 </configfile>
26 </configfiles> <!-- Each config file holds one "display_name::dataset" line per selected input; a file stays empty when its usage mode is not selected -->
27
28 <requirements>
29 <requirement type="package" version="1.18">stacks</requirement>
30 </requirements> <!-- Declares the tool dependency: the "stacks" package, pinned to version 1.18 -->
31
32 <command interpreter="python">
33 STACKS_denovomap.py
34 #if str( $options_usage.options_usage_selector ) == "genetic"
35 -p $parent_sequences
36 -b $options_usage.paired
37 #if str( $options_usage.options_progeny.options_progeny_selector ) == "yes"
38 -r $progeny_sequences
39 #end if
40 #else
41 -s $individual_samples
42 #if str( $options_usage.options_popmap.popmap_selector) == "yes"
43 -O $options_usage.options_popmap.popmap
44 #end if
45 #end if
46 -m $advanced_options.minident
47 -P $advanced_options.minidentprogeny
48 -M $advanced_options.mismatchbetlociproc
49 -N $advanced_options.mismatchsecond
50 -n $advanced_options.mismatchbetlocibuild
51 -t $advanced_options.remove_hightly
52 -H $advanced_options.disable_calling
53 ## SNP model: the bounded model also passes the epsilon bounds; alpha is passed for both models
54 #if str( $snp_options.select_model.model_type) == "bounded"
55 --bound_low $snp_options.select_model.boundlow
56 --bound_high $snp_options.select_model.boundhigh
57 --alpha $snp_options.select_model.alpha
58 #else
59 --alpha $snp_options.select_model.alpha
60 #end if
61 ## output datasets (catalog files, log file) and the selected compression mode
62 --catalogsnps $catalogsnps
63 --catalogalleles $catalogalleles
64 --catalogtags $catalogtags
65 --logfile $output
66 --compress_output $output_compress
67 ## additional outputs (zip archives; always passed here, kept or dropped by the output filters)
68 --total_output $total_output
69 --tags_output $tags_output
70 --snps_output $snps_output
71 --alleles_output $alleles_output
72 --matches_output $matches_output
73
74 </command>
75
76 <inputs>
77 <conditional name="options_usage">
78 <param name="options_usage_selector" type="select" label="Select your usage">
79 <option value="genetic" selected="true">Genetic map</option>
80 <option value="population">Population</option>
81 </param>
82 <when value="genetic">
83 <param name="parent_sequence" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Files containing parent sequences" help="FASTQ/FASTA/ZIP/TAR.GZ files containing parent sequences from a mapping cross" />
84 <param name="paired" type="boolean" checked="false" default="false" label="Paired-end fastq files?" help="be careful, all files must have a paired-end friend"/>
85 <conditional name="options_progeny">
86 <param name="options_progeny_selector" type="select" label="Use progeny files">
87 <option value="yes" selected="true">Yes</option>
88 <option value="no">No</option>
89 </param>
90 <when value="yes">
91 <param name="progeny_sequence" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Files containing progeny sequences" help="FASTQ/FASTA/ZIP/TAR.GZ files containing progeny sequences from a mapping cross" />
92 </when>
93 <when value="no">
94 </when>
95 </conditional>
96 </when>
97 <when value="population">
98 <param name="individual_sample" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Files containing an individual sample from a population" help="FASTQ/FASTA/ZIP/TAR.GZ files containing an individual sample from a population" />
99 <conditional name="options_popmap">
100 <param name="popmap_selector" type="select" label="Analyzing one or more populations?" >
101 <option value="no" selected="true">No</option>
102 <option value="yes">Yes</option>
103 </param>
104 <when value="no"></when>
105 <when value="yes">
106 <param name="popmap" type="data" format="tabular,txt" label="Specify a population map" help="If analyzing one or more populations, specify a population map" />
107 </when>
108 </conditional>
109
110 </when>
111 </conditional> <!-- options_usage: "genetic" takes parent files plus optional progeny files; "population" takes individual samples plus an optional population map -->
112 <!-- Stack assembly parameters; where the help text says so, -1 means the parameter is not used -->
113 <section expanded="False" name="advanced_options" title="advanced_options">
114 <param type="integer" name="minident" value="-1" label="Minimum number of identical raw reads required to create a stack" help="leave -1 if you don't use the parameter" />
115 <param type="integer" name="minidentprogeny" value="-1" label="Minimum number of identical raw reads required to create a stack (progeny)" help="leave -1 if you don't use the parameter" />
116 <param type="integer" name="mismatchbetlociproc" value="2" label="Number of mismatches allowed between loci when processing a single individual"/>
117 <param type="integer" name="mismatchsecond" value="-1" label="Number of mismatches allowed when aligning secondary reads" help="leave -1 if you don't use the parameter" />
118 <param type="integer" name="mismatchbetlocibuild" value="0" label="specify the number of mismatches allowed between loci when building the catalog"/>
119 <param type="boolean" name="remove_hightly" checked="false" default="false" label="remove, or break up, highly repetitive RAD-Tags in the ustacks program" />
120 <param type="boolean" name="disable_calling" checked="false" default="false" label="disable calling haplotypes from secondary reads" />
121 </section>
122 <!-- SNP calling model: "snp" exposes alpha only; "bounded" adds lower and upper bounds for the error rate epsilon -->
123 <section expanded="False" name="snp_options" title="SNP_Model_Options">
124 <conditional name="select_model">
125 <param type="select" name="model_type" label="Choose the model">
126 <option selected="true" value="snp">SNP</option>
127 <option value="bounded">Bounded</option>
128 </param>
129 <when value="snp">
130 <param type="float" name="alpha" value="0.05" min="0.001" max="0.1" label="chi square significance level required to call a heterozygote or homozygote" help="either 0.1, 0.05 (default), 0.01, or 0.001" />
131 </when>
132 <when value="bounded">
133 <param type="float" name="boundlow" value="0.0" min="0.0" max="1.0" label="lower bound for epsilon, the error rate" help="between 0 and 1.0"/>
134 <param type="float" name="boundhigh" value="1.0" min="0.0" max="1.0" label="upper bound for epsilon, the error rate" help="between 0 and 1.0" />
135 <param type="float" name="alpha" value="0.05" min="0.001" max="0.1" label="chi square significance level required to call a heterozygote or homozygote" help="either 0.1, 0.05 (default), 0.01, or 0.001" />
136 </when>
137 </conditional>
138 </section>
139 <!-- Output packaging mode; the value is tested by the filters on the zip outputs -->
140 <param type="select" name="output_compress" label="Output type" help="please see below for details">
141 <option selected="true" value="default">No compression</option>
142 <option value="categories">Compressed by categories</option>
143 <option value="total">Compressed all outputs</option>
144 </param>
145 </inputs>
146 <outputs>
147
148 <data name="output" format="txt" label="result.log with ${tool.name} on ${on_string}" />
149 <data name="additional" format="txt" hidden="true" label="additional file with ${tool.name}">
150 <discover_datasets directory="galaxy_outputs" pattern="__designation_and_ext__" visible="true" />
151 </data>
152 <data name="catalogsnps" format="tabular" label="catalog.snps with ${tool.name} on ${on_string}" />
153 <data name="catalogalleles" format="tabular" label="catalog.alleles with ${tool.name} on ${on_string}" />
154 <data name="catalogtags" format="tabular" label="catalog.tags with ${tool.name} on ${on_string}" />
155
156
157 <!-- additional output archives, kept only for the matching output_compress mode -->
158 <data name="total_output" format="zip" label="total_output.zip with ${tool.name} on ${on_string}" >
159 <filter>output_compress == "total"</filter>
160 </data>
161 <data name="tags_output" format="zip" label="tags_output.zip with ${tool.name} on ${on_string}" >
162 <filter>output_compress == "categories"</filter>
163 </data>
164 <data name="snps_output" format="zip" label="snps_output.zip with ${tool.name} on ${on_string}" >
165 <filter>output_compress == "categories"</filter>
166 </data>
167 <data name="alleles_output" format="zip" label="alleles_output.zip with ${tool.name} on ${on_string}" >
168 <filter>output_compress == "categories"</filter>
169 </data>
170 <data name="matches_output" format="zip" label="matches_output.zip with ${tool.name} on ${on_string}" >
171 <filter>output_compress == "categories"</filter>
172 </data>
173
174 </outputs>
175 <stdio>
176 <exit_code range="1:" level="fatal" description="Error in Stacks Denovo execution" />
177 </stdio> <!-- range "1:" marks every exit code of 1 or higher as fatal, not only exit code 1 -->
178 <help>
179
180 .. class:: infomark
181
182 **What it does**
183
184 This program will run each of the Stacks components: first, running ustacks on each of the samples specified, building loci and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci that were marked as 'parents' or 'samples' on the command line, and finally, sstacks will be executed to match each sample against the catalog. A bit more detail on this process can be found in the FAQ. The denovo_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering.
185
186 --------
187
188 **Created by:**
189
190 Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko
191
192 --------
193
194 **Example:**
195
196 Input files:
197
198 FASTQ, FASTA, zip, tar.gz
199
200 - Population map::
201
202 indv_01 1
203 indv_02 1
204 indv_03 1
205 indv_04 2
206 indv_05 2
207 indv_06 2
208
209
210 Output files:
211
212 - XXX.tags.tsv file::
213
214 Column Name Description
215 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
216 2 Sample ID Each sample passed through Stacks gets a unique id for that sample.
217 3 Stack ID Each stack formed gets an ID.
218 4 Chromosome If aligned to a reference genome using pstacks, otherwise it is blank.
219 5 Basepair If aligned to ref genome using pstacks.
220 6 Strand If aligned to ref genome using pstacks.
221 7 Sequence Type Either 'consensus', 'primary' or 'secondary', see the Stacks paper for definitions of these terms.
222 8 Sequence ID The individual sequence read that was merged into this stack.
223 9 Sequence The raw sequencing read.
224 10 Deleveraged Flag If "1", this stack was processed by the deleveraging algorithm and was broken down from a larger stack.
225 11 Blacklisted Flag If "1", this stack was still confounded despite processing by the deleveraging algorithm.
226 12 Lumberjackstack Flag If "1", this stack was set aside due to having an extreme depth of coverage.
227
228 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
229
230
231 - XXX.snps.tsv file::
232
233 Column Name Description
234 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
235 2 Sample ID
236 3 Stack ID
237 4 SNP Column
238 5 Likelihood ratio From the SNP-calling model.
239 6 Rank_1 Majority nucleotide.
240 7 Rank_2 Alternative nucleotide.
241
242 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
243
244
245 - XXX.alleles.tsv file::
246
247 Column Name Description
248 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
249 2 Sample ID
250 3 Stack ID
251 4 Haplotype The haplotype, as constructed from the called SNPs at each locus.
252 5 Percent Percentage of reads that have this haplotype
253 6 Count Raw number of reads that have this haplotype
254
255
256 - XXX.matches.tsv file::
257
258 Column Name Description
259 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
260 2 Batch ID
261 3 Catalog ID
262 4 Sample ID
263 5 Stack ID
264 6 Haplotype
265 7 Stack Depth
266
267 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
268
269
270 - batch_X.sumstats.tsv Summary Statistics Output::
271
272 Batch ID The batch identifier for this data set.
273 Locus ID Catalog locus identifier.
274 Chromosome If aligned to a reference genome.
275 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand).
276 Column The nucleotide site within the catalog locus.
277 Population ID The ID supplied to the populations program, as written in the population map file.
278 P Nucleotide The most frequent allele at this position in this population.
279 Q Nucleotide The alternative allele.
280 Number of Individuals Number of individuals sampled in this population at this site.
281 P Frequency of most frequent allele.
282 Observed Heterozygosity The proportion of individuals that are heterozygotes in this population.
283 Observed Homozygosity The proportion of individuals that are homozygotes in this population.
284 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium.
285 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium.
286 pi An estimate of nucleotide diversity.
287 Smoothed pi A weighted average of pi depending on the surrounding 3 sigma of sequence in both directions.
288 Smoothed pi P-value If bootstrap resampling is enabled, a p-value ranking the significance of pi within this population.
289 FIS The inbreeding coefficient of an individual (I) relative to the subpopulation (S).
290 Smoothed FIS A weighted average of FIS depending on the surrounding 3 sigma of sequence in both directions.
291 Smoothed FIS P-value If bootstrap resampling is enabled, a p-value ranking the significance of FIS within this population.
292 Private allele True (1) or false (0), depending on whether this allele occurs only in this population.
293
294 - batch_X.fst_Y-Z.tsv Pairwise FST Output::
295
296 Batch ID The batch identifier for this data set.
297 Locus ID Catalog locus identifier.
298 Population ID 1 The ID supplied to the populations program, as written in the population map file.
299 Population ID 2 The ID supplied to the populations program, as written in the population map file.
300 Chromosome If aligned to a reference genome.
301 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand).
302 Column The nucleotide site within the catalog locus.
303 Overall pi An estimate of nucleotide diversity across the two populations.
304 FST A measure of population differentiation.
305 FET p-value P-value describing if the FST measure is statistically significant according to Fisher's Exact Test.
306 Odds Ratio Fisher's Exact Test odds ratio
307 CI High Fisher's Exact Test confidence interval.
308 CI Low Fisher's Exact Test confidence interval.
309 LOD Score Logarithm of odds score.
310 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium.
311 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium.
312 Corrected FST FST with either the FET p-value, or a window-size or genome size Bonferroni correction.
313 Smoothed FST A weighted average of FST depending on the surrounding 3 sigma of sequence in both directions.
314 Smoothed FST P-value If bootstrap resampling is enabled, a p-value ranking the significance of FST within this pair of populations.
315
316
317 Instructions for adding archive management functionality to Galaxy are available on the `eBiogenouest HUB wiki &lt;https://www.e-biogenouest.org/wiki/ManArchiveGalaxy&gt;`_ .
318
319 --------
320
321 **Output type:**
322
323 - Output type details::
324
325 No compression All files will be added in the current history.
326 Compressed by categories Files will be compressed by categories (snps, alleles, matches and tags) into 4 zip archives. These archives and batch files will be added in the current history.
327 Compressed all outputs All files will be compressed into a single zip archive. Batch files will be added in the current history with the archive.
328
329
330 --------
331
332 **Project links:**
333
334 `STACKS website &lt;http://creskolab.uoregon.edu/stacks/&gt;`_ .
335
336 `STACKS manual &lt;http://creskolab.uoregon.edu/stacks/stacks_manual.pdf&gt;`_ .
337
338 `STACKS google group &lt;https://groups.google.com/forum/#!forum/stacks-users&gt;`_ .
339
340 --------
341
342 **References:**
343
344 -J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.
345
346 -J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.
347
348 -J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.
349
350 -A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799-808, 2011.
351
352 -P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.
353
354 -K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Sciences, 107(37):16196-200, 2010.
355
356 --------
357
358 **Integrated by:**
359
360 Yvan Le Bras and Cyril Monjeaud
361
362 GenOuest Bio-informatics Core Facility
363
364 UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)
365
366 support@genouest.org
367
368 </help>
369 <citations>
370 <citation type="doi">10.1111/mec.12354</citation>
371 <citation type="doi">10.1111/mec.12330</citation>
372 <citation type="doi">10.1534/g3.111.000240</citation>
373 <citation type="doi">10.1534/genetics.111.127324</citation>
374 <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>
375 <citation type="doi">10.1073/pnas.1006538107</citation>
376
377 <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
378 author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
379 title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
380 booktitle = {JOBIM 2013 Proceedings},
381 year = {2013},
382 url = {https://www.e-biogenouest.org/resources/128},
383 pages = {97-106}
384 }</citation>
385 </citations> <!-- NOTE(review): the six DOIs appear to follow the order of the six entries under "References" in the help text (confirm); the BibTeX entry cites the JOBIM 2013 proceedings paper -->
386 </tool>
387