Mercurial > repos > cmonjeau > stacks
comparison STACKS_denovomap.xml @ 0:d6ba40f6c824
first commit
author | cmonjeau |
---|---|
date | Mon, 24 Aug 2015 09:29:12 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:d6ba40f6c824 |
---|---|
1 <tool id="STACKSdenovomap" name="STACKS : De novo map" force_history_refresh="True"> | |
2 <description>Run the STACKS denovo_map.pl wrapper</description> | |
3 <!-- Generated helper files: each one lists the datasets selected for one input group, one "display_name::value" entry per dataset, and is only filled in when the matching usage mode is selected. --> | |
4 <configfiles> | |
5 <configfile name="parent_sequences"> | |
6 #if str( $options_usage.options_usage_selector ) == "genetic" | |
7 #for $input in $options_usage.parent_sequence: | |
8 ${input.display_name}::${input} | |
9 #end for | |
10 #end if | |
11 </configfile> | |
12 <configfile name="progeny_sequences"> | |
13 #if str( $options_usage.options_usage_selector ) == "genetic" and str( $options_usage.options_progeny.options_progeny_selector ) == "yes" | |
14 #for $input in $options_usage.options_progeny.progeny_sequence: | |
15 ${input.display_name}::${input} | |
16 #end for | |
17 #end if | |
18 </configfile> | |
19 <configfile name="individual_samples"> | |
20 #if str( $options_usage.options_usage_selector ) == "population" | |
21 #for $input in $options_usage.individual_sample: | |
22 ${input.display_name}::${input} | |
23 #end for | |
24 #end if | |
25 </configfile> | |
26 </configfiles> | |
27 <!-- Package the tool depends on: stacks, version 1.18. --> | |
28 <requirements> | |
29 <requirement type="package" version="1.18">stacks</requirement> | |
30 </requirements> | |
31 <!-- Builds the STACKS_denovomap.py command line from the selected usage mode, the assembly and SNP model options, and the output datasets. --> | |
32 <command interpreter="python"> | |
33 STACKS_denovomap.py | |
34 #if str( $options_usage.options_usage_selector ) == "genetic" | |
35 -p $parent_sequences | |
36 -b $options_usage.paired | |
37 #if str( $options_usage.options_progeny.options_progeny_selector ) == "yes" | |
38 -r $progeny_sequences | |
39 #end if | |
40 #else | |
41 -s $individual_samples | |
42 #if str( $options_usage.options_popmap.popmap_selector) == "yes" | |
43 -O $options_usage.options_popmap.popmap | |
44 #end if | |
45 #end if | |
46 -m $advanced_options.minident | |
47 -P $advanced_options.minidentprogeny | |
48 -M $advanced_options.mismatchbetlociproc | |
49 -N $advanced_options.mismatchsecond | |
50 -n $advanced_options.mismatchbetlocibuild | |
51 -t $advanced_options.remove_hightly | |
52 -H $advanced_options.disable_calling | |
53 ## snp_model: the epsilon bounds are only defined for the bounded model | |
54 #if str( $snp_options.select_model.model_type) == "bounded" | |
55 --bound_low $snp_options.select_model.boundlow | |
56 --bound_high $snp_options.select_model.boundhigh | |
57 #end if | |
58 ## alpha is defined for both models, so it is passed unconditionally | |
59 --alpha $snp_options.select_model.alpha | |
60 | |
61 ## outputs | |
62 --catalogsnps $catalogsnps | |
63 --catalogalleles $catalogalleles | |
64 --catalogtags $catalogtags | |
65 --logfile $output | |
66 --compress_output $output_compress | |
67 ## additional outputs | |
68 --total_output $total_output | |
69 --tags_output $tags_output | |
70 --snps_output $snps_output | |
71 --alleles_output $alleles_output | |
72 --matches_output $matches_output | |
73 | |
74 </command> | |
75 <!-- Tool inputs: the usage mode with its sequence files, the stack assembly options, the SNP model options and the output packaging choice. --> | |
76 <inputs> | |
77 <conditional name="options_usage"> | |
78 <param name="options_usage_selector" type="select" label="Select your usage"> | |
79 <option value="genetic" selected="true">Genetic map</option> | |
80 <option value="population">Population</option> | |
81 </param> | |
82 <when value="genetic"> | |
83 <param name="parent_sequence" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Files containing parent sequences" help="FASTQ/FASTA/ZIP/TAR.GZ files containing parent sequences from a mapping cross" /> | |
84 <param name="paired" type="boolean" checked="false" default="false" label="Paired-end fastq files?" help="be careful, all files must have a paired-end friend"/> | |
85 <conditional name="options_progeny"> | |
86 <param name="options_progeny_selector" type="select" label="Use progeny files"> | |
87 <option value="yes" selected="true">Yes</option> | |
88 <option value="no">No</option> | |
89 </param> | |
90 <when value="yes"> | |
91 <param name="progeny_sequence" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Files containing progeny sequences" help="FASTQ/FASTA/ZIP/TAR.GZ files containing progeny sequences from a mapping cross" /> | |
92 </when> | |
93 <when value="no"> | |
94 </when> | |
95 </conditional> | |
96 </when> | |
97 <when value="population"> | |
98 <param name="individual_sample" format="fastq,fasta,zip,tar.gz" type="data" multiple="true" label="Files containing an individual sample from a population" help="FASTQ/FASTA/ZIP/TAR.GZ files containing an individual sample from a population" /> | |
99 <conditional name="options_popmap"> | |
100 <param name="popmap_selector" type="select" label="Analyzing one or more populations?" > | |
101 <option value="no" selected="true">No</option> | |
102 <option value="yes">Yes</option> | |
103 </param> | |
104 <when value="no"></when> | |
105 <when value="yes"> | |
106 <param name="popmap" type="data" format="tabular,txt" label="Specify a population map" help="If analyzing one or more populations, specify a population map" /> | |
107 </when> | |
108 </conditional> | |
109 | |
110 </when> | |
111 </conditional> | |
112 <!-- stack assembly options; -1 is the documented "parameter not used" value for minident, minidentprogeny and mismatchsecond --> | |
113 <section name="advanced_options" title="advanced_options" expanded="False"> | |
114 <param name="minident" type="integer" value="-1" label="Minimum number of identical raw reads required to create a stack" help="leave -1 if you don't use the parameter" /> | |
115 <param name="minidentprogeny" type="integer" value="-1" label="Minimum number of identical raw reads required to create a stack (progeny)" help="leave -1 if you don't use the parameter" /> | |
116 <param name="mismatchbetlociproc" type="integer" value="2" label="Number of mismatches allowed between loci when processing a single individual"/> | |
117 <param name="mismatchsecond" type="integer" value="-1" label="Number of mismatches allowed when aligning secondary reads" help="leave -1 if you don't use the parameter" /> | |
118 <param name="mismatchbetlocibuild" type="integer" value="0" label="specify the number of mismatches allowed between loci when building the catalog"/> | |
119 <param name="remove_hightly" type="boolean" checked="false" default="false" label="remove, or break up, highly repetitive RAD-Tags in the ustacks program" /> | |
120 <param name="disable_calling" type="boolean" checked="false" default="false" label="disable calling haplotypes from secondary reads" /> | |
121 </section> | |
122 <!-- SNP model options: alpha is offered for both models, the epsilon bounds only for the bounded model --> | |
123 <section name="snp_options" title="SNP_Model_Options" expanded="False"> | |
124 <conditional name="select_model"> | |
125 <param name="model_type" type="select" label="Choose the model"> | |
126 <option value="snp" selected="true">SNP</option> | |
127 <option value="bounded">Bounded</option> | |
128 </param> | |
129 <when value="snp"> | |
130 <param name="alpha" type="float" value="0.05" min="0.001" max="0.1" label="chi square significance level required to call a heterozygote or homozygote" help="either 0.1, 0.05 (default), 0.01, or 0.001" /> | |
131 </when> | |
132 <when value="bounded"> | |
133 <param name="boundlow" type="float" value="0.0" min="0.0" max="1.0" label="lower bound for epsilon, the error rate" help="between 0 and 1.0"/> | |
134 <param name="boundhigh" type="float" value="1.0" min="0.0" max="1.0" label="upper bound for epsilon, the error rate" help="between 0 and 1.0" /> | |
135 <param name="alpha" type="float" value="0.05" min="0.001" max="0.1" label="chi square significance level required to call a heterozygote or homozygote" help="either 0.1, 0.05 (default), 0.01, or 0.001" /> | |
136 </when> | |
137 </conditional> | |
138 </section> | |
139 <!-- Output options: the selected value is passed to the wrapper and is also tested by the filters on the zip outputs --> | |
140 <param name="output_compress" type="select" label="Output type" help="please see below for details"> | |
141 <option value="default" selected="true">No compression</option> | |
142 <option value="categories">Compressed by categories</option> | |
143 <option value="total">Compressed all outputs</option> | |
144 </param> | |
145 </inputs> | |
146 <outputs> | |
147 <!-- Always declared: the log file, a hidden dataset whose discover_datasets rule looks in the galaxy_outputs directory, and the three catalog tables. --> | |
148 <data format="txt" name="output" label="result.log with ${tool.name} on ${on_string}" /> | |
149 <data format="txt" name="additional" label="additional file with ${tool.name}" hidden="true"> | |
150 <discover_datasets pattern="__designation_and_ext__" directory="galaxy_outputs" visible="true" /> | |
151 </data> | |
152 <data format="tabular" name="catalogsnps" label="catalog.snps with ${tool.name} on ${on_string}" /> | |
153 <data format="tabular" name="catalogalleles" label="catalog.alleles with ${tool.name} on ${on_string}" /> | |
154 <data format="tabular" name="catalogtags" label="catalog.tags with ${tool.name} on ${on_string}" /> | |
155 | |
156 | |
157 <!-- additional output archives: each one is kept only when output_compress has the value named in its filter --> | |
158 <data format="zip" name="total_output" label="total_output.zip with ${tool.name} on ${on_string}" > | |
159 <filter>output_compress == "total"</filter> | |
160 </data> | |
161 <data format="zip" name="tags_output" label="tags_output.zip with ${tool.name} on ${on_string}" > | |
162 <filter>output_compress == "categories"</filter> | |
163 </data> | |
164 <data format="zip" name="snps_output" label="snps_output.zip with ${tool.name} on ${on_string}" > | |
165 <filter>output_compress == "categories"</filter> | |
166 </data> | |
167 <data format="zip" name="alleles_output" label="alleles_output.zip with ${tool.name} on ${on_string}" > | |
168 <filter>output_compress == "categories"</filter> | |
169 </data> | |
170 <data format="zip" name="matches_output" label="matches_output.zip with ${tool.name} on ${on_string}" > | |
171 <filter>output_compress == "categories"</filter> | |
172 </data> | |
173 | |
174 </outputs> | |
175 <stdio> <!-- Every exit code of 1 or above is a fatal error; an open-ended range is used so that codes other than 1 (for example 2 from an argument-parsing failure) are not reported as success. --> | |
176 <exit_code range="1:" level="fatal" description="Error in Stacks Denovo execution" /> | |
177 </stdio> | |
178 <help> | |
179 | |
180 .. class:: infomark | |
181 | |
182 **What it does** | |
183 | |
184 This program will run each of the Stacks components: first, running ustacks on each of the samples specified, building loci and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci that were marked as 'parents' or 'samples' on the command line, and finally, sstacks will be executed to match each sample against the catalog. A bit more detail on this process can be found in the FAQ. The denovo_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering. | |
185 | |
186 -------- | |
187 | |
188 **Created by:** | |
189 | |
190 Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko | |
191 | |
192 -------- | |
193 | |
194 **Example:** | |
195 | |
196 Input files: | |
197 | |
198 FASTQ, FASTA, zip, tar.gz | |
199 | |
200 - Population map:: | |
201 | |
202 indv_01 1 | |
203 indv_02 1 | |
204 indv_03 1 | |
205 indv_04 2 | |
206 indv_05 2 | |
207 indv_06 2 | |
208 | |
209 | |
210 Output files: | |
211 | |
212 - XXX.tags.tsv file:: | |
213 | |
214 Column Name Description | |
215 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded. | |
216 2 Sample ID Each sample passed through Stacks gets a unique id for that sample. | |
217 3 Stack ID Each stack formed gets an ID. | |
218 4 Chromosome If aligned to a reference genome using pstacks, otherwise it is blank. | |
219 5 Basepair If aligned to ref genome using pstacks. | |
220 6 Strand If aligned to ref genome using pstacks. | |
221 7 Sequence Type Either 'consensus', 'primary' or 'secondary', see the Stacks paper for definitions of these terms. | |
222 8 Sequence ID The individual sequence read that was merged into this stack. | |
223 9 Sequence The raw sequencing read. | |
224 10 Deleveraged Flag If "1", this stack was processed by the deleveraging algorithm and was broken down from a larger stack. | |
225 11 Blacklisted Flag If "1", this stack was still confounded despite processing by the deleveraging algorithm. | |
226 12 Lumberjackstack Flag If "1", this stack was set aside due to having an extreme depth of coverage. | |
227 | |
228 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence. | |
229 | |
230 | |
231 - XXX.snps.tsv file:: | |
232 | |
233 Column Name Description | |
234 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded. | |
235 2 Sample ID | |
236 3 Stack ID | |
237 4 SNP Column | |
238 5 Likelihood ratio From the SNP-calling model. | |
239 6 Rank_1 Majority nucleotide. | |
240 7 Rank_2 Alternative nucleotide. | |
241 | |
242 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one. | |
243 | |
244 | |
245 - XXX.alleles.tsv file:: | |
246 | |
247 Column Name Description | |
248 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded. | |
249 2 Sample ID | |
250 3 Stack ID | |
251 4 Haplotype The haplotype, as constructed from the called SNPs at each locus. | |
252 5 Percent Percentage of reads that have this haplotype | |
253 6 Count Raw number of reads that have this haplotype | |
254 | |
255 | |
256 - XXX.matches.tsv file:: | |
257 | |
258 Column Name Description | |
259 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded. | |
260 2 Batch ID | |
261 3 Catalog ID | |
262 4 Sample ID | |
263 5 Stack ID | |
264 6 Haplotype | |
265 7 Stack Depth | |
266 | |
267 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample. | |
268 | |
269 | |
270 - batch_X.sumstats.tsv Summary Statistics Output:: | |
271 | |
272 Batch ID The batch identifier for this data set. | |
273 Locus ID Catalog locus identifier. | |
274 Chromosome If aligned to a reference genome. | |
275 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand). | |
276 Column The nucleotide site within the catalog locus. | |
277 Population ID The ID supplied to the populations program, as written in the population map file. | |
278 P Nucleotide The most frequent allele at this position in this population. | |
279 Q Nucleotide The alternative allele. | |
280 Number of Individuals Number of individuals sampled in this population at this site. | |
281 P Frequency of most frequent allele. | |
282 Observed Heterozygosity The proportion of individuals that are heterozygotes in this population. | |
283 Observed Homozygosity The proportion of individuals that are homozygotes in this population. | |
284 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium. | |
285 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium. | |
286 pi An estimate of nucleotide diversity. | |
287 Smoothed pi A weighted average of pi depending on the surrounding 3 sigma of sequence in both directions. | |
288 Smoothed pi P-value If bootstrap resampling is enabled, a p-value ranking the significance of pi within this population. | |
289 FIS The inbreeding coefficient of an individual (I) relative to the subpopulation (S). | |
290 Smoothed FIS A weighted average of FIS depending on the surrounding 3 sigma of sequence in both directions. | |
291 Smoothed FIS P-value If bootstrap resampling is enabled, a p-value ranking the significance of FIS within this population. | |
292 Private allele True (1) or false (0), depending on whether this allele occurs only in this population. | |
293 | |
294 - batch_X.fst_Y-Z.tsv Pairwise FST Output:: | |
295 | |
296 Batch ID The batch identifier for this data set. | |
297 Locus ID Catalog locus identifier. | |
298 Population ID 1 The ID supplied to the populations program, as written in the population map file. | |
299 Population ID 2 The ID supplied to the populations program, as written in the population map file. | |
300 Chromosome If aligned to a reference genome. | |
301 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand). | |
302 Column The nucleotide site within the catalog locus. | |
303 Overall pi An estimate of nucleotide diversity across the two populations. | |
304 FST A measure of population differentiation. | |
305 FET p-value P-value describing if the FST measure is statistically significant according to Fisher's Exact Test. | |
306 Odds Ratio Fisher's Exact Test odds ratio | |
307 CI High Fisher's Exact Test confidence interval. | |
308 CI Low Fisher's Exact Test confidence interval. | |
309 LOD Score Logarithm of odds score. | |
310 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium. | |
311 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium. | |
312 Corrected FST FST with either the FET p-value, or a window-size or genome size Bonferroni correction. | |
313 Smoothed FST A weighted average of FST depending on the surrounding 3 sigma of sequence in both directions. | |
314 Smoothed FST P-value If bootstrap resampling is enabled, a p-value ranking the significance of FST within this pair of populations. | |
315 | |
316 | |
317 Instructions for adding archive management functionality to Galaxy are available on the `eBiogenouest HUB wiki <https://www.e-biogenouest.org/wiki/ManArchiveGalaxy>`_ . | |
318 | |
319 -------- | |
320 | |
321 **Output type:** | |
322 | |
323 - Output type details:: | |
324 | |
325 No compression All files will be added in the current history. | |
326 Compressed by categories Files will be compressed by categories (snps, alleles, matches and tags) into 4 zip archives. These archives and batch files will be added in the current history. | |
327 Compressed all outputs All files will be compressed in a unique zip archive. Batch files will be added in the current history with the archive. | |
328 | |
329 | |
330 -------- | |
331 | |
332 **Project links:** | |
333 | |
334 `STACKS website <http://creskolab.uoregon.edu/stacks/>`_ . | |
335 | |
336 `STACKS manual <http://creskolab.uoregon.edu/stacks/stacks_manual.pdf>`_ . | |
337 | |
338 `STACKS google group <https://groups.google.com/forum/#!forum/stacks-users>`_ . | |
339 | |
340 -------- | |
341 | |
342 **References:** | |
343 | |
344 -J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013. | |
345 | |
346 -J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013. | |
347 | |
348 -J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011. | |
349 | |
350 -A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799-808, 2011. | |
351 | |
352 -P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011. | |
353 | |
354 -K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Sciences, 107(37):16196-200, 2010. | |
355 | |
356 -------- | |
357 | |
358 **Integrated by:** | |
359 | |
360 Yvan Le Bras and Cyril Monjeaud | |
361 | |
362 GenOuest Bio-informatics Core Facility | |
363 | |
364 UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France) | |
365 | |
366 support@genouest.org | |
367 | |
368 </help> | |
369 <citations> <!-- NOTE(review): the six DOIs appear to follow the order of the references listed in the help text; confirm each DOI against its paper. --> | |
370 <citation type="doi">10.1111/mec.12354</citation> | |
371 <citation type="doi">10.1111/mec.12330</citation> | |
372 <citation type="doi">10.1534/g3.111.000240</citation> | |
373 <citation type="doi">10.1534/genetics.111.127324</citation> | |
374 <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation> | |
375 <citation type="doi">10.1073/pnas.1006538107</citation> | |
376 <!-- BibTeX entry for the JOBIM 2013 proceedings paper, which has no DOI citation here. --> | |
377 <citation type="bibtex">@INPROCEEDINGS{JOBIM2013, | |
378 author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.}, | |
379 title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France}, | |
380 booktitle = {JOBIM 2013 Proceedings}, | |
381 year = {2013}, | |
382 url = {https://www.e-biogenouest.org/resources/128}, | |
383 pages = {97-106} | |
384 }</citation> | |
385 </citations> | |
386 </tool> | |
387 |