comparison STACKS_refmap.xml @ 0:d6ba40f6c824

first commit
author cmonjeau
date Mon, 24 Aug 2015 09:29:12 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:d6ba40f6c824
1 <tool id="STACKSrefmap" name="STACKS : Reference map" force_history_refresh="True">
2 <description>Run the STACKS ref_map.pl wrapper</description>
3
<!-- Config files handed to the wrapper script: each one holds one "display_name::dataset" line per selected input dataset and is left empty when its usage mode (or the progeny option) is not selected. -->
4 <configfiles>
5 <configfile name="parent_sequences">
6 #if str( $options_usage.options_usage_selector ) == "genetic"
7 #for $input in $options_usage.parent_sequence:
8 ${input.display_name}::${input}
9 #end for
10 #end if
11 </configfile>
12 <configfile name="progeny_sequences">
13 #if str( $options_usage.options_usage_selector ) == "genetic" and str( $options_usage.options_progeny.options_progeny_selector ) == "yes"
14 #for $input in $options_usage.options_progeny.progeny_sequence:
15 ${input.display_name}::${input}
16 #end for
17 #end if
18 </configfile>
19 <configfile name="individual_samples">
20 #if str( $options_usage.options_usage_selector ) == "population"
21 #for $input in $options_usage.individual_sample:
22 ${input.display_name}::${input}
23 #end for
24 #end if
25 </configfile>
26 </configfiles>
<!-- Tool dependency: the stacks package, pinned to version 1.18. -->
27 <requirements>
28 <requirement type="package" version="1.18">stacks</requirement>
29 </requirements>
30
31 <command interpreter="python">
32 STACKS_refmap.py
## Genetic map mode passes the parent list (-p) and, when enabled, the progeny list (-r); population mode passes the sample list (-s) and, when enabled, a population map (-O).
33 #if str( $options_usage.options_usage_selector ) == "genetic"
34 -p $parent_sequences
35 #if str( $options_usage.options_progeny.options_progeny_selector ) == "yes"
36 -r $progeny_sequences
37 #end if
38 #else
39 -s $individual_samples
40 #if str( $options_usage.options_popmap.popmap_selector) == "yes"
41 -O $options_usage.options_popmap.popmap
42 #end if
43 #end if
44 -n $mismatchbetlocibuild
45 -m $mincov
46 --bound_low $snp_options.boundlow
47 --bound_high $snp_options.boundhigh
48 --alpha $snp_options.alpha
## Paths of the catalog and log output datasets, followed by the selected compression mode.
49 --catalogsnps $catalogsnps
50 --catalogalleles $catalogalleles
51 --catalogtags $catalogtags
52 --logfile $output
53 --compress_output $output_compress
54 ## additional outputs: the zip archive datasets declared in the outputs section
55 --total_output $total_output
56 --tags_output $tags_output
57 --snps_output $snps_output
58 --alleles_output $alleles_output
59 --matches_output $matches_output
60
61 </command>
62
63 <inputs>
<!-- Usage mode: "genetic" takes parent files and, optionally, progeny files; "population" takes individual sample files and, optionally, a population map. -->
64 <conditional name="options_usage">
65 <param name="options_usage_selector" type="select" label="Select your usage">
66 <option value="genetic" selected="true">Genetic map</option>
67 <option value="population">Population</option>
68 </param>
69 <when value="genetic">
70 <param name="parent_sequence" format="sam,zip,tar.gz" type="data" multiple="true" label="Files containing parent sequences" help="SAM/ZIP/TAR.GZ files" />
71 <conditional name="options_progeny">
72 <param name="options_progeny_selector" type="select" label="Use progeny files">
73 <option value="yes" selected="true">Yes</option>
74 <option value="no">No</option>
75 </param>
76 <when value="yes">
77 <param name="progeny_sequence" format="sam,zip,tar.gz" type="data" multiple="true" label="Files containing progeny sequences" help="SAM/ZIP/TAR.GZ files containing progeny sequences from a mapping cross" />
78 </when>
79 <when value="no">
80 </when>
81 </conditional>
82
83 </when>
84 <when value="population">
85 <param name="individual_sample" format="sam,zip,tar.gz" type="data" multiple="true" label="Files containing an individual sample from a population" help="SAM/ZIP/TAR.GZ files." />
86 <conditional name="options_popmap">
87 <param name="popmap_selector" type="select" label="Analyzing one or more populations?" >
88 <option value="no" selected="true">No</option>
89 <option value="yes">Yes</option>
90 </param>
91 <when value="no"></when>
92 <when value="yes">
93 <param name="popmap" type="data" format="tabular,txt" label="Specify a population map" help="If analyzing one or more populations, specify a population map" />
94 </when>
95 </conditional>
96 </when>
97 </conditional>
98
99 <param name="mismatchbetlocibuild" type="integer" value="0" label="specify the number of mismatches allowed between loci when building the catalog" />
100 <param name="mincov" type="integer" value="1" label="Minimum depth of coverage" help="specify the minimum depth of coverage to report a stack in pstacks" />
101 <!-- SNP model options: lower and upper bounds for the error rate epsilon, and the chi square significance level alpha -->
102 <section name="snp_options" title="SNP_Model_Options" expanded="False">
103 <param name="boundlow" type="float" value="0.0" min="0.0" max="1.0" label="lower bound for epsilon, the error rate" help="between 0 and 1.0"/>
104 <param name="boundhigh" type="float" value="1.0" min="0.0" max="1.0" label="upper bound for epsilon, the error rate" help="between 0 and 1.0" />
105 <param name="alpha" type="float" value="0.05" min="0.001" max="0.1" label="chi square significance level required to call a heterozygote or homozygote" help="either 0.1, 0.05 (default), 0.01, or 0.001" />
106 </section>
107 <!-- Output options: no compression, one archive per category, or a single archive of all outputs; the value drives the filters on the zip outputs -->
108 <param name="output_compress" type="select" label="Output type" help="please see below for details">
109 <option value="default" selected="true">No compression</option>
110 <option value="categories">Compressed by categories</option>
111 <option value="total">Compressed all outputs</option>
112 </param>
113 </inputs>
114 <outputs>
115
116 <data format="txt" name="output" label="result.log with ${tool.name} on ${on_string}" />
<!-- Hidden dataset whose discover_datasets rule collects extra files from the galaxy_outputs directory and makes them visible -->
117 <data format="txt" name="additional" label="additional file with ${tool.name}" hidden="true">
118 <discover_datasets pattern="__designation_and_ext__" directory="galaxy_outputs" visible="true" />
119 </data>
120
121 <data format="tabular" name="catalogsnps" label="catalog.snps with ${tool.name} on ${on_string}" />
122 <data format="tabular" name="catalogalleles" label="catalog.alleles with ${tool.name} on ${on_string}" />
123 <data format="tabular" name="catalogtags" label="catalog.tags with ${tool.name} on ${on_string}" />
124
125 <!-- Additional zip archives: each one is kept only when its filter matches the selected output_compress value ("total" for the single archive, "categories" for the per-type archives) -->
126 <data format="zip" name="total_output" label="total_output.zip with ${tool.name} on ${on_string}" >
127 <filter>output_compress == "total"</filter>
128 </data>
129 <data format="zip" name="tags_output" label="tags_output.zip with ${tool.name} on ${on_string}" >
130 <filter>output_compress == "categories"</filter>
131 </data>
132 <data format="zip" name="snps_output" label="snps_output.zip with ${tool.name} on ${on_string}" >
133 <filter>output_compress == "categories"</filter>
134 </data>
135 <data format="zip" name="alleles_output" label="alleles_output.zip with ${tool.name} on ${on_string}" >
136 <filter>output_compress == "categories"</filter>
137 </data>
138 <data format="zip" name="matches_output" label="matches_output.zip with ${tool.name} on ${on_string}" >
139 <filter>output_compress == "categories"</filter>
140 </data>
141 </outputs>
142 <help>
143
144 .. class:: infomark
145
146 **What it does**
147
148 This program expects data that have been aligned to a reference genome, and can accept data directly from Bowtie, or from any aligner that can produce SAM format. To avoid dataset naming problems, we recommend the use of the *Map with BWA for STACKS tool*. This program will execute each of the Stacks components: first, running pstacks on each of the samples specified, building loci (based on the reference alignment) and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci specified as 'parents' or 'samples' on the command line, again using alignment to match loci in the catalog. Finally, sstacks will be executed to match each sample against the catalog. The ref_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering.
149
150
151 --------
152
153
154 **Created by:**
155
156 Stacks was developed by Julian Catchen with contributions from Angel Amores, Paul Hohenlohe, and Bill Cresko
157
158 --------
159
160 **Example:**
161
162 Input files:
163
164 - SAM, zip, tar.gz
165
166 - Population map::
167
168 indv_01 1
169 indv_02 1
170 indv_03 1
171 indv_04 2
172 indv_05 2
173 indv_06 2
174
175 Output files:
176
177 - XXX.tags.tsv file::
178
179 Column Name Description
180 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
181 2 Sample ID Each sample passed through Stacks gets a unique id for that sample.
182 3 Stack ID Each stack formed gets an ID.
183 4 Chromosome If aligned to a reference genome using pstacks, otherwise it is blank.
184 5 Basepair If aligned to ref genome using pstacks.
185 6 Strand If aligned to ref genome using pstacks.
186 7 Sequence Type Either 'consensus', 'primary' or 'secondary', see the Stacks paper for definitions of these terms.
187 8 Sequence ID The individual sequence read that was merged into this stack.
188 9 Sequence The raw sequencing read.
189 10 Deleveraged Flag If "1", this stack was processed by the deleveraging algorithm and was broken down from a larger stack.
190 11 Blacklisted Flag If "1", this stack was still confounded despite processing by the deleveraging algorithm.
191 12 Lumberjackstack Flag If "1", this stack was set aside due to having an extreme depth of coverage.
192
193 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
194
195
196 - XXX.snps.tsv file::
197
198 Column Name Description
199 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
200 2 Sample ID
201 3 Stack ID
202 4 SNP Column
203 5 Likelihood ratio From the SNP-calling model.
204 6 Rank_1 Majority nucleotide.
205 7 Rank_2 Alternative nucleotide.
206
207 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
208
209
210 - XXX.alleles.tsv file::
211
212 Column Name Description
213 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
214 2 Sample ID
215 3 Stack ID
216 4 Haplotype The haplotype, as constructed from the called SNPs at each locus.
217 5 Percent Percentage of reads that have this haplotype
218 6 Count Raw number of reads that have this haplotype
219
220
221 - XXX.matches.tsv file::
222
223 Column Name Description
224 1 Sql ID This field will always be "0", however the MySQL database will assign an ID when it is loaded.
225 2 Batch ID
226 3 Catalog ID
227 4 Sample ID
228 5 Stack ID
229 6 Haplotype
230 7 Stack Depth
231
232 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
233
234
235 - batch_X.sumstats.tsv Summary Statistics Output::
236
237 Batch ID The batch identifier for this data set.
238 Locus ID Catalog locus identifier.
239 Chromosome If aligned to a reference genome.
240 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand).
241 Column The nucleotide site within the catalog locus.
242 Population ID The ID supplied to the populations program, as written in the population map file.
243 P Nucleotide The most frequent allele at this position in this population.
244 Q Nucleotide The alternative allele.
245 Number of Individuals Number of individuals sampled in this population at this site.
246 P Frequency of most frequent allele.
247 Observed Heterozygosity The proportion of individuals that are heterozygotes in this population.
248 Observed Homozygosity The proportion of individuals that are homozygotes in this population.
249 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium.
250 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium.
251 pi An estimate of nucleotide diversity.
252 Smoothed pi A weighted average of pi depending on the surrounding 3 sigma of sequence in both directions.
253 Smoothed pi P-value If bootstrap resampling is enabled, a p-value ranking the significance of pi within this population.
254 FIS The inbreeding coefficient of an individual (I) relative to the subpopulation (S).
255 Smoothed FIS A weighted average of FIS depending on the surrounding 3 sigma of sequence in both directions.
256 Smoothed FIS P-value If bootstrap resampling is enabled, a p-value ranking the significance of FIS within this population.
257 Private allele True (1) or false (0), depending on whether this allele occurs only in this population.
258
259 - batch_X.fst_Y-Z.tsv Pairwise FST Output::
260
261 Batch ID The batch identifier for this data set.
262 Locus ID Catalog locus identifier.
263 Population ID 1 The ID supplied to the populations program, as written in the population map file.
264 Population ID 2 The ID supplied to the populations program, as written in the population map file.
265 Chromosome If aligned to a reference genome.
266 Basepair If aligned to a reference genome. This is the alignment of the whole catalog locus. The exact basepair reported is aligned to the location of the RAD site (depending on whether alignment is to the positive or negative strand).
267 Column The nucleotide site within the catalog locus.
268 Overall pi An estimate of nucleotide diversity across the two populations.
269 FST A measure of population differentiation.
270 FET p-value P-value describing if the FST measure is statistically significant according to Fisher's Exact Test.
271 Odds Ratio Fisher's Exact Test odds ratio
272 CI High Fisher's Exact Test confidence interval.
273 CI Low Fisher's Exact Test confidence interval.
274 LOD Score Logarithm of odds score.
275 Expected Heterozygosity Heterozygosity expected under Hardy-Weinberg equilibrium.
276 Expected Homozygosity Homozygosity expected under Hardy-Weinberg equilibrium.
277 Corrected FST FST with either the FET p-value, or a window-size or genome size Bonferroni correction.
278 Smoothed FST A weighted average of FST depending on the surrounding 3 sigma of sequence in both directions.
279 Smoothed FST P-value If bootstrap resampling is enabled, a p-value ranking the significance of FST within this pair of populations.
280
281
282 Instructions for adding archive management functionality to Galaxy are available on the `eBiogenouest HUB wiki &lt;https://www.e-biogenouest.org/wiki/ManArchiveGalaxy&gt;`_ .
283
284
285 --------
286
287 **Project links:**
288
289 `STACKS website &lt;http://creskolab.uoregon.edu/stacks/&gt;`_ .
290
291 `STACKS manual &lt;http://creskolab.uoregon.edu/stacks/stacks_manual.pdf&gt;`_ .
292
293 `STACKS google group &lt;https://groups.google.com/forum/#!forum/stacks-users&gt;`_ .
294
295 --------
296
297 **References:**
298
299 -J. Catchen, P. Hohenlohe, S. Bassham, A. Amores, and W. Cresko. Stacks: an analysis tool set for population genomics. Molecular Ecology. 2013.
300
301 -J. Catchen, S. Bassham, T. Wilson, M. Currey, C. O'Brien, Q. Yeates, and W. Cresko. The population structure and recent colonization history of Oregon threespine stickleback determined using restriction-site associated DNA-sequencing. Molecular Ecology. 2013.
302
303 -J. Catchen, A. Amores, P. Hohenlohe, W. Cresko, and J. Postlethwait. Stacks: building and genotyping loci de novo from short-read sequences. G3: Genes, Genomes, Genetics, 1:171-182, 2011.
304
305 -A. Amores, J. Catchen, A. Ferrara, Q. Fontenot and J. Postlethwait. Genome evolution and meiotic maps by massively parallel DNA sequencing: Spotted gar, an outgroup for the teleost genome duplication. Genetics, 188:799-808, 2011.
306
307 -P. Hohenlohe, S. Amish, J. Catchen, F. Allendorf, G. Luikart. RAD sequencing identifies thousands of SNPs for assessing hybridization between rainbow trout and westslope cutthroat trout. Molecular Ecology Resources, 11(s1):117-122, 2011.
308
309 -K. Emerson, C. Merz, J. Catchen, P. Hohenlohe, W. Cresko, W. Bradshaw, C. Holzapfel. Resolving postglacial phylogeography using high-throughput sequencing. Proceedings of the National Academy of Sciences, 107(37):16196-200, 2010.
310
311 --------
312
313 **Integrated by:**
314
315 Yvan Le Bras and Cyril Monjeaud
316
317 GenOuest Bio-informatics Core Facility
318
319 UMR 6074 IRISA INRIA-CNRS-UR1 Rennes (France)
320
321 support@genouest.org
322
323 If you use this tool in Galaxy, please cite :
324
325 `Y. Le Bras, A. Roult, C. Monjeaud, M. Bahin, O. Quenez, C. Heriveau, A. Bretaudeau, O. Sallou, O. Collin, Towards a Life Sciences Virtual Research Environment : an e-Science initiative in Western France. JOBIM 2013. &lt;https://www.e-biogenouest.org/resources/128&gt;`_
326
327
328 </help>
<!-- Six DOI citations (presumably the six Stacks papers listed in the help's References section; verify the mapping) followed by a BibTeX entry for the JOBIM 2013 proceedings paper named in the help. -->
329 <citations>
330 <citation type="doi">10.1111/mec.12354</citation>
331 <citation type="doi">10.1111/mec.12330</citation>
332 <citation type="doi">10.1534/g3.111.000240</citation>
333 <citation type="doi">10.1534/genetics.111.127324</citation>
334 <citation type="doi">10.1111/j.1755-0998.2010.02967.x</citation>
335 <citation type="doi">10.1073/pnas.1006538107</citation>
336
337 <citation type="bibtex">@INPROCEEDINGS{JOBIM2013,
338 author = {Le Bras, Y. and ROULT, A. and Monjeaud, C. and Bahin, M. and Quenez, O. and Heriveau, C. and Bretaudeau, A. and Sallou, O. and Collin, O.},
339 title = {Towards a Life Sciences Virtual Research Environment: An e-Science initiative in Western France},
340 booktitle = {JOBIM 2013 Proceedings},
341 year = {2013},
342 url = {https://www.e-biogenouest.org/resources/128},
343 pages = {97-106}
344 }</citation>
345 </citations>
346 </tool>
347