comparison stacks_populations.xml @ 0:c1faa67441e9 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit f3a59c91c231cc1582479109e776d05602b7f24d-dirty
author iuc
date Tue, 14 Jun 2016 14:04:11 -0400
parents
children 2ac5c9616748
comparison
equal deleted inserted replaced
-1:000000000000 0:c1faa67441e9
1 <tool id="stacks_populations" name="Stacks: populations" version="@WRAPPER_VERSION@.0">
2 <description>analyze a population of individual samples ('populations' program)</description>
3 <macros> <!-- Shared macros and the @...@ tokens used by this wrapper are expected to be defined in macros.xml -->
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <command><![CDATA[
9 ## Create the working directory that is handed to populations through -P further down.
10 mkdir stacks_outputs
11
12 &&
13 ## Link every element of the input collection into stacks_outputs/, appending ".tsv" when the name lacks it.
14 #for $input_file in $options_usage.input_col:
15 #set $ext = ""
16 #if not str($input_file.name).endswith('.tsv'):
17 #set $ext = ".tsv"
18 #end if
19 ln -s "${input_file}" "stacks_outputs/${input_file.name}${ext}" &&
20 #end for
21 ## NOTE(review): link names are taken from .name of each element; confirm these always match the file names populations looks for.
22 populations
23 ## The backslash leaves GALAXY_SLOTS for the shell to expand at run time (1 when unset).
24 -t \${GALAXY_SLOTS:-1}
25
26 -P stacks_outputs
27 -M "$options_usage.popmap"
28
29 ## Data filtering: the two booleans below expand to their flag text or to an empty string
30 $options_filtering.write_single_snp
31 $options_filtering.write_random_snp
32 ## Optional floats (lnl, max_obs_het) are guarded with str(), relying on it being empty when no value was entered.
33 #if str($options_filtering.lnl):
34 --lnl_lim $options_filtering.lnl
35 #end if
36
37 -r $options_filtering.minperc
38 -p $options_filtering.minpop
39 -m $options_filtering.mindepth
40
41 #if str($options_filtering.max_obs_het):
42 --max_obs_het $options_filtering.max_obs_het
43 #end if
44
45 --min_maf $options_filtering.minminor
46 #if str( $options_filtering.correction_select.correction ) != "no_corr":
47 -f $options_filtering.correction_select.correction
48 --p_value_cutoff $options_filtering.correction_select.pcutoff
49 #end if
50
51 ## Fstats: expands to --fstats or to nothing
52 $fstats
53
54 #if $options_kernel.kernel:
55 -k
56 --window_size $options_kernel.window
57 #end if
58
59 ## Bootstrap resampling options: --bootstrap covers all statistics, otherwise emit the individually selected flags
60 #if $bootstrap_resampling.bootstrap_resampling_mode.bootstrap_all:
61 --bootstrap
62 #else:
63 $bootstrap_resampling.bootstrap_resampling_mode.bootstrap_pifis
64 $bootstrap_resampling.bootstrap_resampling_mode.bootstrap_fst
65 $bootstrap_resampling.bootstrap_resampling_mode.bootstrap_div
66 $bootstrap_resampling.bootstrap_resampling_mode.bootstrap_phist
67 #end if
68
69 #if str($bootstrap_resampling.bootstrap_reps):
70 --bootstrap_reps $bootstrap_resampling.bootstrap_reps
71 #end if
72 #if $bootstrap_resampling.bootstrap_wl:
73 --bootstrap_wl "$bootstrap_resampling.bootstrap_wl"
74 #end if
75
76 ## output section: every variable below is a boolean whose truevalue is the matching command-line flag
77 $populations_output.ordered_export
78 $populations_output.vcf
79 $populations_output.vcf_haplotypes
80 $populations_output.genepop
81 $populations_output.structure
82 $populations_output.fasta
83 $populations_output.fasta_strict
84 $populations_output.hzar
85 $populations_output.phase
86 $populations_output.fastphase
87 $populations_output.beagle
88 $populations_output.beagle_phased
89 $populations_output.plink
90 $populations_output.phylip
91 $populations_output.phylip_var
92 $populations_output.phylip_var_all
93 $populations_output.treemix
94
95 #if $populations_output.options_genomic.genomic:
96 --genomic
97 -e $populations_output.options_genomic.enzyme
98 #end if
99
100 ## output SQL file (as denovo/refmap) and fst/phi components (both flags are always passed, there is no matching input)
101 -s
102 --log_fst_comp
103
104 ## Advanced options: the optional black/whitelist datasets are only passed when one was supplied
105 #if $advanced_options.blacklist:
106 -B "$advanced_options.blacklist"
107 #end if
108 #if $advanced_options.whitelist:
109 -W "$advanced_options.whitelist"
110 #end if
111 -b $advanced_options.batchid
112 ]]></command>
113
114 <inputs>
115 <section name="options_usage" title="Input" expanded="true"> <!-- input_col elements are symlinked into the -P directory by the command; popmap is passed with -M -->
116 <param name="input_col" format="tabular,txt" type="data_collection" collection_type="list" label="Output from previous Stacks pipeline steps (e.g. denovo_map or refmap)" />
117 <param name="popmap" type="data" format="tabular,txt" label="Specify a population map" argument="-M" />
118 </section>
119
120 <section name="options_filtering" title="Data filtering options" expanded="true">
121 <!-- Numeric bounds are enforced by Galaxy before the job is built; minminor is limited to the 0 to 0.5 range stated in its label -->
122 <param name="minperc" argument="-r" type="float" value="0.5" min="0" max="1" label="Minimum percentage of individuals in a population required to process a locus for that population" />
123 <param name="minpop" argument="-p" type="integer" value="2" label="Minimum number of populations a locus must be present in to process a locus" />
124 <param name="mindepth" argument="-m" type="integer" value="1" label="Specify a minimum stack depth required for individuals at a locus" />
125 <param name="minminor" argument="--min_maf" type="float" value="0.25" min="0" max="0.5" label="Specify a minimum minor allele frequency required before calculating Fst at a locus (between 0 and 0.5)" />
126 <param name="max_obs_het" argument="--max_obs_het" type="float" value="" min="0" max="1" optional="true" label="Maximum observed heterozygosity required to process a nucleotide site at a locus." />
127 <!-- Fst correction: the command only adds -f and the p-value cutoff when a correction other than "no_corr" is selected -->
128 <conditional name="correction_select">
129 <param name="correction" type="select" label="Correction type" help="specify a correction to be applied to Fst values: 'p_value', 'bonferroni_win', or 'bonferroni_gen'" >
130 <option value="no_corr">No correction</option>
131 <option value="p_value">p_value</option>
132 <option value="bonferroni_win">bonferroni_win</option>
133 <option value="bonferroni_gen">bonferroni_gen</option>
134 </param>
135 <when value="no_corr"></when>
136 <when value="p_value">
137 <param name="pcutoff" type="float" value="0.05" label="P-value cutoff" help="required p-value to keep an Fst measurement (0.05 by default). Also used as base for Bonferroni correction" />
138 </when>
139 <when value="bonferroni_win">
140 <param name="pcutoff" type="float" value="0.05" label="P-value cutoff" help="required p-value to keep an Fst measurement (0.05 by default). Also used as base for Bonferroni correction" />
141 </when>
142 <when value="bonferroni_gen">
143 <param name="pcutoff" type="float" value="0.05" label="P-value cutoff" help="required p-value to keep an Fst measurement (0.05 by default). Also used as base for Bonferroni correction" />
144 </when>
145 </conditional>
146
147 <param name="lnl" type="float" value="" optional="true" argument="--lnl_lim" label="Filter loci with log likelihood values below this threshold" />
148
149 <param name="write_single_snp" argument="--write_single_snp" truevalue="--write_single_snp" falsevalue="" type="boolean" checked="false" label="Restrict data analysis to only the first SNP per locus." />
150 <param name="write_random_snp" argument="--write_random_snp" truevalue="--write_random_snp" falsevalue="" type="boolean" checked="false" label="Restrict data analysis to one random SNP per locus." />
151 </section>
152
153 <section name="populations_output" title="Output options" expanded="true"> <!-- each boolean below carries its command-line flag as truevalue and an empty falsevalue -->
154 <param name="ordered_export" argument="--ordered_export" truevalue="--ordered_export" falsevalue="" type="boolean" checked="false" label="If data is reference aligned, exports will be ordered; only a single representative of each overlapping site." />
155 <param name="vcf" argument="--vcf" truevalue="--vcf" falsevalue="" type="boolean" checked="false" label="Output results in Variant Call Format (VCF)" />
156 <param name="vcf_haplotypes" argument="--vcf_haplotypes" truevalue="--vcf_haplotypes" falsevalue="" type="boolean" checked="false" label="Output haplotypes in Variant Call Format (VCF)." />
157 <param name="genepop" argument="--genepop" truevalue="--genepop" falsevalue="" type="boolean" checked="false" label="Output results in GenePop Format" />
158 <param name="structure" argument="--structure" truevalue="--structure" falsevalue="" type="boolean" checked="false" label="Output results in Structure Format" />
159 <param name="fasta" argument="--fasta" truevalue="--fasta" falsevalue="" type="boolean" checked="false" label="Output full sequence for each unique haplotype, from each sample locus in FASTA format, regardless of plausibility." />
160 <param name="fasta_strict" argument="--fasta_strict" truevalue="--fasta_strict" falsevalue="" type="boolean" checked="false" label="Output full sequence for each haplotype, from each sample locus in FASTA format, only for biologically plausible loci." />
161 <param name="hzar" argument="--hzar" truevalue="--hzar" falsevalue="" type="boolean" checked="false" label="Output genotypes in Hybrid Zone Analysis using R (HZAR) format." />
162 <param name="phase" argument="--phase" truevalue="--phase" falsevalue="" type="boolean" checked="false" label="Output genotypes in PHASE format" />
163 <param name="fastphase" argument="--fastphase" truevalue="--fastphase" falsevalue="" type="boolean" checked="false" label="Output genotypes in fastPHASE format" />
164 <param name="beagle" argument="--beagle" truevalue="--beagle" falsevalue="" type="boolean" checked="false" label="Output genotypes in Beagle format" />
165 <param name="beagle_phased" argument="--beagle_phased" truevalue="--beagle_phased" falsevalue="" type="boolean" checked="false" label="Output haplotypes in Beagle format" />
166 <param name="plink" argument="--plink" truevalue="--plink" falsevalue="" type="boolean" checked="false" label="Output genotypes in PLINK format" />
167 <param name="phylip" argument="--phylip" truevalue="--phylip" falsevalue="" type="boolean" checked="false" label="Output nucleotides that are fixed-within, and variant among populations in Phylip format for phylogenetic tree construction" />
168 <param name="phylip_var" argument="--phylip_var" truevalue="--phylip_var" falsevalue="" type="boolean" checked="false" label="Include variable sites in the phylip output encoded using IUPAC notation." />
169 <param name="phylip_var_all" argument="--phylip_var_all" truevalue="--phylip_var_all" falsevalue="" type="boolean" checked="false" label="Include all sequence as well as variable sites in the phylip output encoded using IUPAC notation." />
170 <param name="treemix" argument="--treemix" truevalue="--treemix" falsevalue="" type="boolean" checked="false" label="Output SNPs in a format useable for the TreeMix program (Pickrell and Pritchard)." />
171 <!-- The enzyme selector only exists in the branch where genomic output is switched on; the command passes -e in that case only -->
172 <conditional name="options_genomic">
173 <param name="genomic" argument="--genomic" truevalue="--genomic" falsevalue="" type="boolean" checked="false" label="Output each nucleotide position (fixed or polymorphic) in all population members to a file" />
174 <when value="--genomic">
175 <param name="enzyme" argument="-e" type="select" label="Provide the restriction enzyme used" help="required if generating genomic output" >
176 <expand macro="enzymes"/>
177 </param>
178 </when>
179 <when value="">
180 </when>
181 </conditional>
182 </section>
183
184 <param name="fstats" argument="--fstats" truevalue="--fstats" falsevalue="" type="boolean" checked="false" label="Enable SNP and haplotype-based F statistics" />
185 <!-- Kernel smoothing: when enabled the command adds -k and passes "window" as the window_size argument. NOTE(review): the help text says the default is 150Kb while the value sent is 150; confirm which unit populations expects -->
186 <conditional name="options_kernel">
187 <param name="kernel" type="boolean" checked="false" truevalue="-k" falsevalue="" label="enable kernel-smoothed FIS, π, and FST calculations" />
188 <when value="-k">
189 <param name="window" type="integer" value="150" label="window size" help="distance over which to average values (sigma, default 150Kb)" />
190 </when>
191 <when value="">
192 </when>
193 </conditional>
194
195 <section name="bootstrap_resampling" title="Bootstrap resampling" expanded="false">
196 <conditional name="bootstrap_resampling_mode"> <!-- the "all statistics" switch replaces the four per-statistic switches, which are only offered when it is off -->
197 <param name="bootstrap_all" argument="--bootstrap" type="boolean" checked="false" truevalue="--bootstrap" falsevalue="" label="Enable bootstrap resampling for all smoothed statistics" />
198 <when value="--bootstrap">
199 </when>
200 <when value="">
201 <param name="bootstrap_pifis" argument="--bootstrap_pifis" type="boolean" checked="false" truevalue="--bootstrap_pifis" falsevalue="" label="Enable bootstrap resampling for smoothed SNP-based Pi and Fis calculations" />
202 <param name="bootstrap_fst" argument="--bootstrap_fst" type="boolean" checked="false" truevalue="--bootstrap_fst" falsevalue="" label="Enable bootstrap resampling for smoothed Fst calculations based on pairwise population comparison of SNPs" />
203 <param name="bootstrap_div" argument="--bootstrap_div" type="boolean" checked="false" truevalue="--bootstrap_div" falsevalue="" label="Enable bootstrap resampling for smoothed haplotype diversity and gene diversity calculations based on haplotypes" />
204 <param name="bootstrap_phist" argument="--bootstrap_phist" type="boolean" checked="false" truevalue="--bootstrap_phist" falsevalue="" label="Enable bootstrap resampling for smoothed Phi_st calculations based on haplotypes." />
205 </when>
206 </conditional>
207 <param name="bootstrap_reps" argument="--bootstrap_reps" type="integer" value="100" optional="true" label="Number of bootstrap resamplings to calculate" />
208 <param name="bootstrap_wl" argument="--bootstrap_wl" format="txt,tabular" type="data" optional="true" label="Only bootstrap loci contained in this whitelist" />
209 </section>
210
211 <!-- Advanced options: optional marker whitelist (-W) / blacklist (-B) datasets and the batch ID passed with -b -->
212 <section name="advanced_options" title="advanced options" expanded="False">
213 <param name="whitelist" argument="-W" format="txt,tabular" type="data" optional="true" label="Specify a file containing Whitelisted markers to include in the export" />
214 <param name="blacklist" argument="-B" format="txt,tabular" type="data" optional="true" label="Specify a file containing Blacklisted markers to be excluded from the export" />
215
216 <param name="batchid" type="integer" value="1" label="Batch ID to examine when exporting from the catalog" help="Only useful if you analyse data that was processed outside galaxy" />
217 </section>
218 </inputs>
219 <outputs> <!-- every output dataset is declared by the populations_output_full macro, which is not defined in this file (presumably macros.xml) -->
220 <expand macro="populations_output_full"/>
221 </outputs>
222
223 <tests>
224 <test> <!-- single end-to-end run: list collection input, p_value correction, all export formats and genomic output enabled -->
225 <param name="options_usage|input_col">
226 <collection type="list">
227 <element name="batch_1.catalog.alleles.tsv" ftype="tabular" value="genotypes/batch_1.catalog.alleles.tsv" />
228 <element name="batch_1.catalog.snps.tsv" ftype="tabular" value="genotypes/batch_1.catalog.snps.tsv" />
229 <element name="batch_1.catalog.tags.tsv" ftype="tabular" value="genotypes/batch_1.catalog.tags.tsv" />
230 <element name="PopA_01.alleles.tsv" ftype="tabular" value="genotypes/PopA_01.alleles.tsv" />
231 <element name="PopA_01.matches.tsv" ftype="tabular" value="genotypes/PopA_01.matches.tsv" />
232 <element name="PopA_01.snps.tsv" ftype="tabular" value="genotypes/PopA_01.snps.tsv" />
233 <element name="PopA_01.tags.tsv" ftype="tabular" value="genotypes/PopA_01.tags.tsv" />
234 <element name="PopA_02.alleles.tsv" ftype="tabular" value="genotypes/PopA_02.alleles.tsv" />
235 <element name="PopA_02.matches.tsv" ftype="tabular" value="genotypes/PopA_02.matches.tsv" />
236 <element name="PopA_02.snps.tsv" ftype="tabular" value="genotypes/PopA_02.snps.tsv" />
237 <element name="PopA_02.tags.tsv" ftype="tabular" value="genotypes/PopA_02.tags.tsv" />
238 </collection>
239 </param>
240 <param name="options_usage|popmap" ftype="tabular" value="denovo_map/popmap.tsv" />
241 <param name="options_filtering|correction_select|correction" value="p_value" />
242 <!-- Switch on every export format offered in the output section -->
243 <param name="populations_output|ordered_export" value="true" />
244 <param name="populations_output|vcf" value="true" />
245 <param name="populations_output|vcf_haplotypes" value="true" />
246 <param name="populations_output|genepop" value="true" />
247 <param name="populations_output|structure" value="true" />
248 <param name="populations_output|fasta" value="true" />
249 <param name="populations_output|fasta_strict" value="true" />
250 <param name="populations_output|hzar" value="true" />
251 <param name="populations_output|phase" value="true" />
252 <param name="populations_output|fastphase" value="true" />
253 <param name="populations_output|beagle" value="true" />
254 <param name="populations_output|beagle_phased" value="true" />
255 <param name="populations_output|plink" value="true" />
256 <param name="populations_output|phylip" value="true" />
257 <param name="populations_output|phylip_var" value="true" />
258 <param name="populations_output|phylip_var_all" value="true" />
259 <param name="populations_output|treemix" value="true" />
260 <!-- Genomic output is requested together with the restriction enzyme its branch asks for -->
261 <param name="populations_output|options_genomic|genomic" value="true" />
262 <param name="populations_output|options_genomic|enzyme" value="ecoRI" />
263
264 <!-- Spot-check one marker string in a selection of the files written by populations -->
265 <output name="out_haplotypes">
266 <assert_contents>
267 <has_text text="PopA_01" />
268 </assert_contents>
269 </output>
270 <output name="out_hapstats">
271 <assert_contents>
272 <has_text text="Smoothed Gene Diversity" />
273 </assert_contents>
274 </output>
275 <output name="out_populations_log">
276 <assert_contents>
277 <has_text text="populations version" />
278 </assert_contents>
279 </output>
280 <output name="out_sumstats_sum">
281 <assert_contents>
282 <has_text text="Polymorphic Sites" />
283 </assert_contents>
284 </output>
285 <output name="out_sumstats">
286 <assert_contents>
287 <has_text text="Smoothed Pi" />
288 </assert_contents>
289 </output>
290 <output name="out_vcf">
291 <assert_contents>
292 <has_text text="fileformat=VCFv4.0" />
293 </assert_contents>
294 </output>
295 <output name="out_treemix_pop">
296 <assert_contents>
297 <has_text text="TreeMix v1.1;" />
298 </assert_contents>
299 </output>
300 <output name="out_fasta">
301 <assert_contents>
302 <has_text text="AATTCGTTTGCTGCTTCAGGAATCTCTCGTATAATCTGAGTATGTGCGTACGTACGCTATTTAGATGGATAACCGACGCTGCCAGACGCGAGAC" />
303 </assert_contents>
304 </output>
305 </test>
306 </tests>
307 <help>
308 <![CDATA[
309 .. class:: infomark
310
311 **What it does**
312
313 This program will be executed in place of the genotypes program when a population is being processed through the pipeline. A map specifying which individuals belong to which population is submitted to the program, and the program will then calculate population genetics statistics, expected/observed heterozygosity, π, and FIS at each nucleotide position. The populations program will compare all populations pairwise to compute FST. If a set of data is reference aligned, then a kernel-smoothed FST will also be calculated.
314
315 --------
316
317 **Input files**
318
319 Output from denovo_map or ref_map
320
321 - Population map::
322
323 indv_01 1
324 indv_02 1
325 indv_03 1
326 indv_04 2
327 indv_05 2
328 indv_06 2
329
330
331 **Output files**
332
333 - XXX.tags.tsv file:
334
335 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
336
337 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
338
339
340 - XXX.snps.tsv file:
341
342 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
343
344 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
345
346
347 - XXX.alleles.tsv file:
348
349 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
350
351
352 - XXX.matches.tsv file:
353
354 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
355
356 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
357
358
359 - other files:
360
361 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
362
363 @STACKS_INFOS@
364 ]]>
365 </help>
366 <expand macro="citation" />
367 </tool>