comparison stacks_denovomap.xml @ 0:9d588fb2aee5 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit f3a59c91c231cc1582479109e776d05602b7f24d-dirty
author iuc
date Tue, 14 Jun 2016 14:07:29 -0400
parents
children bab631cc9f64
comparison
equal deleted inserted replaced
-1:000000000000 0:9d588fb2aee5
1 <tool id="stacks_denovomap" name="Stacks: de novo map" version="@WRAPPER_VERSION@.0">
2 <description>the Stacks pipeline without a reference genome (denovo_map.pl)</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <command><![CDATA[
9 #from os.path import splitext
10 #import re
11 ## Stage 1: symlink every input dataset into the working directory under a name derived from its dataset name: extension dropped by splitext, a trailing ".1" removed, then ".fq" appended for fastqsanger datasets and ".fa" otherwise.
12 #if str( $options_usage.rad_analysis_type ) == "genetic":
13 #for $input_parent in $options_usage.parent_sequences:
14 ## NOTE(review): the dataset name ends up inside double quotes in the shell command without any sanitising; confirm that names containing quotes, slashes or shell metacharacters cannot reach this point.
15 #if $input_parent.ext == "fastqsanger":
16 #set $data_path = splitext($input_parent.name)[0]
17 #set $data_path = re.sub(r'\.1$', '', $data_path)
18 #set $data_path = $data_path + ".fq"
19 #else:
20 #set $data_path = splitext($input_parent.name)[0]
21 #set $data_path = re.sub(r'\.1$', '', $data_path)
22 #set $data_path = $data_path + ".fa"
23 #end if
24 ## Link the dataset under the computed name; the trailing double ampersand chains the next shell command.
25 ln -s "${input_parent}" "${data_path}" &&
26 #end for
27 ## Same renaming and linking for the progeny datasets. NOTE(review): progeny_sequences is declared optional in the inputs; confirm this loop is a no-op when nothing is selected.
28 #for $input_progeny in $options_usage.progeny_sequences:
29
30 #if $input_progeny.ext == "fastqsanger":
31 #set $data_path = splitext($input_progeny.name)[0]
32 #set $data_path = re.sub(r'\.1$', '', $data_path)
33 #set $data_path = $data_path + ".fq"
34 #else:
35 #set $data_path = splitext($input_progeny.name)[0]
36 #set $data_path = re.sub(r'\.1$', '', $data_path)
37 #set $data_path = $data_path + ".fa"
38 #end if
39
40 ln -s "${input_progeny}" "${data_path}" &&
41 #end for
42 #else:
43 #for $input_indiv in $options_usage.individual_sample:
44 ## Population mode: same renaming and linking for each individual sample.
45 #if $input_indiv.ext == "fastqsanger":
46 #set $data_path = splitext($input_indiv.name)[0]
47 #set $data_path = re.sub(r'\.1$', '', $data_path)
48 #set $data_path = $data_path + ".fq"
49 #else:
50 #set $data_path = splitext($input_indiv.name)[0]
51 #set $data_path = re.sub(r'\.1$', '', $data_path)
52 #set $data_path = $data_path + ".fa"
53 #end if
54
55 ln -s "${input_indiv}" "${data_path}" &&
56 #end for
57 #end if
58 ## Stage 2: create the output directory, then build the denovo_map.pl call.
59 mkdir stacks_outputs
60
61 &&
62
63 denovo_map.pl
64 ## The thread count is expanded by the shell at run time from GALAXY_SLOTS, falling back to 1; the backslash keeps Cheetah from interpreting the variable.
65 -T \${GALAXY_SLOTS:-1}
66 ## Input arguments: the link names are recomputed exactly as in stage 1 and passed as -p and -r (genetic map) or -s (population).
67 #if str( $options_usage.rad_analysis_type ) == "genetic":
68 #for $input_parent in $options_usage.parent_sequences:
69 #if $input_parent.ext == "fastqsanger":
70 #set $data_path = splitext($input_parent.name)[0]
71 #set $data_path = re.sub(r'\.1$', '', $data_path)
72 #set $data_path = $data_path + ".fq"
73 #else:
74 #set $data_path = splitext($input_parent.name)[0]
75 #set $data_path = re.sub(r'\.1$', '', $data_path)
76 #set $data_path = $data_path + ".fa"
77 #end if
78
79 -p "${data_path}"
80 #end for
81
82 -A $options_usage.cross_type
83
84 #for $input_progeny in $options_usage.progeny_sequences:
85 #if $input_progeny.ext == "fastqsanger":
86 #set $data_path = splitext($input_progeny.name)[0]
87 #set $data_path = re.sub(r'\.1$', '', $data_path)
88 #set $data_path = $data_path + ".fq"
89 #else:
90 #set $data_path = splitext($input_progeny.name)[0]
91 #set $data_path = re.sub(r'\.1$', '', $data_path)
92 #set $data_path = $data_path + ".fa"
93 #end if
94
95 -r "${data_path}"
96 #end for
97 ## -P is emitted only in genetic-map mode, and only when str() of the value is non-empty.
98 #if str($assembly_options.P):
99 -P $assembly_options.P
100 #end if
101 #else:
102 #for $i_indiv, $input_indiv in enumerate($options_usage.individual_sample):
103 ## NOTE(review): the enumerate index i_indiv is not used anywhere in this loop body.
104 #if $input_indiv.ext == "fastqsanger":
105 #set $data_path = splitext($input_indiv.name)[0]
106 #set $data_path = re.sub(r'\.1$', '', $data_path)
107 #set $data_path = $data_path + ".fq"
108 #else:
109 #set $data_path = splitext($input_indiv.name)[0]
110 #set $data_path = re.sub(r'\.1$', '', $data_path)
111 #set $data_path = $data_path + ".fa"
112 #end if
113
114 -s "${data_path}"
115 #end for
116 -O "$options_usage.popmap"
117 #end if
118 ## Assembly options: -m and -N are emitted only when str() of the value is non-empty; -M and -n are always passed; the t and H booleans expand to their flag or to an empty string.
119 #if str($assembly_options.m):
120 -m $assembly_options.m
121 #end if
122 #if str($assembly_options.N):
123 -N $assembly_options.N
124 #end if
125 -M $assembly_options.M
126 -n $assembly_options.n
127 -t $assembly_options.t
128 -H $assembly_options.H
129
130 ## Batch ID, fixed to 1; the declared outputs use from_work_dir paths that start with batch_1
131 -b 1
132
133 ## No SQL recording (-S)
134 -S
135
136 ## SNP model: "bounded" passes bound_low, bound_high and alpha; "snp" passes alpha only; any other model_type adds no option
137 #if str( $snp_options.select_model.model_type) == "bounded":
138 --bound_low $snp_options.select_model.bound_low
139 --bound_high $snp_options.select_model.bound_high
140 --alpha $snp_options.select_model.alpha
141 #else if str( $snp_options.select_model.model_type) == "snp":
142 --alpha $snp_options.select_model.alpha
143 #end if
144
145 -o stacks_outputs
146 ## In genetic-map mode the NORM_GENOTYPES_OUTPUT_LIGHT macro token is appended here; its text is not defined in this file (presumably it comes from macros.xml).
147 #if str( $options_usage.rad_analysis_type ) == "genetic":
148 @NORM_GENOTYPES_OUTPUT_LIGHT@
149 #end if
150 ]]></command>
151
152 <inputs> <!-- Inputs depend on the selected usage: a genetic map takes parent reads, optional progeny reads and a cross type; a population takes individual samples and a population map. -->
153 <conditional name="options_usage">
154 <param name="rad_analysis_type" type="select" label="Select your usage">
155 <option value="genetic" selected="true">Genetic map</option>
156 <option value="population">Population</option>
157 </param>
158 <when value="genetic">
159 <param name="parent_sequences" argument="-p" format="fastqsanger,fasta" type="data" multiple="true" label="Files containing parent sequences" help="Files containing parent sequences from a mapping cross" />
160
161 <param name="progeny_sequences" argument="-r" format="fastqsanger,fasta" type="data" multiple="true" optional="true" label="Files containing progeny sequences" help="files containing progeny sequences from a mapping cross" />
162
163 <param name="cross_type" argument="-A" type="select" label="Cross type">
164 <expand macro="cross_types"/>
165 </param>
166 </when>
167 <when value="population">
168 <param name="individual_sample" argument="-s" format="fastqsanger,fasta" type="data" multiple="true" label="Files containing an individual sample from a population" help="files containing an individual sample from a population" />
169 <param name="popmap" argument="-O" type="data" format="tabular,txt" label="Specify a population map" />
170 </when>
171 </conditional>
172
173 <!-- Stack assembly options: m, P and N are optional with no default and are only added to the command line when filled in; M and n default to 2 and 1. -->
174 <section name="assembly_options" title="Assembly options" expanded="false">
175 <param name="m" argument="-m" type="integer" value="" optional="true" label="Minimum number of identical raw reads required to create a stack" />
176 <param name="P" argument="-P" type="integer" value="" optional="true" label="Minimum number of identical, raw reads required to create a stack in 'progeny' individuals" />
177 <param name="M" argument="-M" type="integer" value="2" label="Number of mismatches allowed between loci when processing a single individual"/>
178 <param name="N" argument="-N" type="integer" value="" optional="true" label="Number of mismatches allowed when aligning secondary reads" help="default is [-M]+2" />
179 <param name="n" argument="-n" type="integer" value="1" label="specify the number of mismatches allowed between loci when building the catalog"/>
180
181 <param name="t" argument="-t" type="boolean" checked="false" truevalue="-t" falsevalue="" label="Remove, or break up, highly repetitive RAD-Tags in the ustacks program" />
182 <param name="H" argument="-H" type="boolean" checked="false" truevalue="-H" falsevalue="" label="Disable calling haplotypes from secondary reads" />
183 </section>
184
185 <!-- SNP model options: the parameters are expanded from the snp_options macro, which is not defined in this file. -->
186 <section name="snp_options" title="SNP Model Options (ustacks options)" expanded="false">
187 <expand macro="snp_options"/>
188 </section>
189 </inputs>
190 <outputs>
191 <data name="output_log" format="txt" from_work_dir="stacks_outputs/denovo_map.log" label="denovo_map.log with ${tool.name} on ${on_string}" />
192 <!-- Catalog files of batch 1, picked up from the stacks_outputs directory -->
193 <data name="catalogtags" format="tabular" from_work_dir="stacks_outputs/batch_1.catalog.tags.tsv" label="Catalog assembled loci (tags) with ${tool.name} on ${on_string}" />
194 <data name="catalogsnps" format="tabular" from_work_dir="stacks_outputs/batch_1.catalog.snps.tsv" label="Catalog model calls (snps) with ${tool.name} on ${on_string}" />
195 <data name="catalogalleles" format="tabular" from_work_dir="stacks_outputs/batch_1.catalog.alleles.tsv" label="Catalog haplotypes (alleles) with ${tool.name} on ${on_string}" />
196 <!-- Further outputs are expanded from two macros that are not defined in this file -->
197 <expand macro="genotypes_output_light"/>
198 <expand macro="populations_output_light"/>
199 <!-- Discovered collections: each element is named after the matched file without its .tsv suffix -->
200 <collection name="tags" label="Assembled loci from ${on_string}" type="list">
201 <discover_datasets directory="stacks_outputs" pattern="(?P&lt;name&gt;.+\.tags)\.tsv" ext="tabular" />
202 </collection>
203
204 <collection name="snps" label="Model calls from each locus on ${on_string}" type="list">
205 <discover_datasets directory="stacks_outputs" pattern="(?P&lt;name&gt;.+\.snps)\.tsv" ext="tabular" />
206 </collection>
207
208 <collection name="alleles" label="Haplotypes/alleles recorded from each locus on ${on_string}" type="list">
209 <discover_datasets directory="stacks_outputs" pattern="(?P&lt;name&gt;.+\.alleles)\.tsv" ext="tabular" />
210 </collection>
211
212 <collection name="matches" label="Matches to the catalog on ${on_string}" type="list">
213 <discover_datasets directory="stacks_outputs" pattern="(?P&lt;name&gt;.+\.matches)\.tsv" ext="tabular" />
214 </collection>
215 <!-- Everything in one collection: per-sample and catalog files, population and genotype tables, and genotype .loc/.txt files -->
216 <collection name="all_output" label="Full output from denovo_map on ${on_string}" type="list">
217 <discover_datasets directory="stacks_outputs" pattern="(?P&lt;name&gt;.+\.(tags|snps|alleles|matches))\.tsv" ext="tabular" />
218 <discover_datasets directory="stacks_outputs" pattern="(?P&lt;name&gt;.+\.(haplotypes|genotypes|markers|hapstats|sumstats|sumstats_summary))\.tsv" ext="tabular" />
219 <discover_datasets directory="stacks_outputs" pattern="(?P&lt;name&gt;.+\.(genotypes))\.(loc|txt)" ext="txt" />
220 </collection>
221 </outputs>
222
223 <tests>
224 <test> <!-- Genetic-map mode with one parent and one progeny FASTQ file; cross_type is not set by this test. -->
225 <param name="options_usage|rad_analysis_type" value="genetic"/>
226 <param name="options_usage|parent_sequences" value="demultiplexed/PopA_01.1.fq" ftype="fastqsanger" />
227 <param name="options_usage|progeny_sequences" value="demultiplexed/PopA_02.1.fq" ftype="fastqsanger" />
228 <output name="output_log">
229 <assert_contents>
230 <has_text text="denovo_map.pl completed" />
231 </assert_contents>
232 </output>
233
234 <!-- catalog: the three batch_1.catalog files declared in the outputs section -->
235 <output name="catalogtags">
236 <assert_contents>
237 <has_text text="catalog generated on" />
238 </assert_contents>
239 </output>
240 <output name="catalogsnps">
241 <assert_contents>
242 <has_text text="catalog generated on" />
243 </assert_contents>
244 </output>
245 <output name="catalogalleles">
246 <assert_contents>
247 <has_text text="catalog generated on" />
248 </assert_contents>
249 </output>
250
251 <!-- genotypes: these output names are not declared in this file; presumably they come from the genotypes_output_light macro -->
252 <output name="out_generic_haplo">
253 <assert_contents>
254 <has_text text="Catalog ID" />
255 </assert_contents>
256 </output>
257 <output name="out_sql_markers">
258 <assert_contents>
259 <has_text text="Total Genotypes" />
260 </assert_contents>
261 </output>
262 <output name="out_joinmap">
263 <assert_contents>
264 <has_text text="batch_1.genotypes_" />
265 </assert_contents>
266 </output>
267 <output name="out_sql_genotypes">
268 <assert_contents>
269 <has_text text="SQL ID" />
270 </assert_contents>
271 </output>
272
273 <!-- samples: only the parent sample PopA_01 is checked in each discovered collection -->
274 <output_collection name="tags">
275 <element name="PopA_01.tags">
276 <assert_contents>
277 <has_text text="generated on " />
278 </assert_contents>
279 </element>
280 </output_collection>
281 <output_collection name="snps">
282 <element name="PopA_01.snps">
283 <assert_contents>
284 <has_text text="generated on " />
285 </assert_contents>
286 </element>
287 </output_collection>
288 <output_collection name="alleles">
289 <element name="PopA_01.alleles">
290 <assert_contents>
291 <has_text text="generated on " />
292 </assert_contents>
293 </element>
294 </output_collection>
295 <output_collection name="matches">
296 <element name="PopA_01.matches">
297 <assert_contents>
298 <has_text text="generated on " />
299 </assert_contents>
300 </element>
301 </output_collection>
302 </test>
303 <test> <!-- Population mode with eight individual FASTQ files (PopA_01 to PopA_04, PopB_01 to PopB_04) and a population map. -->
304 <param name="options_usage|rad_analysis_type" value="population"/>
305 <param name="options_usage|individual_sample" value="demultiplexed/PopA_01.1.fq,demultiplexed/PopA_02.1.fq,demultiplexed/PopA_03.1.fq,demultiplexed/PopA_04.1.fq,demultiplexed/PopB_01.1.fq,demultiplexed/PopB_02.1.fq,demultiplexed/PopB_03.1.fq,demultiplexed/PopB_04.1.fq" ftype="fastqsanger" />
306 <param name="options_usage|popmap" value="denovo_map/popmap.tsv" />
307 <output name="output_log">
308 <assert_contents>
309 <has_text text="denovo_map.pl completed" />
310 </assert_contents>
311 </output>
312
313 <!-- catalog: the three batch_1.catalog files declared in the outputs section -->
314 <output name="catalogtags">
315 <assert_contents>
316 <has_text text="catalog generated on" />
317 </assert_contents>
318 </output>
319 <output name="catalogsnps">
320 <assert_contents>
321 <has_text text="catalog generated on" />
322 </assert_contents>
323 </output>
324 <output name="catalogalleles">
325 <assert_contents>
326 <has_text text="catalog generated on" />
327 </assert_contents>
328 </output>
329
330 <!-- populations: these output names are not declared in this file; presumably they come from the populations_output_light macro -->
331 <output name="out_haplotypes">
332 <assert_contents>
333 <has_text text="PopA_01" />
334 </assert_contents>
335 </output>
336 <output name="out_hapstats">
337 <assert_contents>
338 <has_text text="Smoothed Gene Diversity" />
339 </assert_contents>
340 </output>
341 <output name="out_populations_log">
342 <assert_contents>
343 <has_text text="populations version" />
344 </assert_contents>
345 </output>
346 <output name="out_sumstats_sum">
347 <assert_contents>
348 <has_text text="Polymorphic Sites" />
349 </assert_contents>
350 </output>
351 <output name="out_sumstats">
352 <assert_contents>
353 <has_text text="Smoothed Pi" />
354 </assert_contents>
355 </output>
356
357 <!-- samples: only the first sample, PopA_01, is checked in each discovered collection -->
358 <output_collection name="tags">
359 <element name="PopA_01.tags">
360 <assert_contents>
361 <has_text text="generated on " />
362 </assert_contents>
363 </element>
364 </output_collection>
365 <output_collection name="snps">
366 <element name="PopA_01.snps">
367 <assert_contents>
368 <has_text text="generated on " />
369 </assert_contents>
370 </element>
371 </output_collection>
372 <output_collection name="alleles">
373 <element name="PopA_01.alleles">
374 <assert_contents>
375 <has_text text="generated on " />
376 </assert_contents>
377 </element>
378 </output_collection>
379 <output_collection name="matches">
380 <element name="PopA_01.matches">
381 <assert_contents>
382 <has_text text="generated on " />
383 </assert_contents>
384 </element>
385 </output_collection>
386 </test>
387 </tests>
388
389 <help>
390 <![CDATA[
391 .. class:: infomark
392
393 **What it does**
394
395 This program will run each of the Stacks components: first, running ustacks on each of the samples specified, building loci and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci that were marked as 'parents' or 'samples' on the command line, and finally, sstacks will be executed to match each sample against the catalog. A bit more detail on this process can be found in the FAQ. The denovo_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching, the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering. Note that this Galaxy tool always runs denovo_map.pl with the -S option (no SQL recording), so the results are not loaded into a database; they are returned as Galaxy datasets and collections.
396
397 --------
398
399 **Input files**
400
401 FASTQ, FASTA
402
403 - Population map::
404
405 indv_01 1
406 indv_02 1
407 indv_03 1
408 indv_04 2
409 indv_05 2
410 indv_06 2
411
412
413 **Output files**
414
415
416 - XXX.tags.tsv file:
417
418 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
419
420 Notes: In the tags file, each stack starts with a consensus sequence for the entire stack, followed by the flags for that stack. Each individual read that was merged into that stack then follows. The next stack starts with another consensus sequence.
421
422
423 - XXX.snps.tsv file:
424
425 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
426
427 Notes: If a stack has two SNPs called within it, there will be two lines in this file, one for each SNP.
428
429
430 - XXX.alleles.tsv file:
431
432 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
433
434
435 - XXX.matches.tsv file:
436
437 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
438
439 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
440
441
442 - other files:
443
444 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
445
446 @STACKS_INFOS@
447 ]]>
448 </help>
449 <expand macro="citation" />
450 </tool>
451