comparison stacks_refmap.xml @ 0:66d3c1ae3937 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks commit f3a59c91c231cc1582479109e776d05602b7f24d-dirty
author iuc
date Tue, 14 Jun 2016 14:06:44 -0400
parents
children 26ad8a52d9fd
comparison
equal deleted inserted replaced
-1:000000000000 0:66d3c1ae3937
1 <tool id="stacks_refmap" name="Stacks: reference map" version="@WRAPPER_VERSION@.0">
2 <description>the Stacks pipeline with a reference genome (ref_map.pl)</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="stdio"/>
8 <command><![CDATA[
9 #from os.path import splitext
10 #import re
11
12 #if str( $options_usage.rad_analysis_type ) == "genetic":
13 #for $input_parent in $options_usage.parent_alignments:
14 #if $input_parent.ext == "sam":
15 #set $data_path = splitext($input_parent.name)[0]
16 #set $data_path = re.sub(r'\.1$', '', $data_path)
17 #set $data_path = $data_path + ".sam"
18 #else:
19 #set $data_path = splitext($input_parent.name)[0]
20 #set $data_path = re.sub(r'\.1$', '', $data_path)
21 #set $data_path = $data_path + ".bam"
22 #end if
23
24 ln -s "${input_parent}" "${data_path}" &&
25 #end for
26
27 #for $input_progeny in $options_usage.progeny_alignments:
28
29 #if $input_progeny.ext == "sam":
30 #set $data_path = splitext($input_progeny.name)[0]
31 #set $data_path = re.sub(r'\.1$', '', $data_path)
32 #set $data_path = $data_path + ".sam"
33 #else:
34 #set $data_path = splitext($input_progeny.name)[0]
35 #set $data_path = re.sub(r'\.1$', '', $data_path)
36 #set $data_path = $data_path + ".bam"
37 #end if
38
39 ln -s "${input_progeny}" "${data_path}" &&
40 #end for
41 #else:
42 #for $input_indiv in $options_usage.individual_sample:
43
44 #if $input_indiv.ext == "sam":
45 #set $data_path = splitext($input_indiv.name)[0]
46 #set $data_path = re.sub(r'\.1$', '', $data_path)
47 #set $data_path = $data_path + ".sam"
48 #else:
49 #set $data_path = splitext($input_indiv.name)[0]
50 #set $data_path = re.sub(r'\.1$', '', $data_path)
51 #set $data_path = $data_path + ".bam"
52 #end if
53
54 ln -s "${input_indiv}" "${data_path}" &&
55 #end for
56 #end if
57
58 mkdir stacks_outputs
59
60 &&
61
62 ref_map.pl
63
64 -T \${GALAXY_SLOTS:-1}
65
66 #if str( $options_usage.rad_analysis_type ) == "genetic":
67 #for $input_parent in $options_usage.parent_alignments:
68 #if $input_parent.ext == "sam":
69 #set $data_path = splitext($input_parent.name)[0]
70 #set $data_path = re.sub(r'\.1$', '', $data_path)
71 #set $data_path = $data_path + ".sam"
72 #else:
73 #set $data_path = splitext($input_parent.name)[0]
74 #set $data_path = re.sub(r'\.1$', '', $data_path)
75 #set $data_path = $data_path + ".bam"
76 #end if
77
78 -p "${data_path}"
79 #end for
80
81 -A $options_usage.cross_type
82
83 #for $input_progeny in $options_usage.progeny_alignments:
84 #if $input_progeny.ext == "sam":
85 #set $data_path = splitext($input_progeny.name)[0]
86 #set $data_path = re.sub(r'\.1$', '', $data_path)
87 #set $data_path = $data_path + ".sam"
88 #else:
89 #set $data_path = splitext($input_progeny.name)[0]
90 #set $data_path = re.sub(r'\.1$', '', $data_path)
91 #set $data_path = $data_path + ".bam"
92 #end if
93
94 -r "${data_path}"
95 #end for
96 #else:
97 #for $input_indiv in $options_usage.individual_sample:
98
99 #if $input_indiv.ext == "sam":
100 #set $data_path = splitext($input_indiv.name)[0]
101 #set $data_path = re.sub(r'\.1$', '', $data_path)
102 #set $data_path = $data_path + ".sam"
103 #else:
104 #set $data_path = splitext($input_indiv.name)[0]
105 #set $data_path = re.sub(r'\.1$', '', $data_path)
106 #set $data_path = $data_path + ".bam"
107 #end if
108
109 -s "${data_path}"
110 #end for
111 -O "$options_usage.popmap"
112 #end if
113
114 #if str($m):
115 -m $m
116 #end if
117 #if str($P):
118 -P $P
119 #end if
120
121 ## Batch description
122 -b 1
123
124 ## No SQL recording
125 -S
126
127 ## snp_model
128 #if str( $snp_options.select_model.model_type) == "bounded":
129 --bound_low $snp_options.select_model.bound_low
130 --bound_high $snp_options.select_model.bound_high
131 --alpha $snp_options.select_model.alpha
132 #else if str( $snp_options.select_model.model_type) == "snp":
133 --alpha $snp_options.select_model.alpha
134 #end if
135
136 -o stacks_outputs
137
138 #if str( $options_usage.rad_analysis_type ) == "genetic":
139 @NORM_GENOTYPES_OUTPUT_LIGHT@
140 #end if
141
142 ## Stacks gzips its outputs only when the input is BAM (no option to control this); with SAM input no *.gz exists, so decompress only when the glob matches to avoid a spurious non-zero exit
143 && if ls stacks_outputs/*.gz > /dev/null 2>&1; then gunzip stacks_outputs/*.gz; fi
144 ]]></command>
145
146 <inputs>
147 <conditional name="options_usage">
148 <param name="rad_analysis_type" type="select" label="Select your usage">
149 <option value="genetic" selected="true">Genetic map</option>
150 <option value="population">Population</option>
151 </param>
152 <when value="genetic">
153 <param name="parent_alignments" format="sam,bam" type="data" multiple="true" label="Files containing parent alignments" argument="-p" />
154 <param name="progeny_alignments" format="sam,bam" type="data" multiple="true" optional="true" label="Files containing progeny alignments" argument="-r" />
155
156 <param name="cross_type" argument="-A" type="select" label="Cross type">
157 <expand macro="cross_types"/>
158 </param>
159 </when>
160 <when value="population">
161 <param name="individual_sample" format="sam,bam" type="data" multiple="true" label="Files containing an individual sample from a population" argument="-s" />
162 <param name="popmap" type="data" format="tabular,txt" label="Specify a population map" argument="-O" />
163 </when>
164 </conditional>
165
166 <param name="m" type="integer" value="3" label="Minimum depth of coverage" help="specify the minimum depth of coverage to report a stack in pstacks" argument="-m" />
167 <param name="P" type="integer" value="" optional="true" label="Minimum depth of coverage in 'progeny' individuals" help="specify the minimum depth of coverage to report a stack in pstacks for 'progeny' individuals" argument="-P" />
168
169 <!-- SNP Model options -->
170 <section name="snp_options" title="SNP_Model_Options" expanded="False">
171 <expand macro="snp_options"/>
172 </section>
173 </inputs>
174 <outputs>
175 <data format="txt" name="output_log" label="ref_map.log with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/ref_map.log" />
176
177 <data format="tabular" name="catalogtags" label="Catalog assembled loci (tags) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.tags.tsv" />
178 <data format="tabular" name="catalogsnps" label="Catalog model calls (snps) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.snps.tsv" />
179 <data format="tabular" name="catalogalleles" label="Catalog haplotypes (alleles) with ${tool.name} on ${on_string}" from_work_dir="stacks_outputs/batch_1.catalog.alleles.tsv" />
180
181 <expand macro="genotypes_output_light"/>
182 <expand macro="populations_output_light"/>
183
184 <collection name="tags" type="list" label="Assembled loci (tags) from ${on_string}">
185 <discover_datasets pattern="(?P&lt;name&gt;.+\.tags)\.tsv" ext="tabular" directory="stacks_outputs" />
186 </collection>
187
188 <collection name="snps" type="list" label="Model calls (snps) from each locus on ${on_string}">
189 <discover_datasets pattern="(?P&lt;name&gt;.+\.snps)\.tsv" ext="tabular" directory="stacks_outputs" />
190 </collection>
191
192 <collection name="alleles" type="list" label="Haplotypes (alleles) recorded from each locus on ${on_string}">
193 <discover_datasets pattern="(?P&lt;name&gt;.+\.alleles)\.tsv" ext="tabular" directory="stacks_outputs" />
194 </collection>
195
196 <collection name="matches" type="list" label="Matches to the catalog on ${on_string}">
197 <discover_datasets pattern="(?P&lt;name&gt;.+\.matches)\.tsv" ext="tabular" directory="stacks_outputs" />
198 </collection>
199
200 <collection name="all_output" type="list" label="Full output from ref_map on ${on_string}">
201 <discover_datasets pattern="(?P&lt;name&gt;.+\.(tags|snps|alleles|matches))\.tsv" ext="tabular" directory="stacks_outputs" />
202 <discover_datasets pattern="(?P&lt;name&gt;.+\.(haplotypes|genotypes|markers|hapstats|sumstats|sumstats_summary))\.tsv" ext="tabular" directory="stacks_outputs" />
203 <discover_datasets pattern="(?P&lt;name&gt;.+\.(genotypes))\.(loc|txt)" ext="txt" directory="stacks_outputs" />
204 </collection>
205 </outputs>
206
207 <tests>
208 <test>
209 <param name="options_usage|rad_analysis_type" value="genetic"/>
210 <param name="options_usage|parent_alignments" value="refmap/PopA_01.bam" />
211 <param name="options_usage|progeny_alignments" value="refmap/PopA_02.bam" />
212 <output name="output_log">
213 <assert_contents>
214 <has_text text="ref_map.pl completed" />
215 </assert_contents>
216 </output>
217
218 <!-- catalog -->
219 <output name="catalogsnps">
220 <assert_contents>
221 <has_text text="catalog generated" />
222 </assert_contents>
223 </output>
224 <output name="catalogalleles">
225 <assert_contents>
226 <has_text text="catalog generated" />
227 </assert_contents>
228 </output>
229 <output name="catalogtags">
230 <assert_contents>
231 <has_text text="catalog generated" />
232 </assert_contents>
233 </output>
234
235 <!-- genotypes -->
236 <output name="out_generic_haplo">
237 <assert_contents>
238 <has_text text="Catalog ID" />
239 </assert_contents>
240 </output>
241 <output name="out_sql_markers">
242 <assert_contents>
243 <has_text text="Total Genotypes" />
244 </assert_contents>
245 </output>
246 <output name="out_joinmap">
247 <assert_contents>
248 <has_text text="batch_1.genotypes_" />
249 </assert_contents>
250 </output>
251 <output name="out_sql_genotypes">
252 <assert_contents>
253 <has_text text="SQL ID" />
254 </assert_contents>
255 </output>
256 <output name="out_generic_haplo">
257 <assert_contents>
258 <has_text text="Seg Dist" />
259 </assert_contents>
260 </output>
261 <output name="out_sql_markers">
262 <assert_contents>
263 <has_text text="Total Genotypes" />
264 </assert_contents>
265 </output>
266
267 <!-- samples -->
268 <output_collection name="tags">
269 <element name="PopA_01.tags">
270 <assert_contents>
271 <has_text text="generated on " />
272 </assert_contents>
273 </element>
274 </output_collection>
275 <output_collection name="snps">
276 <element name="PopA_01.snps">
277 <assert_contents>
278 <has_text text="generated on " />
279 </assert_contents>
280 </element>
281 </output_collection>
282 <output_collection name="alleles">
283 <element name="PopA_01.alleles">
284 <assert_contents>
285 <has_text text="generated on " />
286 </assert_contents>
287 </element>
288 </output_collection>
289 <output_collection name="matches">
290 <element name="PopA_01.matches">
291 <assert_contents>
292 <has_text text="generated on " />
293 </assert_contents>
294 </element>
295 </output_collection>
296 </test>
297 <test>
298 <param name="options_usage|rad_analysis_type" value="population"/>
299 <param name="options_usage|individual_sample" value="refmap/PopA_01.bam,refmap/PopA_02.bam,refmap/PopA_03.bam,refmap/PopA_04.bam,refmap/PopB_01.bam,refmap/PopB_02.bam,refmap/PopB_03.bam,refmap/PopB_04.bam" />
300 <param name="options_usage|popmap" value="denovo_map/popmap.tsv" />
301 <output name="output_log">
302 <assert_contents>
303 <has_text text="ref_map.pl completed" />
304 </assert_contents>
305 </output>
306
307 <!-- catalog -->
308 <output name="catalogtags">
309 <assert_contents>
310 <has_text text="catalog generated on" />
311 </assert_contents>
312 </output>
313 <output name="catalogsnps">
314 <assert_contents>
315 <has_text text="catalog generated on" />
316 </assert_contents>
317 </output>
318 <output name="catalogalleles">
319 <assert_contents>
320 <has_text text="catalog generated on" />
321 </assert_contents>
322 </output>
323
324 <!-- populations -->
325 <output name="out_haplotypes">
326 <assert_contents>
327 <has_text text="PopA_01" />
328 </assert_contents>
329 </output>
330 <output name="out_hapstats">
331 <assert_contents>
332 <has_text text="Smoothed Gene Diversity" />
333 </assert_contents>
334 </output>
335 <output name="out_populations_log">
336 <assert_contents>
337 <has_text text="populations version" />
338 </assert_contents>
339 </output>
340 <output name="out_sumstats_sum">
341 <assert_contents>
342 <has_text text="Polymorphic Sites" />
343 </assert_contents>
344 </output>
345 <output name="out_sumstats">
346 <assert_contents>
347 <has_text text="Smoothed Pi" />
348 </assert_contents>
349 </output>
350
351 <!-- samples -->
352 <output_collection name="tags">
353 <element name="PopA_01.tags">
354 <assert_contents>
355 <has_text text="generated on " />
356 </assert_contents>
357 </element>
358 </output_collection>
359 <output_collection name="snps">
360 <element name="PopA_01.snps">
361 <assert_contents>
362 <has_text text="generated on " />
363 </assert_contents>
364 </element>
365 </output_collection>
366 <output_collection name="alleles">
367 <element name="PopA_01.alleles">
368 <assert_contents>
369 <has_text text="generated on " />
370 </assert_contents>
371 </element>
372 </output_collection>
373 <output_collection name="matches">
374 <element name="PopA_01.matches">
375 <assert_contents>
376 <has_text text="generated on " />
377 </assert_contents>
378 </element>
379 </output_collection>
380 </test>
381 </tests>
382
383 <help>
384 <![CDATA[
385 .. class:: infomark
386
387 **What it does**
388
389 This program expects data that have been aligned to a reference genome, and can accept data directly from Bowtie, or from any aligner that can produce SAM format. To avoid problems with dataset names, we recommend the use of the *Map with BWA for STACKS tool*. This program will execute each of the Stacks components: first, running pstacks on each of the samples specified, building loci (based on the reference alignment) and calling SNPs in each. Second, cstacks will be run to create a catalog of all loci specified as 'parents' or 'samples' on the command line, again using alignment to match loci in the catalog. Finally, sstacks will be executed to match each sample against the catalog. The ref_map.pl program will also load the results of each stage of the analysis: individual loci, the catalog, and matches against the catalog into the database (although this can be disabled). After matching the program will build a database index to speed up access (index_radtags.pl) and enable web-based filtering.
390
391 --------
392
393 **Input files**
394
395 - SAM, BAM
396
397 - Population map::
398
399 indv_01 1
400 indv_02 1
401 indv_03 1
402 indv_04 2
403 indv_05 2
404 indv_06 2
405
406 **Output files**
407
408 - XXX.tags.tsv file:
409
410 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
411
412 Notes: For the tags file, each stack will start in the file with a consensus sequence for the entire stack followed by the flags for that stack. Then, each individual read that was merged into that stack will follow. The next stack will start with another consensus sequence.
413
414
415 - XXX.snps.tsv file:
416
417 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
418
419 Notes: If a stack has two SNPs called within it, then there will be two lines in this file listing each one.
420
421
422 - XXX.alleles.tsv file:
423
424 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
425
426
427 - XXX.matches.tsv file:
428
429 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
430
431 Notes: Each line in this file records a match between a catalog locus and a locus in an individual, for a particular haplotype. The Batch ID plus the Catalog ID together represent a unique locus in the entire population, while the Sample ID and the Stack ID together represent a unique locus in an individual sample.
432
433
434 - other files:
435
436 See `Stacks output description <http://catchenlab.life.illinois.edu/stacks/manual/#files>`_
437
438 @STACKS_INFOS@
439 ]]>
440 </help>
441 <expand macro="citation" />
442 </tool>