comparison stacks_gstacks.xml @ 0:d35cb34f2b85 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit b395fa36fa826e26085820ba3a9faacaeddcb460
author iuc
date Mon, 01 Jul 2019 10:59:14 -0400
parents
children 27359c6bf3e3
comparison
equal deleted inserted replaced
-1:000000000000 0:d35cb34f2b85
<tool id="stacks2_gstacks"
      name="Stacks2: gstacks"
      profile="@PROFILE@"
      version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@">
<description>Call variants, genotypes and haplotype</description>
<!-- Shared definitions for the expand elements and @...@ placeholders used in this file
     (presumably also the source of @PROFILE@ and the version tokens; confirm in macros.xml). -->
<macros>
    <import>macros.xml</import>
</macros>
<!-- samtools is listed in addition to the shared requirements because the command
     section pipes the alignment BAM files written by gstacks through samtools view. -->
<expand macro="requirements">
    <requirement type="package" version="1.9">samtools</requirement>
</expand>
<expand macro="version_cmd"/>
<command detect_errors="aggressive"><![CDATA[
@FASTQ_INPUT_FUNCTIONS@

## Stage the inputs: bam_inputs/ receives links to the input BAM files,
## stacks_outputs/ is the directory gstacks writes to (-O).
mkdir bam_inputs stacks_outputs &&
#if $mode_cond.mode_select == "denovo" and not $popmap:
    ## de novo mode without population map: exactly one BAM file is accepted
    ## and linked as bam_inputs/catalog.bam
    ## since collections have no len .. yet
    #try:
        #set count = len($input_bam)
    #except:
        #set count = len($input_bam.keys())
    #end try
    #if count == 1:
        #for $bam in $input_bam:
            ln -s '$bam' bam_inputs/catalog.bam &&
        #end for
    #else
        >&2 echo "exactly one (merged) bam file is needed in denovo mode if no population map is given" &&
        exit 1 &&
    #end if
#else
@BAM_INPUT@
#end if

gstacks

#if $mode_cond.mode_select == "denovo":
    -P bam_inputs
    $mode_cond.ignore_pe_reads
    #if $mode_cond.advanced_cond.advanced_select == "yes":
        --kmer-length $mode_cond.advanced_cond.kmer_length
        --max-debruijn-reads $mode_cond.advanced_cond.max_debruijn_reads
        --min-kmer-cov $mode_cond.advanced_cond.min_kmer_cov
        $mode_cond.advanced_cond.write_alignments
    #end if
#else:
    ## reference-based mode: input directory plus population map, or the
    ## list of BAM files held in bamlist (set outside this file, presumably by the BAM_INPUT token)
    #if $popmap
        -I bam_inputs
    #else
        $bamlist
    #end if
    #if $mode_cond.paired_cond.paired_select == ''
        $mode_cond.paired_cond.rm_unpaired_reads
        $mode_cond.paired_cond.rm_pcr_duplicates
    #else:
        $mode_cond.paired_cond.paired_select
    #end if
    #if $mode_cond.advanced_cond.advanced_select == "yes":
        --min-mapq $mode_cond.advanced_cond.min_mapq
        --max-clipped $mode_cond.advanced_cond.max_clipped
        --max-insert-len $mode_cond.advanced_cond.max_insert_len
        $mode_cond.advanced_cond.details
        --phasing-cooccurrences-thr-range $mode_cond.advanced_cond.phasing_cooccurrences_thr_min,$mode_cond.advanced_cond.phasing_cooccurrences_thr_max
        $mode_cond.advanced_cond.phasing_dont_prune_hets
    #end if
#end if
#if $popmap
    -M '$popmap'
#end if
-O stacks_outputs
-t \${GALAXY_SLOTS:-1}

##Model options:
--model $model_cond.model
--var-alpha $model_cond.var_alpha
--gt-alpha $model_cond.gt_alpha


## the bam files generated by gstacks (--write-alignments) are seemingly buggy
## (https://groups.google.com/d/msg/stacks-users/CazwJY1DPGA/7vuahiB2GgAJ)
## so we fix them temporarily by piping them through samtools view (disabling all
## exit codes and stderr output) this adds the samtools requirement
## for later versions where this is fixed the output bam files could just be moved
## to stacks_outputs if this is still necessary
#if $mode_cond.mode_select == "denovo" and $mode_cond.advanced_cond.advanced_select == "yes" and $mode_cond.advanced_cond.write_alignments != ""
    #if $popmap:
        && for b in bam_inputs/*alns.bam; do (samtools view -b "\$b" || true) 2> /dev/null > stacks_outputs/\$(basename "\$b"); done
    #else
        && (samtools view -b bam_inputs/alignments.bam || true) 2> /dev/null > stacks_outputs/alignments.bam
    #end if
#end if


## annoyingly gstacks creates stacks_outputs/gstacks.log
## instead of just writing to stderr as the other tools
## hence we do not use the tokens and move gstacks.log to the log output instead
## (the target path is quoted like the other dataset paths in this template)
#if $output_log
    && mv stacks_outputs/gstacks.log '$output_log'
#end if

@EXTRACT_VCF@

## TODO extract individual distributions from stacks_outputs/gstacks.log.distribs
## alternative extra tool
## for i in \$(stacks-dist-extract stacks_outputs/gstacks.log.distribs)
## do
## stacks-dist-extract stacks_outputs/gstacks.log.distribs $i > stacks_outputs/gstacks.log.\$i.tsv
## done
## TODO make optional output collection
]]></command>
109
<inputs>
    <expand macro="bam_input_macro"/>
    <param name="popmap" type="data" format="tabular,txt" label="Population map" help="If set, matching will be done only for samples listed in this file" optional="true" argument="-M" />

    <!-- Run mode; each branch exposes its own option set and its own advanced_cond. -->
    <conditional name="mode_cond">
        <param name="mode_select" type="select" label="Mode">
            <option value="denovo" selected="true">De novo mode</option>
            <option value="refbased">Reference-based</option>
        </param>
        <when value="denovo">
            <param argument="--ignore-pe-reads" name="ignore_pe_reads" type="boolean" checked="false" truevalue="--ignore-pe-reads" falsevalue="" label="Ignore paired-end reads" help="ignore paired-end reads even if present in the input" />
            <conditional name="advanced_cond">
                <param name="advanced_select" type="select" label="Advanced options">
                    <option value="no">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="yes">
                    <param argument="--kmer-length" name="kmer_length" type="integer" value="31" min="2" max="31" label="K-mer length for the de Bruijn graph" />
                    <param argument="--max-debruijn-reads" name="max_debruijn_reads" type="integer" value="1000" min="1" label="Maximum number of reads to use in the de Bruijn graph" />
                    <param argument="--min-kmer-cov" name="min_kmer_cov" type="integer" value="2" label="Minimum coverage to consider a kmer" />
                    <param argument="--write-alignments" name="write_alignments" type="boolean" checked="false" truevalue="--write-alignments" falsevalue="" label="save read alignments" help="heavy BAM files"/>
                </when>
                <when value="no"/>
            </conditional>
        </when>
        <when value="refbased">
            <conditional name="paired_cond">
                <!-- Single-choice select: only the empty-value option (the branch that offers
                     the two sub-options) is marked as the default. -->
                <param name="paired_select" type="select" label="Paired end options" help="select single/paired for single end data or to select advanced paired end options, --unpaired: treat reverse reads as if they were forward reads; --ignore-pe-reads: ignore paired-end reads even if present in the input">
                    <option value="" selected="true">single/paired</option>
                    <option value="--unpaired">ignore read pairing (--unpaired)</option>
                    <option value="--ignore-pe-reads">ignore paired-end reads (--ignore-pe-reads)</option>
                </param>
                <when value="">
                    <param argument="--rm-unpaired-reads" name="rm_unpaired_reads" type="boolean" checked="false" truevalue="--rm-unpaired-reads" falsevalue="" label="Discard unpaired reads" />
                    <param argument="--rm-pcr-duplicates" name="rm_pcr_duplicates" type="boolean" checked="false" truevalue="--rm-pcr-duplicates" falsevalue="" label="Remove read pairs of the same sample that have the same insert length" help="implies --rm-unpaired-reads" />
                </when>
                <when value="--unpaired"/>
                <when value="--ignore-pe-reads"/>
            </conditional>
            <conditional name="advanced_cond">
                <param name="advanced_select" type="select" label="Advanced options">
                    <option value="no">No</option>
                    <option value="yes">Yes</option>
                </param>
                <when value="yes">
                    <param argument="--min-mapq" name="min_mapq" type="integer" value="10" min="0" max="255" label="Minimum PHRED-scaled mapping quality to consider a read" />
                    <param argument="--max-clipped" name="max_clipped" type="float" value="0.2" min="0.0" max="1.1" label="Maximum soft-clipping level" help="in fraction of read length" />
                    <param argument="--max-insert-len" name="max_insert_len" type="integer" value="1000" min="0" label="Maximum allowed sequencing insert length" />
                    <!-- No explicit name here; the command template reads this value as "details". -->
                    <param argument="--details" type="boolean" checked="false" truevalue="--details" falsevalue="" label="Write a heavier output" />
                    <!-- The two thresholds below are joined with a comma into one range option by the command template. -->
                    <param name="phasing_cooccurrences_thr_min" type="integer" value="1" min="0" label="Edge coverage min" help="(--phasing-cooccurrences-thr-range)" />
                    <param name="phasing_cooccurrences_thr_max" type="integer" value="2" min="0" label="Edge coverage max" help="range of edge coverage thresholds to iterate over when building the graph of allele cooccurrences for SNP phasing (--phasing-cooccurrences-thr-range)"/>
                    <param argument="--phasing-dont-prune-hets" name="phasing_dont_prune_hets" type="boolean" checked="false" truevalue="--phasing-dont-prune-hets" falsevalue="" label="Don't try to ignore dubious heterozygote genotypes during phasing" />
                </when>
                <when value="no"/>
            </conditional>
        </when>
    </conditional>

    <!-- Variant calling model; every branch expands the same options macro,
         marukilow with its own varalpha_default. -->
    <conditional name="model_cond">
        <param argument="--model" type="select" label="Model to use to call variants and genotypes">
            <option value="marukilow" selected="true">marukilow</option>
            <option value="marukihigh">marukihigh</option>
            <option value="snp">snp</option>
        </param>
        <when value="marukilow">
            <expand macro="variant_calling_options_vg" varalpha_default="0.01"/>
        </when>
        <when value="marukihigh">
            <expand macro="variant_calling_options_vg"/>
        </when>
        <when value="snp">
            <expand macro="variant_calling_options_vg"/>
        </when>
    </conditional>
    <param name="add_log_distribs" type="boolean" checked="false" truevalue="yes" falsevalue="no" label="Add log distribs output as dataset" />
    <expand macro="in_log"/>
</inputs>
<!-- All outputs are expanded from macros that are not visible in this file; the tests
     refer to them as output_log, distribs, gstacks_out, gstacks_alns_out and gstacks_aln_out. -->
<outputs>
    <expand macro="out_log" />
    <expand macro="gstacks_outputs_full_macro" />
</outputs>
191
<tests>
    <!-- NOTE(review): for reference-based runs the command template passes "-I bam_inputs"
         only when a population map is set and $bamlist otherwise, while the "-I bam_inputs"
         and "-B " assertions in the three reference-based tests below expect the opposite.
         $bamlist is built outside this file (presumably in macros.xml); confirm which side
         is intended before relying on these assertions. -->

    <!-- denovomode, w popmap -->
    <test expect_num_outputs="3">
        <param name="input_bam" ftype="bam" value="tsv2bam/PopA_01.matches.bam,tsv2bam/PopA_02.matches.bam"/>
        <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
        <conditional name="mode_cond">
            <param name="mode_select" value="denovo"/>
        </conditional>
        <param name="add_log" value="yes" />
        <param name="add_log_distribs" value="yes" />
        <output name="output_log" ftype="txt" file="gstacks/gstacks.log" lines_diff="8"/>
        <output name="distribs" ftype="txt" file="gstacks/gstacks.log.distribs" compare="sim_size"/>
        <output_collection name="gstacks_out" type="list" count="2">
            <element name="catalog.calls.vcf" file="gstacks/catalog.calls.vcf" ftype="vcf" lines_diff="2"/>
            <element name="catalog.fa.gz" file="gstacks/catalog.fa.gz" ftype="fasta.gz"/>
        </output_collection>
    </test>
    <!-- denovomode, w popmap, write alignments -->
    <test expect_num_outputs="3">
        <param name="input_bam" ftype="bam" value="tsv2bam/PopA_01.matches.bam,tsv2bam/PopA_02.matches.bam"/>
        <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
        <conditional name="mode_cond">
            <param name="mode_select" value="denovo"/>
            <conditional name="advanced_cond">
                <param name="advanced_select" value="yes"/>
                <param name="write_alignments" value="--write-alignments" />
            </conditional>
        </conditional>
        <param name="add_log" value="yes" />
        <assert_command>
            <has_text text="--write-alignments" />
        </assert_command>
        <output name="output_log" ftype="txt"><assert_contents><has_text text="done." /></assert_contents></output>
        <output_collection name="gstacks_out" type="list" count="2"/>
        <output_collection name="gstacks_alns_out" type="list" count="2">
            <element name="PopA_01" file="gstacks/PopA_01.alns.bam" ftype="bam" />
            <element name="PopA_02" file="gstacks/PopA_02.alns.bam" ftype="bam" />
        </output_collection>
    </test>
    <!-- denovomode, wo popmap (allows for only one input), ignore PE, advanced, alt model -->
    <test expect_num_outputs="3">
        <param name="input_bam" value="tsv2bam/PopA_01.matches.bam" ftype="bam"/>
        <conditional name="mode_cond">
            <param name="mode_select" value="denovo" />
            <param name="ignore_pe_reads" value="--ignore-pe-reads" />
            <conditional name="advanced_cond">
                <param name="advanced_select" value="yes"/>
                <param name="kmer_length" value="23" />
                <param name="max_debruijn_reads" value="666"/>
                <param name="min_kmer_cov" value="3" />
                <param name="write_alignments" value="--write-alignments" />
            </conditional>
        </conditional>
        <conditional name="model_cond">
            <param name="model" value="marukihigh"/>
            <param name="var_alpha" value="0.1" />
            <param name="gt_alpha" value="0.1" />
        </conditional>
        <param name="add_log" value="yes" />
        <!-- The rm-pcr-duplicates flag is not asserted here: the de novo branch has no such
             parameter and the de novo part of the command template never emits it. -->
        <assert_command>
            <has_text text="--ignore-pe-reads" />
            <has_text text="--kmer-length 23" />
            <has_text text="--max-debruijn-reads 666" />
            <has_text text="--min-kmer-cov 3" />
            <has_text text="--write-alignments" />
            <has_text text="--model marukihigh" />
            <has_text text="--var-alpha 0.1" />
            <has_text text="--gt-alpha 0.1" />
        </assert_command>
        <output name="output_log" ftype="txt"><assert_contents><has_text text="done." /></assert_contents></output>
        <output_collection name="gstacks_out" type="list" count="2"/>
        <output name="gstacks_aln_out" ftype="bam" file="gstacks/alignments.bam" />
    </test>
    <!-- refbased wo popmap, paired options, removing all unpaired reads results in an error -->
    <!-- NOTE(review): rm_unpaired_reads is left at its default in this test, so the template
         is not expected to emit the rm-unpaired-reads flag asserted below; confirm whether
         command assertions are evaluated for tests that expect a failure. -->
    <test expect_failure="true" expect_exit_code="1">
        <param name="input_bam" value="tsv2bam/PopA_01.bam,tsv2bam/PopA_02.bam"/>
        <conditional name="mode_cond">
            <param name="mode_select" value="refbased"/>
            <conditional name="paired_cond">
                <param name="paired_select" value=""/>
                <!--<param name="rm_unpaired_reads" value="\-\-rm-unpaired-reads" /> removes too much of the test data and gstacks fails-->
                <param name="rm_pcr_duplicates" value="--rm-pcr-duplicates" />
            </conditional>
        </conditional>
        <param name="add_log" value="yes" />
        <assert_command>
            <has_text text="-I bam_inputs" />
            <not_has_text text="-B " />
            <has_text text="--rm-unpaired-reads" />
            <has_text text="--rm-pcr-duplicates" />
        </assert_command>
    </test>
    <!-- refbased w popmap (here bam names need to be equal to sample names in popmap), \-\-unpaired, advanced, snp model -->
    <test expect_num_outputs="2">
        <param name="input_bam" ftype="bam" value="tsv2bam/PopA_01.bam,tsv2bam/PopA_02.bam"/>
        <param name="popmap" ftype="tabular" value="denovo_map/popmap_cstacks.tsv" />
        <conditional name="mode_cond">
            <param name="mode_select" value="refbased"/>
            <conditional name="paired_cond">
                <param name="paired_select" value="--unpaired"/>
            </conditional>
            <conditional name="advanced_cond">
                <param name="advanced_select" value="yes" />
                <param name="min_mapq" value="23" />
                <param name="max_clipped" value="0.23" />
                <param name="max_insert_len" value="666" />
                <!-- value and assertion use the truevalue declared for the details parameter -->
                <param name="details" value="--details"/>
                <param name="phasing_cooccurrences_thr_min" value="2"/>
                <param name="phasing_cooccurrences_thr_max" value="3"/>
                <param name="phasing_dont_prune_hets" value="--phasing-dont-prune-hets" />
            </conditional>
        </conditional>
        <param name="model_cond|model" value="snp"/>
        <param name="model_cond|gt_alpha" value="0.1" />
        <param name="model_cond|var_alpha" value="0.1" />
        <param name="add_log" value="yes" />
        <assert_command>
            <not_has_text text="-I bam_inputs" />
            <has_text text="-B " />
            <has_text text="--unpaired" />
            <has_text text="--min-mapq 23" />
            <has_text text="--max-clipped 0.23" />
            <has_text text="--max-insert-len 666" />
            <has_text text="--details" />
            <has_text text="--phasing-cooccurrences-thr-range 2,3" />
            <has_text text="--phasing-dont-prune-hets" />
            <has_text text="--model snp" />
            <has_text text="--gt-alpha 0.1" />
        </assert_command>
        <output name="output_log" ftype="txt"><assert_contents><has_text text="done." /></assert_contents></output>
        <output_collection name="gstacks_out" type="list" count="2"/>
    </test>
    <!-- refbased wo popmap (here bam names don't matter), \-\-ignorepe -->
    <test expect_num_outputs="2">
        <param name="input_bam" ftype="bam" value="tsv2bam/PopA_01.bam,tsv2bam/PopA_02.bam"/>
        <conditional name="mode_cond">
            <param name="mode_select" value="refbased"/>
            <conditional name="paired_cond">
                <param name="paired_select" value="--ignore-pe-reads"/>
            </conditional>
        </conditional>
        <param name="add_log" value="yes" />
        <assert_command>
            <has_text text="-I bam_inputs" />
            <not_has_text text="-B " />
            <has_text text="--ignore-pe-reads" />
        </assert_command>
        <output name="output_log"><assert_contents><has_text text="gstacks is done." /></assert_contents></output>
        <output_collection name="gstacks_out" type="list" count="2"/>
    </test>
</tests>
344
<help>
<![CDATA[
.. class:: infomark

**What it does**

For de novo analyses, this program will pull in paired-end reads, if available,
assemble the paired-end contig and merge it with the single-end locus, align
reads to the locus, and call SNPs.

For reference-aligned analyses, this program will build loci from the single
and/or paired-end reads before calling SNPs. The single- and paired-end reads
must be aligned and stored together in the input BAM or SAM files and the
reads must be sorted. The gstacks program will detect if single- or paired-end
reads are present.

In either mode, gstacks is able to remove PCR duplicates if requested.

--------

**Input files**

If a population map is given, BAM records must be assigned to samples using BAM "read groups"
(gstacks uses the ID/identifier and SM/sample name fields). Read groups
must be consistent if repeated across different files.
Otherwise read groups are unneeded and ignored.

**Output files**

- Assembled contigs and variant sites

- Optional outputs: Read alignments and log.distribs

@STACKS_INFOS@
]]>
</help>
<!-- Citation entries are expanded from the "citation" macro (defined outside this file). -->
<expand macro="citation"/>
</tool>