comparison lofreq_call.xml @ 0:31216d510164 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lofreq commit 9efcb813ab17041c7f5aad834dfff45bd7046c60"
author iuc
date Tue, 17 Dec 2019 17:27:17 -0500
parents
children dfadc322b065
comparison
equal deleted inserted replaced
-1:000000000000 0:31216d510164
1 <tool id="lofreq_call" name="Call variants" version="@WRAPPER_VERSION@0">
2 <description>with LoFreq</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements" />
7 <command detect_errors="exit_code"><![CDATA[
8 ## stage inputs: prepare the reference genome via the PREPARE_REF token and symlink the BAM and its index as reads.bam / reads.bam.bai
9 @PREPARE_REF@
10 ln -s '$reads' reads.bam &&
11 ln -s -f '${reads.metadata.bam_index}' reads.bam.bai &&
12
13 ## call variants with lofreq call-parallel, using GALAXY_SLOTS threads (1 if unset)
14
15 ## make lofreq stick to the tool contract by pointing TMPDIR at pp-tmp inside
16 ## the job working dir; the worker logs reported on failure are read from there
17 mkdir pp-tmp &&
18 export TMPDIR=pp-tmp &&
19
20 lofreq call-parallel --pp-threads \${GALAXY_SLOTS:-1} --verbose
21
22 --ref '$reference_fasta_fn' --out variants.vcf $variant_types
23
24 #if str($regions.restrict_to_region) == 'regions_from_file':
25 --bed '$regions.bed'
26 #end if
27
28 #if str($call_control.set_call_options) == 'yes':
29 --min-cov $call_control.coverage.min_cov
30 --max-depth $call_control.coverage.max_depth
31 $call_control.pe.use_orphan
32 --min-bq $call_control.bc_quals.min_bq
33 --min-alt-bq $call_control.bc_quals.min_alt_bq
34 --def-alt-bq $call_control.bc_quals.def_alt_bq
35 ${call_control.align_quals.alnqual.use_alnqual}
36 #if str($call_control.align_quals.alnqual.use_alnqual) != '-A -B':
37 ${call_control.align_quals.alnqual.alnqual_choice.alnquals_to_use}
38 ${call_control.align_quals.alnqual.alnqual_choice.extended_baq}
39 #end if
40 --min-mq $call_control.map_quals.min_mq
41 --max-mq $call_control.map_quals.use_mq.max_mq
42 $call_control.map_quals.use_mq.no_mq
43 #if str($call_control.source_qual.use_src_qual.src_qual):
44 $call_control.source_qual.use_src_qual.src_qual
45 #set $ign_vcfs = ','.join([str($ign_vcf) for $ign_vcf in $call_control.source_qual.use_src_qual.ign_vcf if $ign_vcf])
46 #if $ign_vcfs:
47 --ign-vcf "$ign_vcfs"
48 #end if
49 --def-nm-q $call_control.source_qual.use_src_qual.def_nm_q
50 #end if
51 --min-jq $call_control.joint_qual.min_jq
52 --min-alt-jq $call_control.joint_qual.min_alt_jq
53 --def-alt-jq $call_control.joint_qual.def_alt_jq
54 #end if
55
56 --sig $filter_control.sig
57 #set $bonf_factor = $filter_control.bonf or 'dynamic'
58 --bonf $bonf_factor
59 $filter_control.others
60
61 reads.bam 2>&1
62
63 ## if lofreq fails, copy the log files written by the parallel workers
64 ## to stderr and then exit with lofreq's original exit code
65 || (tool_exit_code=\$? && cat pp-tmp/lofreq2_call_parallel*/*.log 1>&2 && exit \$tool_exit_code)
66
67 ## work around a bug in lofreq call-parallel
68 ## https://github.com/CSB5/lofreq/issues/85
69 ## that causes the output format to be vcf.gz with certain filter
70 ## combinations: with a fixed (non-dynamic) bonf factor plus --no-default-filter, force-decompress variants.vcf through a .gz symlink
71 #if str($bonf_factor) != 'dynamic':
72 #if '--no-default-filter' in str($filter_control.others):
73 && ln -s variants.vcf variants.vcf.gz
74 && gzip -df variants.vcf.gz
75 #end if
76 #end if
77 ]]></command>
78 <inputs>
79 <param type="data" name="reads" format="bam" label="Input reads in BAM format" />
80 <expand macro="reference_interface" />
81 <conditional name="regions"> <!-- optional restriction to BED regions; the command adds the bed option only when regions_from_file is selected -->
82 <param name="restrict_to_region" type="select"
83 label="Call variants across">
84 <option value="genome">Whole reference</option>
85 <option value="regions_from_file">Regions specified in BED</option>
86 </param>
87 <when value="genome" />
88 <when value="regions_from_file">
89 <param argument="--bed" type="data" format="bed"
90 label="BED dataset with regions to examine" />
91 </when>
92 </conditional>
93 <param name="variant_types" type="select"
94 label="Types of variants to call"
95 help="Note: When including indels in the called variants you should preprocess your input data to include indel alignment qualities">
96 <option value="--call-indels">SNVs and indels</option> <!-- option values are inserted verbatim into the lofreq command line -->
97 <option value="" selected="True">Only SNVs</option> <!-- empty value: no extra flag is added -->
98 <option value="--only-indels">Only indels</option>
99 </param>
100 <conditional name="call_control">
101 <param name="set_call_options" type="select"
102 label="Variant calling parameters">
103 <option value="no">Use default settings</option>
104 <option value="yes">Configure settings</option>
105 </param>
106 <when value="no" />
107 <when value="yes">
108 <section name="coverage" title="Coverage" expanded="true"> <!-- read-depth limits handed to lofreq as the min-cov and max-depth options -->
109 <param name="min_cov" argument="--min-cov" type="integer" value="1" min="1"
110 label="Minimal coverage"
111 help="Do not attempt variant calling at sites that are not covered by at least this number of reads (default: 1)" />
112 <param name="max_depth" argument="--max-depth" type="integer" value="1000000" min="1"
113 label="Coverage cap"
114 help="For efficiency, do not consider more than this number of reads at any site (default: 1,000,000)" />
115 </section>
116 <section name="pe" title="Paired reads" expanded="true"> <!-- the boolean below renders as the use-orphan flag or as an empty string -->
117 <param name="use_orphan" argument="--use-orphan" type="boolean" truevalue="--use-orphan" falsevalue="" checked="False"
118 label="Use reads from anomalously mapped pairs"
119 help="Applies to paired-end reads only. If set to true, reads from pairs that are flagged as non-proper pairs (SAM/BAM FLAG field 2) will be used in variant calling. The default is to ignore such reads." />
120 </section>
121 <section name="bc_quals" title="Base-calling quality" expanded="true"> <!-- base quality thresholds; all three params are bounded below by 0 -->
122 <param name="min_bq" argument="--min-bq" type="integer" min="0" value="6"
123 label="Minimum baseQ"
124 help="For variant calling at any given site, do not consider reads for which the base at that site has a base quality less than this value (default: 6)" />
125 <param name="min_alt_bq" argument="--min-alt-bq" type="integer" min="0" value="6"
126 label="Minimum baseQ for alternate bases"
127 help="For variant calling at any given site, do not consider reads that support a non-reference allele at the site if that base has a base quality less than this value (default: 6). Note: this setting will have no effect if the specified value is less than the general Minimum baseQ above." />
128 <param name="def_alt_bq" argument="--def-alt-bq" type="integer" min="0" value="0"
129 label="Overwrite baseQs of alternate bases with this value"
130 help="After filtering reads according to the Minimum baseQ settings above, set the base quality of the non-reference bases in the surviving reads to this new value (default: 0 = keep the original base quality)." />
131 </section>
132 <section name="align_quals" title="Base alignment quality" expanded="true"
133 help="Choose here whether you want to incorporate base and/or indel alignment qualities into lofreq's joint quality model. If you have previously computed and stored (using lofreq alnqual) any of these quality scores into your input dataset, you can tell the tool to reuse them. Alternatively, the tool can calculate the necessary scores on the fly.">
134 <conditional name="alnqual"> <!-- the selected option value is itself written to the lofreq command line -->
135 <param name="use_alnqual" type="select"
136 label="Consider base/indel alignment qualities during variant calling?">
137 <option value="">Yes, and prefer existing alignment qualities encoded in input</option>
138 <option value="--del-baq">Yes, (re)calculate alignment qualities on the fly and use them</option>
139 <option value="-A -B">No, don't make use of alignment qualities</option>
140 </param>
141 <when value="">
142 <expand macro="handle_existing_alnqual" /> <!-- NOTE(review): presumably supplies the alnqual_choice params read by the command; confirm in macros.xml -->
143 </when>
144 <when value="--del-baq">
145 <expand macro="handle_alnqual" mode="Add and use" /> <!-- NOTE(review): presumably supplies the alnqual_choice params read by the command; confirm in macros.xml -->
146 </when>
147 <when value="-A -B" /> <!-- no sub-options: the command skips alnquals_to_use and extended_baq for this choice -->
148 </conditional>
149 </section>
150 <section name="map_quals" title="Mapping quality" expanded="true"> <!-- mapping quality filter plus the switch for using MAPQ in the joint quality model -->
151 <param name="min_mq" argument="--min-mq" type="integer" value="0"
152 label="Minimum mapping quality"
153 help="For variant calling at any given site, do not consider reads with a mapping quality (MAPQ) less than this value (default: 0 = do not filter on read mapping quality)." />
154 <conditional name="use_mq">
155 <param name="no_mq" argument="--no-mq" type="select"
156 label="Consider mapping quality during variant calling?">
157 <option value="">Yes, incorporate MAPQ into joint quality score</option>
158 <option value="--no-mq">No, ignore MAPQ scores during variant calling</option>
159 </param>
160 <when value="">
161 <param name="max_mq" argument="--max-mq" type="integer" value="255"
162 label="Maximum mapping quality"
163 help="For the joint quality model at any site, cap the mapping quality of reads at this value (default: 255 = do not cap mapping qualities). Note: The special MAPQ value 255 is used by many tools to indicate undefined mapping quality, and lofreq call will ignore such reads during variant calling. Capping, however, will turn 255 into a regular MAPQ score. Thus, if you need to avoid using such reads, you should filter out MAPQ 255 reads from your input data with other tools before using lofreq call with MAPQ capping."/>
164 </when>
165 <when value="--no-mq">
166 <param name="max_mq" type="hidden" value="255" /> <!-- must not be empty: the command always writes the max-mq option with this value right before the no-mq flag, so an empty value would leave max-mq without an argument; 255 is the documented "do not cap" default -->
167 </when>
168 </conditional>
169 </section>
170 <section name="source_qual" title="Source quality" expanded="true">
171 <conditional name="use_src_qual"> <!-- the sub-options below reach the command line only when src_qual is non-empty -->
172 <param argument="--src-qual" name="src_qual" type="select"
173 label="Compute source quality and consider it during variant calling">
174 <option value="">No, don't incorporate source quality into joint quality score</option>
175 <option value="--src-qual">Yes, compute source quality and merge it into joint quality score</option>
176 </param>
177 <when value="" />
178 <when value="--src-qual">
179 <param name="def_nm_q" argument="--def-nm-q" type="integer" min="-1" value="-1"
180 label="Replace non-match base qualities with this value"
181 help="For the calculation of the source quality, replace all base qualities of non-match bases with this value (default: -1 = use original base qualities)." />
182 <param name="ign_vcf" argument="--ign-vcf" type="data" format="vcf" optional="true" multiple="true"
183 label="VCF input(s) with variants to ignore for source quality computation"
184 help="Mismatches caused by known true variants in your samples should not lower the source quality estimate. If any read in your input has a base that is non-reference, but supports one of the known variants in the specified VCF datasets, that base will not be treated as a mismatch." /> <!-- the command joins multiple datasets with commas into a single ign-vcf argument -->
185 </when>
186 </conditional>
187 </section>
188 <section name="joint_qual" title="Joint quality" expanded="true"> <!-- joint quality thresholds, passed as the min-jq, min-alt-jq and def-alt-jq options -->
189 <param name="min_jq" argument="--min-jq" type="integer" min="0" value="0"
190 label="Minimum joinedQ"
191 help="At any site, do not use reads for variant calling, if their calculated joint quality at that site is lower than this value (default: 0 = do not filter based on joint quality)" />
192 <param name="min_alt_jq" argument="--min-alt-jq" type="integer" min="0" value="0"
193 label="Minimum joinedQ for alternate bases"
194 help="At any site, do not use reads for variant calling, if they support a non-reference allele at that site and their calculated joint quality at the site is lower than this value (default: 0 = do not filter based on joint quality). Note: this setting has no effect if the specified value is smaller than the general Minimum joinedQ set above." />
195 <!-- a def-alt-jq value of -1, though documented, is currently not implemented, hence the lower bound of 0 below -->
196 <param name="def_alt_jq" argument="--def-alt-jq" type="integer" min="0" value="0"
197 label="Overwrite joinedQs of alternate bases with this value"
198 help="After filtering according to the Minimum joinedQ settings above, set the joint quality values for all reads surviving filtering and supporting a non-reference allele to this value (default: 0 = use the original calculated joint quality)." />
199 </section>
200 </when>
201 </conditional>
202 <conditional name="filter_control"> <!-- every branch defines sig, bonf and others, which the command passes on as the sig and bonf options plus any extra filter flag -->
203 <param name="filter_type" type="select"
204 label="Variant filter parameters">
205 <option value="set_all_off">Strictly no filtering</option>
206 <option value="set_no_default">Preset QUAL score-based filtering</option>
207 <option value="set_lofreq_standard" selected="true">Preset filtering on QUAL score + coverage + strand bias (lofreq call default)</option>
208 <option value="set_custom">Custom filter settings/combinations</option>
209 </param>
210 <when value="set_all_off">
211 <param name="sig" type="hidden" value="1" />
212 <param name="bonf" type="hidden" value="1" />
213 <param name="others" type="hidden" value="--no-default-filter" />
214 </when>
215 <when value="set_no_default">
216 <param name="sig" type="hidden" value="0.01" />
217 <param name="bonf" type="hidden" value="dynamic" />
218 <param name="others" type="hidden" value="--no-default-filter" />
219 </when>
220 <when value="set_lofreq_standard">
221 <param name="sig" type="hidden" value="0.01" />
222 <param name="bonf" type="hidden" value="dynamic" />
223 <param name="others" type="hidden" value="" />
224 </when>
225 <when value="set_custom">
226 <param name="sig" type="float" value="0.01" min="0" max="1"
227 label="Significance threshold for calls"/>
228 <param name="bonf" type="integer" value="0" min="0"
229 label="Bonferroni correction factor for multiple testing"
230 help="Set to zero to determine dynamically from actual number of variant tests performed. Dynamic detection will calculate separate correction factors for SNVs and indels" /> <!-- a value of 0 is expected to be replaced by the keyword dynamic through the "or 'dynamic'" fallback in the command -->
231 <param name="others" type="boolean" truevalue="" falsevalue="--no-default-filter" checked="true"
232 label="Apply default coverage and strand-bias filter?"
233 help="" /> <!-- inverted mapping: checked renders an empty string, unchecked renders the no-default-filter flag -->
234 </when>
235 </conditional>
236 </inputs>
237 <outputs>
238 <data name="variants" from_work_dir="variants.vcf" format="vcf" /> <!-- collected from the file named by the out option in the command -->
239 </outputs>
240 <tests> <!-- NOTE(review): neither test sets call_control to yes, so the per-option branches of the command are not exercised -->
241 <test> <!-- all defaults: SNVs only, standard lofreq filter preset -->
242 <param name="reads" ftype="bam" value="lofreq-in1.bam" />
243 <param name="ref_selector" value="history" />
244 <param name="ref" ftype="fasta" value="pBR322.fa" />
245 <output name="variants" file="call-out1.vcf" lines_diff="4" />
246 </test>
247 <test> <!-- set_all_off preset: fixed bonf factor plus the no-default-filter flag, i.e. the combination handled by the vcf.gz workaround in the command -->
248 <param name="reads" ftype="bam" value="lofreq-in1.bam" />
249 <param name="ref_selector" value="history" />
250 <param name="ref" ftype="fasta" value="pBR322.fa" />
251 <conditional name="filter_control">
252 <param name="filter_type" value="set_all_off" />
253 </conditional>
254 <output name="variants" file="call-out2.vcf" lines_diff="4" />
255 </test>
256 </tests>
257 <help><![CDATA[
258 lofreq call: call variants from BAM file
259
260 LoFreq is a fast and sensitive variant-caller for inferring SNVs and indels
261 from next-generation sequencing data. It makes full use of base-call qualities
262 and other sources of errors inherent in sequencing, which are usually ignored
263 by other methods or only used for filtering.
264
265 LoFreq can run on almost any type of aligned sequencing data since no machine-
266 or sequencing-technology dependent thresholds are used. It automatically adapts
267 to changes in coverage and sequencing quality and can therefore be applied to a
268 variety of data-sets e.g. viral/quasispecies, bacterial, metagenomics or
269 somatic data.
270
271 While the tool will often give reasonable results with default settings a
272 variety of options let you control its exact behavior. These advanced options
273 can be subdivided into those affecting variant calling and those affecting
274 posterior filtering of the results.
275
276 **Variant calling parameters**
277
278 At the heart of LoFreq's variant caller is a **joint quality score** that is
279 computed for every site in every read (that survives filtering) and that
280 combines some or all of the following read and base quality measures:
281
282 - Base/indel quality
283
284 For any read, this is the Phred-scaled likelihood that the base mapped to a
285 given site does not represent a sequencing error. For every base, this score
286 got computed by the base caller of your sequencing platform and got
287 incorporated into your input dataset during read alignment.
288
289 For insertions/deletions this is defined, analogously, as the Phred-scaled
290 likelihood that any inserted/deleted base is real, however, you are
291 responsible for adding indel qualities, which are required for indel
292 calling with lofreq, to your input.
293
294 For doing so, you can use ``lofreq indelqual`` or GATK's BQSR.
295
296 - Base/indel alignment quality
297
298 For any read, this is the Phred-scaled likelihood that the read's base or
299 indel mapped to a given reference genome position is mapped to this position
300 correctly.
301
302 The tool can calculate these scores for you on the fly. Alternatively, you
303 can precalculate them using ``lofreq alnqual``, which will incorporate them
304 into your input dataset.
305
306 - Mapping quality
307
308 The Phred-scaled likelihood that the read got mapped to the correct place
309 in the reference genome. This score got incorporated into your input dataset
310 by the aligner you used to map your reads.
311
312 - Source quality
313
314 This is the Phred-scaled likelihood that the given read comes from the
315 reference genome. The tool can calculate this score for you.
316
317
318 **Variant filter parameters**
319
320 After generating a list of called variants, the tool can filter this list
321 based on:
322
323 - the statistical significance of the variant calls
324 - strand-bias of reads supporting the variant
325 - coverage of the variant site
326
327 While posterior filtering can help reduce false-positive variant calls, please
328 note that the separate ``lofreq filter`` tool, which can be run on the output of
329 ``lofreq call``, has many more options for configuring filters.
330
331 These are the different filter settings supported by the tool:
332
333 *Preset filtering on QUAL score + coverage + strand bias*
334
335 For variants to pass this filter, the following is required:
336
337 - statistical significance of the variant call with a p-value < 0.01 based on the
338 retransformed QUAL score of the variant and multiple-testing corrected using
339 a dynamically determined Bonferroni factor (based on the number of overall
340 variants considered during calling).
341
342 - A strand-bias in supporting reads not significant under a FDR-corrected p
343 value of 0.001 and 85% of supporting reads mapped to the same strand of the
344 genome.
345
346 - A coverage of the variant site of at least 10x.
347
348 *Preset QUAL score-based filtering*
349
350 Same QUAL-based significance filter as the default, but without the strand-bias
351 and coverage criteria
352
353 *Strictly no filtering*
354
355 Do not apply any filters, but produce the original list of all called variants.
356 You will almost always want to use ``lofreq filter`` to process the resulting
357 output.
358
359 *Custom filter settings/combinations*
360
361 Lets you define your own QUAL-based significance filter and, optionally,
362 combine it with the default strand-bias and coverage filters.
363 ]]></help>
364 <expand macro="citations" />
365 </tool>