comparison lofreq_filter.xml @ 0:6f9ffff040ce draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/lofreq commit 9efcb813ab17041c7f5aad834dfff45bd7046c60"
author iuc
date Tue, 17 Dec 2019 17:26:32 -0500
parents
children fdba1586551d
comparison
equal deleted inserted replaced
-1:000000000000 0:6f9ffff040ce
1 <tool id="lofreq_filter" name="Lofreq filter" version="@TOOL_VERSION@">
2 <description>called variants posteriorly</description>
3 <macros>
4 <import>macros.xml</import>
<!-- Macro: call-quality filter settings for SNVs.
     The selector offers three modes:
       no        - no further options
       min-phred - a hard lower bound on QUAL (-Q)
       mtc       - a multiple-testing corrected p-value filter (-r, -q, -s) -->
<xml name="snvqual_filter_config">
    <conditional name="snvqual_filter">
        <param name="snvqual"
               type="select"
               label="Filter SNVs based on call quality?">
            <option value="no">No, don't apply call quality filter</option>
            <option value="min-phred">Yes, filter on explicit QUAL threshold</option>
            <option value="mtc">Yes, filter on multiple testing corrected p-value</option>
        </param>
        <when value="no"/>
        <when value="min-phred">
            <param name="snvqual_thresh"
                   argument="-Q"
                   type="integer"
                   min="0"
                   value="0"
                   label="Minimum QUAL value"
                   help="Specify the minimum value of the QUAL field required to retain a variant"/>
        </when>
        <when value="mtc">
            <param name="snvqual_alpha"
                   argument="-r"
                   type="float"
                   min="0"
                   max="1"
                   value="1"
                   label="Multiple-testing corrected p-value threshold"/>
            <param name="snvqual_mtc"
                   argument="-q"
                   type="select"
                   label="Multiple testing correction method">
                <option value="bonf">Bonferroni</option>
                <option value="holm">Holm-Sidak</option>
                <option value="fdr">False-discovery rate</option>
            </param>
            <param name="snvqual_ntests"
                   argument="-s"
                   type="integer"
                   min="1"
                   value="1"
                   label="Estimate of number of tests performed"
                   help="Ideally, this would be the number of SNVs considered during variant calling. The lofreq variant caller emits this number as part of its output. For other variant callers, all you will typically have is a lower bound estimate given by the number of SNV records in your VCF input."/>
        </when>
    </conditional>
</xml>
<!-- Macro: call-quality filter settings for indels.
     The selector offers three modes:
       no        - no further options
       min-phred - a hard lower bound on QUAL (-K)
       mtc       - a multiple-testing corrected p-value filter (-l, -k, -m) -->
<xml name="indelqual_filter_config">
    <conditional name="indelqual_filter">
        <param name="indelqual"
               type="select"
               label="Filter indels based on call quality?">
            <option value="no">No, don't apply call quality filter</option>
            <option value="min-phred">Yes, filter on explicit QUAL threshold</option>
            <option value="mtc">Yes, filter on multiple testing corrected p-value</option>
        </param>
        <when value="no"/>
        <when value="min-phred">
            <param name="indelqual_thresh"
                   argument="-K"
                   type="integer"
                   min="0"
                   value="0"
                   label="Minimum QUAL value"
                   help="Specify the minimum value of the QUAL field required to retain a variant"/>
        </when>
        <when value="mtc">
            <param name="indelqual_alpha"
                   argument="-l"
                   type="float"
                   min="0"
                   max="1"
                   value="1"
                   label="Multiple-testing corrected p-value threshold"/>
            <param name="indelqual_mtc"
                   argument="-k"
                   type="select"
                   label="Multiple testing correction method">
                <option value="bonf">Bonferroni</option>
                <option value="holm">Holm-Sidak</option>
                <option value="fdr">False-discovery rate</option>
            </param>
            <param name="indelqual_ntests"
                   argument="-m"
                   type="integer"
                   min="1"
                   value="1"
                   label="Estimate of number of tests performed"
                   help="Ideally, this would be the number of indels considered during variant calling. The lofreq variant caller emits this number as part of its output. For other variant callers, all you will typically have is a lower bound estimate given by the number of indel records in your VCF input."/>
        </when>
    </conditional>
</xml>
65 </macros>
66 <expand macro="requirements" />
<command detect_errors="exit_code"><![CDATA[
    ## Filter variants with lofreq.
    ## The input dataset path is single-quoted so the shell always passes it
    ## to lofreq as exactly one argument.
    lofreq filter -i '$invcf' --no-defaults --verbose

    ## Either empty or --print-all (flag failing variants instead of dropping them)
    $flag_or_drop

    ## Either empty, --only-snvs or --only-indels
    ${filter_by_type.keep_only}

    ## SNV call-quality options; the snvqual_filter inputs only exist when
    ## SNVs are kept, hence the guard on keep_only.
    #if str($filter_by_type.keep_only) in ['', '--only-snvs']:
        #if str($filter_by_type.qual.snvqual_filter.snvqual) == 'min-phred':
            -Q ${filter_by_type.qual.snvqual_filter.snvqual_thresh}
        #elif str($filter_by_type.qual.snvqual_filter.snvqual) == 'mtc':
            -q ${filter_by_type.qual.snvqual_filter.snvqual_mtc}
            -r ${filter_by_type.qual.snvqual_filter.snvqual_alpha}
            -s ${filter_by_type.qual.snvqual_filter.snvqual_ntests}
        #end if
    #end if

    ## Indel call-quality options; the indelqual_filter inputs only exist
    ## when indels are kept.
    #if str($filter_by_type.keep_only) in ['', '--only-indels']:
        #if str($filter_by_type.qual.indelqual_filter.indelqual) == 'min-phred':
            -K ${filter_by_type.qual.indelqual_filter.indelqual_thresh}
        #elif str($filter_by_type.qual.indelqual_filter.indelqual) == 'mtc':
            -k ${filter_by_type.qual.indelqual_filter.indelqual_mtc}
            -l ${filter_by_type.qual.indelqual_filter.indelqual_alpha}
            -m ${filter_by_type.qual.indelqual_filter.indelqual_ntests}
        #end if
    #end if

    ## Coverage range (minimum / maximum)
    -v ${coverage.cov_min}
    -V ${coverage.cov_max}

    ## Allele frequency range (minimum / maximum)
    -a ${af.af_min}
    -A ${af.af_max}

    ## Strand-bias filter: explicit threshold (-B) or multiple-testing
    ## corrected p-value (-b method, -c alpha)
    #if str($sb.sb_filter.strand_bias) == 'max-phred':
        -B ${sb.sb_filter.sb_thresh}
    #elif str($sb.sb_filter.strand_bias) == 'mtc':
        -b ${sb.sb_filter.sb_mtc}
        -c ${sb.sb_filter.sb_alpha}
    #end if

    ## Switches emitted for every active strand-bias mode
    #if str($sb.sb_filter.strand_bias) != 'no':
        ${sb.sb_filter.sb_compound}
        ${sb.sb_filter.sb_indels}
    #end if

    ## Fixed output name; collected through from_work_dir in <outputs>
    -o filtered.vcf
]]></command>
<inputs>
    <!-- VCF dataset holding the variant calls to be filtered -->
    <param argument="-i" name="invcf" type="data" format="vcf,vcf_bgzip"
           label="List of variants to filter" />

    <!-- Variant type selection. Each branch only exposes the quality
         filters that are meaningful for the variant types being kept. -->
    <conditional name="filter_by_type">
        <param name="keep_only" type="select"
               label="Types of variants to keep">
            <option value="">SNVs and Indels</option>
            <option value="--only-snvs">SNVs only</option>
            <option value="--only-indels">Indels only</option>
        </param>
        <when value="">
            <section name="qual" title="Quality-based filter options" expanded="True">
                <expand macro="snvqual_filter_config" />
                <expand macro="indelqual_filter_config" />
            </section>
        </when>
        <when value="--only-snvs">
            <section name="qual" title="Quality-based filter options" expanded="True">
                <expand macro="snvqual_filter_config" />
            </section>
        </when>
        <when value="--only-indels">
            <section name="qual" title="Quality-based filter options" expanded="True">
                <expand macro="indelqual_filter_config" />
            </section>
        </when>
    </conditional>

    <section name="coverage" title="Coverage-based filter options" expanded="True"
             help="You can specify a range of acceptable coverage values at variant sites by setting minimum and maximum coverage. Set either value to zero to make the range unbounded on the corresponding side, or disable filtering based on coverage by setting both values to zero.">
        <param argument="-v" name="cov_min" type="integer" min="0" value="10"
               label="Minimum coverage"
               help="The minimum coverage at a site required to keep variants" />
        <param argument="-V" name="cov_max" type="integer" min="0" value="0"
               label="Maximum coverage"
               help="The maximum coverage at a site allowed to keep variants" />
    </section>

    <section name="af" title="Allele frequency filter options" expanded="True"
             help="You can specify a range of acceptable allele frequencies at variant sites by setting minimum and maximum AF values. Set either value to zero to make the range unbounded on the corresponding side, or disable filtering based on allele frequency by setting both values to zero.">
        <param argument="-a" name="af_min" type="float" min="0" value="0"
               label="Minimum allele frequency"
               help="To keep a variant its allele needs to be observed at this frequency at least." />
        <param argument="-A" name="af_max" type="float" min="0" value="0"
               label="Maximum allele frequency"
               help="To keep a variant its allele is allowed to be observed at this frequency at most." />
    </section>

    <section name="sb" title="Strand bias filter options" expanded="True">
        <conditional name="sb_filter">
            <param name="strand_bias" type="select"
                   label="Filter variants based on supporting strand bias?">
                <option value="no">No, don't apply strand-bias filter</option>
                <option value="max-phred">Yes, filter on explicit SB threshold</option>
                <option value="mtc" selected="True">Yes, filter on multiple testing corrected strand-bias p-value (lofreq default)</option>
            </param>
            <when value="no">
            </when>
            <when value="max-phred">
                <param argument="-B" name="sb_thresh" type="integer" min="0" value="0"
                       label="Maximum strand-bias value"
                       help="Specify the maximum Phred-scaled strand-bias (SB) value allowed for a variant to be retained" />
                <!-- The command template emits sb_compound and sb_indels for
                     every strand-bias mode other than "no", so both switches
                     have to be defined in this branch as well as in "mtc". -->
                <param name="sb_compound" type="boolean" checked="True" truevalue="" falsevalue="--sb-no-compound"
                       label="Use compound strand-bias filter?"
                       help="With compound filtering a variant is filtered only if it fails the strand-bias filter configured above AND has 85% of its supporting reads mapped to one reference genome strand. This guards against filtering based on statistically significant, but minor strand bias effects at high-coverage sites." />
                <param argument="--sb-incl-indels" name="sb_indels" type="boolean" checked="False" truevalue="--sb-incl-indels" falsevalue=""
                       label="Apply to indels?"
                       help="By default, indels are not filtered based on strand bias." />
            </when>
            <when value="mtc">
                <param argument="-c" name="sb_alpha" type="float" min="0" max="1" value="0.001"
                       label="Multiple-testing corrected p-value threshold" />
                <param argument="-b" name="sb_mtc" type="select"
                       label="Multiple testing correction method">
                    <option value="bonf">Bonferroni</option>
                    <option value="holm">Holm-Sidak</option>
                    <option value="fdr" selected="True">False-discovery rate</option>
                </param>
                <param name="sb_compound" type="boolean" checked="True" truevalue="" falsevalue="--sb-no-compound"
                       label="Use compound strand-bias filter?"
                       help="With compound filtering a variant is filtered only if it fails the strand-bias filter configured above AND has 85% of its supporting reads mapped to one reference genome strand. This guards against filtering based on statistically significant, but minor strand bias effects at high-coverage sites." />
                <param argument="--sb-incl-indels" name="sb_indels" type="boolean" checked="False" truevalue="--sb-incl-indels" falsevalue=""
                       label="Apply to indels?"
                       help="By default, indels are not filtered based on strand bias." />
            </when>
        </conditional>
    </section>

    <!-- Empty value: failing variants are dropped; otherwise they are kept
         and the failed filters are recorded in the FILTER column. -->
    <param argument="--print-all" name="flag_or_drop" type="select" display="radio"
           label="Action to be taken for variants that do not pass the filters defined above">
        <option value="">Drop variants not passing one or more filters</option>
        <option value="--print-all">Keep variants, but indicate failed filters in output FILTER column</option>
    </param>
</inputs>
<outputs>
    <!-- The command writes filtered.vcf into the job working directory;
         it is picked up from there as the tool's single VCF output. -->
    <data name="outvcf"
          format="vcf"
          from_work_dir="filtered.vcf" />
</outputs>
<tests>
    <!-- Test 1: mimic the default filtering of lofreq call using explicit
         QUAL thresholds for SNVs and indels. -->
    <test>
        <param name="invcf" value="call-out2.vcf" ftype="vcf"/>
        <conditional name="filter_by_type">
            <section name="qual">
                <conditional name="snvqual_filter">
                    <param name="snvqual" value="min-phred"/>
                    <param name="snvqual_thresh" value="38"/>
                </conditional>
                <conditional name="indelqual_filter">
                    <param name="indelqual" value="min-phred"/>
                    <param name="indelqual_thresh" value="20"/>
                </conditional>
            </section>
        </conditional>
        <!-- lofreq call writes duplicate ##FILTER declarations while
             lofreq filter does not, so some differing lines are tolerated. -->
        <output name="outvcf" file="call-out1.vcf" lines_diff="6"/>
    </test>

    <!-- Test 2: express the default filtering of lofreq call as a
         multiple-testing correction filter, keeping SNVs only. -->
    <test>
        <param name="invcf" value="call-out2.vcf" ftype="vcf"/>
        <conditional name="filter_by_type">
            <param name="keep_only" value="--only-snvs"/>
            <section name="qual">
                <conditional name="snvqual_filter">
                    <param name="snvqual" value="mtc"/>
                    <param name="snvqual_alpha" value="0.01"/>
                    <param name="snvqual_mtc" value="bonf"/>
                    <param name="snvqual_ntests" value="66"/>
                </conditional>
            </section>
        </conditional>
        <!-- The ##FILTER declarations differ from the reference file, so
             additional differing lines are expected. -->
        <output name="outvcf" file="call-out1.vcf" lines_diff="9"/>
    </test>

    <!-- Test 3: the print-all option. -->
    <test>
        <param name="invcf" value="call-out2.vcf" ftype="vcf"/>
        <conditional name="filter_by_type">
            <section name="qual">
                <conditional name="snvqual_filter">
                    <param name="snvqual" value="min-phred"/>
                    <param name="snvqual_thresh" value="38"/>
                </conditional>
                <conditional name="indelqual_filter">
                    <param name="indelqual" value="min-phred"/>
                    <param name="indelqual_thresh" value="20"/>
                </conditional>
            </section>
        </conditional>
        <param name="flag_or_drop" value="--print-all"/>
        <!-- With print-all every variant is retained; a variant that fails
             a filter carries the name of that filter in its FILTER column. -->
        <output name="outvcf">
            <assert_contents>
                <has_line_matching expression="pBR322&#009;1134&#009;.&#009;C&#009;T&#009;49314&#009;PASS&#009;.+"/>
                <has_line_matching expression="pBR322&#009;1193&#009;.&#009;G&#009;A&#009;0&#009;min_snvqual_38&#009;.+"/>
            </assert_contents>
        </output>
    </test>
</tests>
258 <help><![CDATA[
259 **What it does**
260
261 **Lofreq filter** tries to eliminate false-positive calls from a list of
262 variants in VCF format.
263
264 To this end, it applies a variety of user-configurable filters to the input
265 variants, which all operate on variant attributes expected to be embedded in
266 the VCF input.
267
268 Specifically, certain filters expect:
269
270 - the `QUAL` field of the variant records to be set
271 - any of the following subfields of a variant's `INFO` field:
272
273 * `DP` (required for coverage-based filtering)
274 * `AF` (required for filtering based on variant allele frequency)
275 * `SB` (required for filtering on strand bias)
276 * `DP4` (required for the compound strand bias filter)
277
278 ------
279
280 **Note**:
281
282 .. class:: warningmark
283
284 This tool is optimized for posterior filtering of variants called with
285 `Lofreq call`, which outputs all variant attributes required by the various
286 configurable filters.
287
288 If you are using `Lofreq filter` to filter VCF variant lists produced with
289 other tools, be prepared for surprises.
290
291 In general, if any piece of variant information required for applying a
292 given filter is missing from the input data, the tool will try to disable
293 that filter. Watch out for corresponding warnings in the tool's standard
294 output.
295
296 In addition, any p-value based filtering on variant qualities may behave
297 incorrectly since different variant callers might use different QUAL scales.
298 ]]></help>
299 <expand macro="citations" />
300 </tool>