comparison FilterMutectCalls.xml @ 0:c51c08cc9fcc draft

"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gatk4 commit 408454e8d10befcc76f38ab446091778537d4f31"
author artbio
date Wed, 29 Dec 2021 01:36:41 +0000
parents
children 646e6943bcd2
comparison
equal deleted inserted replaced
-1:000000000000 0:c51c08cc9fcc
1 <tool id="filtermutectcalls" name="gatk4 FilterMutectCalls" version="@WRAPPER_VERSION@" profile="18.05">
2 <description>Filter variants in a GATK4 Mutect2 VCF callset</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="version_cmd"/>
8 <command detect_errors="exit_code">
9 <![CDATA[
10 ## Resolve the reference FASTA path: a history dataset, or the path column of
11 ## the selected all_fasta data table entry. The selector offers only these two values.
12 #if str($reference_source.reference_source_selector) == 'history'
13 #set $ref_path = str($reference_source.reference_sequence)
14 #else
15 #set $ref_path = str($reference_source.reference_sequence.fields.path)
16 #end if
17
18 ## Link the reference into the working directory, then build the .fai index
19 ## and the .dict sequence dictionary next to it.
20 ln -s '$ref_path' reference.fa &&
21 samtools faidx reference.fa &&
22 gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" &&
23
24 ## Working-directory file names depend on the chosen input format; the output
25 ## names must match the from_work_dir attributes of the outputs section.
26 #set $is_bgzip = str($input_options.input_options_selector) == 'vcf_bgzip'
27 #if $is_bgzip
28 #set $vcf_name = 'input.vcf.gz'
29 #set $out_name = 'filtered.vcf.gz'
30 #else
31 #set $vcf_name = 'input.vcf'
32 #set $out_name = 'filtered.vcf'
33 #end if
34
35 ## The Mutect2 stats file is linked as <input>.stats; no --stats argument is passed (presumably GATK finds it by that name).
36 ln -s '$input_options.unfiltered_vcf_input' $vcf_name &&
37 ln -s '$gatk_vcf_stats' ${vcf_name}.stats &&
38 #if $is_bgzip
39 ## A bgzip-compressed VCF is indexed before it is handed to FilterMutectCalls.
40 gatk IndexFeatureFile --input $vcf_name &&
41 #end if
42
43 gatk FilterMutectCalls --QUIET --reference="reference.fa"
44 --variant $vcf_name
45 --output $out_name
46 ]]>
47 </command>
48 <inputs>
49 <conditional name="reference_source"><!-- reference FASTA: an all_fasta data table entry or a history dataset -->
50 <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
51 <option value="cached">Locally cached</option>
52 <option value="history" selected="true">History</option>
53 </param>
54 <when value="cached">
55 <param name="reference_sequence" type="select" label="Reference" help="Reference sequence file.">
56 <options from_data_table="all_fasta">
57 <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
58 </options>
59 </param>
60 </when>
61 <when value="history">
62 <param name="reference_sequence" type="data" format="fasta" label="Reference" help="Reference sequence file."/>
63 </when>
64 </conditional>
65 <conditional name="input_options"><!-- input VCF format: plain vcf or vcf_bgzip; the same selector drives the output filters -->
66 <param name="input_options_selector" type="select" label="Format of input variant dataset">
67 <option value="vcf" selected="true">vcf</option>
68 <option value="vcf_bgzip">vcf_bgzip</option>
69 </param>
70 <when value="vcf">
71 <param name="unfiltered_vcf_input" type="data" format="vcf" label="vcf input file."/>
72 </when>
73 <when value="vcf_bgzip">
74 <param name="unfiltered_vcf_input" type="data" format="vcf_bgzip" label="vcf_bgzip input file"/>
75 </when>
76 </conditional>
77 <param name="gatk_vcf_stats" type="data" format="tabular" label="gatk vcf stats" help="this stats file is generated by the Mutect2 tool"/>
78 </inputs>
79 <outputs><!-- one VCF/statistics pair per run, chosen by input_options_selector; the *.filteringStats.tsv names are not set in the command (presumably GATK's default next to the output, TODO confirm) -->
80 <data name="output_vcf" format="vcf" label="${tool.name} on ${on_string}: Filtered vcf" from_work_dir="filtered.vcf">
81 <filter>input_options['input_options_selector'] == 'vcf'</filter>
82 </data>
83 <data name="output_vcf_stats" format="tabular" label="${tool.name} on ${on_string}: Filtered vcf statistics" from_work_dir="filtered.vcf.filteringStats.tsv">
84 <filter>input_options['input_options_selector'] == 'vcf'</filter>
85 </data>
86 <data name="output_vcf_bgzip" format="vcf_bgzip" label="${tool.name} on ${on_string}: Filtered vcf (bgzip)" from_work_dir="filtered.vcf.gz">
87 <filter>input_options['input_options_selector'] == 'vcf_bgzip'</filter>
88 </data>
89 <data name="output_vcf_bgzip_stats" format="tabular" label="${tool.name} on ${on_string}: Filtered vcf (bgzip) statistics" from_work_dir="filtered.vcf.gz.filteringStats.tsv">
90 <filter>input_options['input_options_selector'] == 'vcf_bgzip'</filter>
91 </data>
92 </outputs>
93 <tests>
94 <test><!-- plain VCF input; input_options_selector is left at its default value (vcf) -->
95 <param name="reference_source_selector" value="history"/>
96 <param name="reference_sequence" value="reference.fa" ftype="fasta"/>
97 <conditional name="input_options">
98 <param name="unfiltered_vcf_input" value="Mutect2-out1.vcf" ftype="vcf"/>
99 </conditional>
100 <param name="gatk_vcf_stats" value="Mutect2-out1.vcf.stats" ftype="tabular"/>
101 <output name="output_vcf" file="filtered_Mutect2-out1.vcf" lines_diff="2"/>
102 <output name="output_vcf_stats" file="filtered_Mutect2-out1_stats.tsv"/>
103 </test>
104 <test><!-- bgzip-compressed input: only the *_bgzip outputs pass their filter, so those are the outputs asserted here -->
105 <param name="reference_source_selector" value="history" />
106 <param name="reference_sequence" ftype="fasta" value="chr20.fa" />
107 <conditional name="input_options">
108 <param name="input_options_selector" value="vcf_bgzip" />
109 <param name="unfiltered_vcf_input" ftype="vcf_bgzip" value="Mutect2-out6.vcf_bgzip" />
110 </conditional>
111 <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out6.vcf_bgzip.stats" />
112 <output name="output_vcf_bgzip" file="filtered_Mutect2-out6.vcf_bgzip" compare="sim_size" />
113 <output name="output_vcf_bgzip_stats" file="filtered_Mutect2-out6_stats.tsv" />
114 </test>
115 <test><!-- plain VCF input with the selector set explicitly; the stats input is the file named Mutect2-out6.vcf_bgzip.stats -->
116 <param name="reference_source_selector" value="history"/>
117 <param name="reference_sequence" value="chr20.fa" ftype="fasta"/>
118 <conditional name="input_options">
119 <param name="input_options_selector" value="vcf"/>
120 <param name="unfiltered_vcf_input" value="Mutect2-out6.vcf" ftype="vcf"/>
121 </conditional>
122 <param name="gatk_vcf_stats" value="Mutect2-out6.vcf_bgzip.stats" ftype="tabular"/>
123 <output name="output_vcf" file="filtered_Mutect2-out6.vcf" lines_diff="2"/>
124 <output name="output_vcf_stats" file="filtered_Mutect2-out6_stats.tsv"/>
125 </test>
126 </tests>
127 <help><![CDATA[
128 Usage examples
129 ~~~~~~~~~~~~~~
130
131
132 ::
133
134 gatk FilterMutectCalls \\
135
136 -R reference.fasta \\
137
138 -V somatic.vcf.gz \\
139
140 --contamination-table contamination.table \\
141
142 --tumor-segmentation segments.tsv \\
143
144 -O filtered.vcf.gz
145
146
147 When running on the unfiltered output of Mutect2 in --mitochondria mode, setting the advanced
148 --autosomal-coverage argument (default 0) activates a recommended filter against
149 likely erroneously mapped NuMTs (nuclear mitochondrial DNA segments -- https://en.wikipedia.org/wiki/NUMT).
150 For its value, provide the median coverage expected in autosomal regions with coverage.
151
152
153 Usage
154 ~~~~~
155
156
157 ::
158
159 USAGE: FilterMutectCalls [arguments]
160
161 Filter somatic SNVs and indels called by Mutect2
162 Version:4.1.7.0
163
164
165 Required Arguments:
166
167 --output,-O:String The output filtered VCF file Required.
168
169 --reference,-R:GATKPathSpecifier
170 Reference sequence file Required.
171
172 --variant,-V:String A VCF file containing variants Required.
173
174
175 Optional Arguments:
176
177 --add-output-sam-program-record,-add-output-sam-program-record:Boolean
178 If true, adds a PG tag to created SAM/BAM/CRAM files. Default value: true. Possible
179 values: {true, false}
180
181 --add-output-vcf-command-line,-add-output-vcf-command-line:Boolean
182 If true, adds a command line header line to created VCF files. Default value: true.
183 Possible values: {true, false}
184
185 --arguments_file:File read one or more arguments files and add them to the command line This argument may be
186 specified 0 or more times. Default value: null.
187
188 --cloud-index-prefetch-buffer,-CIPB:Integer
189 Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to
190 cloudPrefetchBuffer if unset. Default value: -1.
191
192 --cloud-prefetch-buffer,-CPB:Integer
193 Size of the cloud-only prefetch buffer (in MB; 0 to disable). Default value: 40.
194
195 --contamination-estimate:Double
196 Estimate of contamination. Default value: 0.0.
197
198 --contamination-table:File Tables containing contamination information. This argument may be specified 0 or more
199 times. Default value: null.
200
201 --create-output-bam-index,-OBI:Boolean
202 If true, create a BAM/CRAM index when writing a coordinate-sorted BAM/CRAM file. Default
203 value: true. Possible values: {true, false}
204
205 --create-output-bam-md5,-OBM:Boolean
206 If true, create a MD5 digest for any BAM/SAM/CRAM file created Default value: false.
207 Possible values: {true, false}
208
209 --create-output-variant-index,-OVI:Boolean
210 If true, create a VCF index when writing a coordinate-sorted VCF file. Default value:
211 true. Possible values: {true, false}
212
213 --create-output-variant-md5,-OVM:Boolean
214 If true, create a MD5 digest for any VCF file created. Default value: false. Possible
215 values: {true, false}
216
217 --disable-bam-index-caching,-DBIC:Boolean
218 If true, don't cache bam indexes, this will reduce memory requirements but may harm
219 performance if many intervals are specified. Caching is automatically disabled if there
220 are no intervals specified. Default value: false. Possible values: {true, false}
221
222 --disable-read-filter,-DF:String
223 Read filters to be disabled before analysis This argument may be specified 0 or more
224 times. Default value: null. Possible Values: {WellformedReadFilter}
225
226 --disable-sequence-dictionary-validation,-disable-sequence-dictionary-validation:Boolean
227 If specified, do not check the sequence dictionaries from our inputs for compatibility.
228 Use at your own risk! Default value: false. Possible values: {true, false}
229
230 --distance-on-haplotype:Integer
231 On second filtering pass, variants with same PGT and PID tags as a filtered variant within
232 this distance are filtered. Default value: 100.
233
234 --exclude-intervals,-XL:String One or more genomic intervals to exclude from processing This argument may be specified 0
235 or more times. Default value: null.
236
237 --f-score-beta:Double F score beta, the relative weight of recall to precision, used if OPTIMAL_F_SCORE strategy
238 is chosen Default value: 1.0.
239
240 --false-discovery-rate:Double Maximum false discovery rate allowed if FALSE_DISCOVERY_RATE threshold strategy is chosen
241 Default value: 0.05.
242
243 --filtering-stats:String The output filtering stats file Default value: null.
244
245 --gatk-config-file:String A configuration file to use with the GATK. Default value: null.
246
247 --gcs-max-retries,-gcs-retries:Integer
248 If the GCS bucket channel errors out, how many times it will attempt to re-initiate the
249 connection Default value: 20.
250
251 --gcs-project-for-requester-pays:String
252 Project to bill when accessing "requester pays" buckets. If unset, these buckets cannot be
253 accessed. Default value: .
254
255 --help,-h:Boolean display the help message Default value: false. Possible values: {true, false}
256
257 --initial-threshold:Double Initial artifact probability threshold used in first iteration Default value: 0.1.
258
259 --input,-I:String BAM/SAM/CRAM file containing reads This argument may be specified 0 or more times.
260 Default value: null.
261
262 --interval-exclusion-padding,-ixp:Integer
263 Amount of padding (in bp) to add to each interval you are excluding. Default value: 0.
264
265 --interval-merging-rule,-imr:IntervalMergingRule
266 Interval merging rule for abutting intervals Default value: ALL. Possible values: {ALL,
267 OVERLAPPING_ONLY}
268
269 --interval-padding,-ip:Integer Amount of padding (in bp) to add to each interval you are including. Default value: 0.
270
271 --interval-set-rule,-isr:IntervalSetRule
272 Set merging approach to use for combining interval inputs Default value: UNION. Possible
273 values: {UNION, INTERSECTION}
274
275 --intervals,-L:String One or more genomic intervals over which to operate This argument may be specified 0 or
276 more times. Default value: null.
277
278 --lenient,-LE:Boolean Lenient processing of VCF files Default value: false. Possible values: {true, false}
279
280 --log-artifact-prior:Double Initial ln prior probability that a called site is not a technical artifact Default
281 value: -2.302585092994046.
282
283 --log-indel-prior:Double Initial ln prior probability that a site has a somatic indel Default value:
284 -16.11809565095832.
285
286 --log-snv-prior:Double Initial ln prior probability that a site has a somatic SNV Default value:
287 -13.815510557964275.
288
289 --long-indel-length:Integer Indels of this length or greater are treated specially by the mapping quality filter.
290 Default value: 5.
291
292 --max-alt-allele-count:Integer Maximum alt alleles per site. Default value: 1.
293
294 --max-events-in-region:Integer Maximum events in a single assembly region. Filter all variants if exceeded. Default
295 value: 2.
296
297 --max-median-fragment-length-difference:Integer
298 Maximum difference between median alt and ref fragment lengths Default value: 10000.
299
300 --max-n-ratio:Double Maximum fraction of non-ref bases in the pileup that are N (unknown) Default value:
301 Infinity.
302
303 --min-allele-fraction:Double Minimum allele fraction required Default value: 0.0.
304
305 --min-median-base-quality:Integer
306 Minimum median base quality of alt reads Default value: 20.
307
308 --min-median-mapping-quality:Integer
309 Minimum median mapping quality of alt reads Default value: 30.
310
311 --min-median-read-position:Integer
312 Minimum median distance of variants from the end of reads Default value: 1.
313
314 --min-reads-per-strand:Integer Minimum alt reads required on both forward and reverse strands Default value: 0.
315
316 --min-slippage-length:Integer Minimum number of reference bases in an STR to suspect polymerase slippage Default value:
317 8.
318
319 --mitochondria-mode:Boolean Set filters to mitochondrial defaults Default value: false. Possible values: {true,
320 false}
321
322 --normal-p-value-threshold:Double
323 P value threshold for normal artifact filter Default value: 0.001.
324
325 --orientation-bias-artifact-priors,-ob-priors:File
326 One or more .tar.gz files containing tables of prior artifact probabilities for the read
327 orientation filter model, one table per tumor sample This argument may be specified 0 or
328 more times. Default value: null.
329
330 --pcr-slippage-rate:Double The frequency of polymerase slippage in contexts where it is suspected Default value:
331 0.1.
332
333 --QUIET:Boolean Whether to suppress job-summary info on System.err. Default value: false. Possible
334 values: {true, false}
335
336 --read-filter,-RF:String Read filters to be applied before analysis This argument may be specified 0 or more
337 times. Default value: null. Possible Values: {AlignmentAgreesWithHeaderReadFilter,
338 AllowAllReadsReadFilter, AmbiguousBaseReadFilter, CigarContainsNoNOperator,
339 FirstOfPairReadFilter, FragmentLengthReadFilter, GoodCigarReadFilter,
340 HasReadGroupReadFilter, IntervalOverlapReadFilter, LibraryReadFilter, MappedReadFilter,
341 MappingQualityAvailableReadFilter, MappingQualityNotZeroReadFilter,
342 MappingQualityReadFilter, MatchingBasesAndQualsReadFilter, MateDifferentStrandReadFilter,
343 MateDistantReadFilter, MateOnSameContigOrNoMappedMateReadFilter,
344 MateUnmappedAndUnmappedReadFilter, MetricsReadFilter,
345 NonChimericOriginalAlignmentReadFilter, NonZeroFragmentLengthReadFilter,
346 NonZeroReferenceLengthAlignmentReadFilter, NotDuplicateReadFilter,
347 NotOpticalDuplicateReadFilter, NotProperlyPairedReadFilter,
348 NotSecondaryAlignmentReadFilter, NotSupplementaryAlignmentReadFilter,
349 OverclippedReadFilter, PairedReadFilter, PassesVendorQualityCheckReadFilter,
350 PlatformReadFilter, PlatformUnitReadFilter, PrimaryLineReadFilter,
351 ProperlyPairedReadFilter, ReadGroupBlackListReadFilter, ReadGroupReadFilter,
352 ReadLengthEqualsCigarLengthReadFilter, ReadLengthReadFilter, ReadNameReadFilter,
353 ReadStrandFilter, SampleReadFilter, SecondOfPairReadFilter, SeqIsStoredReadFilter,
354 SoftClippedReadFilter, ValidAlignmentEndReadFilter, ValidAlignmentStartReadFilter,
355 WellformedReadFilter}
356
357 --read-index,-read-index:String
358 Indices to use for the read inputs. If specified, an index must be provided for every read
359 input and in the same order as the read inputs. If this argument is not specified, the
360 path to the index for each input will be inferred automatically. This argument may be
361 specified 0 or more times. Default value: null.
362
363 --read-validation-stringency,-VS:ValidationStringency
364 Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The default
365 stringency value SILENT can improve performance when processing a BAM file in which
366 variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default
367 value: SILENT. Possible values: {STRICT, LENIENT, SILENT}
368
369 --seconds-between-progress-updates,-seconds-between-progress-updates:Double
370 Output traversal statistics every time this many seconds elapse Default value: 10.0.
371
372 --sequence-dictionary,-sequence-dictionary:String
373 Use the given sequence dictionary as the master/canonical sequence dictionary. Must be a
374 .dict file. Default value: null.
375
376 --sites-only-vcf-output:Boolean
377 If true, don't emit genotype fields when writing vcf file output. Default value: false.
378 Possible values: {true, false}
379
380 --stats:String The Mutect stats file output by Mutect2 Default value: null.
381
382 --threshold-strategy:Strategy The method for optimizing the posterior probability threshold Default value:
383 OPTIMAL_F_SCORE. Possible values: {CONSTANT, FALSE_DISCOVERY_RATE, OPTIMAL_F_SCORE}
384
385 --tmp-dir:GATKPathSpecifier Temp directory to use. Default value: null.
386
387 --tumor-segmentation:File Tables containing tumor segments' minor allele fractions for germline hets emitted by
388 CalculateContamination This argument may be specified 0 or more times. Default value:
389 null.
390
391 --unique-alt-read-count,-unique:Integer
392 Minimum unique (i.e. deduplicated) reads supporting the alternate allele Default value:
393 0.
394
395 --use-jdk-deflater,-jdk-deflater:Boolean
396 Whether to use the JdkDeflater (as opposed to IntelDeflater) Default value: false.
397 Possible values: {true, false}
398
399 --use-jdk-inflater,-jdk-inflater:Boolean
400 Whether to use the JdkInflater (as opposed to IntelInflater) Default value: false.
401 Possible values: {true, false}
402
403 --verbosity,-verbosity:LogLevel
404 Control verbosity of logging. Default value: INFO. Possible values: {ERROR, WARNING,
405 INFO, DEBUG}
406
407 --version:Boolean display the version number for this tool Default value: false. Possible values: {true,
408 false}
409
410
411 Advanced Arguments:
412
413 --disable-tool-default-read-filters,-disable-tool-default-read-filters:Boolean
414 Disable all tool default read filters (WARNING: many tools will not function correctly
415 without their default read filters on) Default value: false. Possible values: {true,
416 false}
417
418 --showHidden,-showHidden:Boolean
419 display hidden arguments Default value: false. Possible values: {true, false}
420
421 Conditional Arguments for readFilter:
422
423 Valid only if "AmbiguousBaseReadFilter" is specified:
424 --ambig-filter-bases:Integer Threshold number of ambiguous bases. If null, uses threshold fraction; otherwise,
425 overrides threshold fraction. Default value: null. Cannot be used in conjunction with
426 argument(s) maxAmbiguousBaseFraction
427
428 --ambig-filter-frac:Double Threshold fraction of ambiguous bases Default value: 0.05. Cannot be used in conjunction
429 with argument(s) maxAmbiguousBases
430
431 Valid only if "FragmentLengthReadFilter" is specified:
432 --max-fragment-length:Integer Maximum length of fragment (insert size) Default value: 1000000.
433
434 --min-fragment-length:Integer Minimum length of fragment (insert size) Default value: 0.
435
436 Valid only if "IntervalOverlapReadFilter" is specified:
437 --keep-intervals:String One or more genomic intervals to keep This argument must be specified at least once.
438 Required.
439
440 Valid only if "LibraryReadFilter" is specified:
441 --library,-library:String Name of the library to keep This argument must be specified at least once. Required.
442
443 Valid only if "MappingQualityReadFilter" is specified:
444 --maximum-mapping-quality:Integer
445 Maximum mapping quality to keep (inclusive) Default value: null.
446
447 --minimum-mapping-quality:Integer
448 Minimum mapping quality to keep (inclusive) Default value: 10.
449
450 Valid only if "MateDistantReadFilter" is specified:
451 --mate-too-distant-length:Integer
452 Minimum start location difference at which mapped mates are considered distant Default
453 value: 1000.
454
455 Valid only if "OverclippedReadFilter" is specified:
456 --dont-require-soft-clips-both-ends:Boolean
457 Allow a read to be filtered out based on having only 1 soft-clipped block. By default,
458 both ends must have a soft-clipped block, setting this flag requires only 1 soft-clipped
459 block Default value: false. Possible values: {true, false}
460
461 --filter-too-short:Integer Minimum number of aligned bases Default value: 30.
462
463 Valid only if "PlatformReadFilter" is specified:
464 --platform-filter-name:String Platform attribute (PL) to match This argument must be specified at least once. Required.
465
466 Valid only if "PlatformUnitReadFilter" is specified:
467 --black-listed-lanes:String Platform unit (PU) to filter out This argument must be specified at least once. Required.
468
469 Valid only if "ReadGroupBlackListReadFilter" is specified:
470 --read-group-black-list:String A read group filter expression in the form "attribute:value", where "attribute" is a two
471 character read group attribute such as "RG" or "PU". This argument must be specified at
472 least once. Required.
473
474 Valid only if "ReadGroupReadFilter" is specified:
475 --keep-read-group:String The name of the read group to keep Required.
476
477 Valid only if "ReadLengthReadFilter" is specified:
478 --max-read-length:Integer Keep only reads with length at most equal to the specified value Required.
479
480 --min-read-length:Integer Keep only reads with length at least equal to the specified value Default value: 1.
481
482 Valid only if "ReadNameReadFilter" is specified:
483 --read-name:String Keep only reads with this read name Required.
484
485 Valid only if "ReadStrandFilter" is specified:
486 --keep-reverse-strand-only:Boolean
487 Keep only reads on the reverse strand Required. Possible values: {true, false}
488
489 Valid only if "SampleReadFilter" is specified:
490 --sample,-sample:String The name of the sample(s) to keep, filtering out all others This argument must be
491 specified at least once. Required.
492
493 Valid only if "SoftClippedReadFilter" is specified:
494 --invert-soft-clip-ratio-filter:Boolean
495 Inverts the results from this filter, causing all variants that would pass to fail and
496 vice versa. Default value: false. Possible values: {true, false}
497
498 --soft-clipped-leading-trailing-ratio:Double
499 Threshold ratio of soft clipped bases (leading / trailing the cigar string) to total bases
500 in read for read to be filtered. Default value: null. Cannot be used in conjunction with
501 argument(s) minimumSoftClippedRatio
502
503 --soft-clipped-ratio-threshold:Double
504 Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in
505 read for read to be filtered. Default value: null. Cannot be used in conjunction with
506 argument(s) minimumLeadingTrailingSoftClippedRatio
507
508
509 ]]></help>
510 <citations>
511 <expand macro="citations"/>
512 </citations>
513 </tool>