Mercurial > repos > artbio > gatk4
view FilterMutectCalls.xml @ 2:646e6943bcd2 draft default tip
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/main/tools/gatk4 commit 9c42369510cce59bf0dcb0edb440322d17b18339
author | artbio |
---|---|
date | Sun, 15 Oct 2023 12:06:24 +0000 |
parents | c51c08cc9fcc |
children |
line wrap: on
line source
<!--
    Galaxy wrapper around "gatk FilterMutectCalls".
    Takes an unfiltered Mutect2 callset (plain or bgzip-compressed VCF), the
    Mutect2 stats file and a reference FASTA (from the history or from the
    all_fasta data table), and emits the filtered callset plus the
    filteringStats.tsv table written alongside it.
-->
<tool id="filtermutectcalls" name="gatk4 FilterMutectCalls" version="@WRAPPER_VERSION@" profile="21.01">
    <description>Filter variants in a GATK4 Mutect2 VCF callset</description>
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="requirements"/>
    <expand macro="version_cmd"/>
    <command detect_errors="exit_code"><![CDATA[
        ## Stage the reference as ./reference.fa, then build its .fai index and
        ## .dict sequence dictionary in the working directory.
        #set ref_flag='--reference="reference.fa"'
        #if str($reference_source.reference_source_selector) == 'history'
            ln -s '$reference_source.reference_sequence' reference.fa &&
            samtools faidx reference.fa &&
            gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" &&
        #else if str($reference_source.reference_source_selector) == 'cached'
            ln -s '$reference_source.reference_sequence.fields.path' reference.fa &&
            samtools faidx reference.fa &&
            gatk CreateSequenceDictionary --REFERENCE="reference.fa" --OUTPUT="reference.dict" &&
        #else
            ## Fallback kept from the original wrapper: no reference is passed
            ## when the selector holds neither of the two known values.
            #set ref_flag=''
        #end if

        ## Stage the callset under a name carrying the expected extension.
        ## The Mutect2 stats file is linked next to it as <vcf>.stats; no --stats
        ## option is passed below, so this presumably relies on GATK locating the
        ## stats file by that name (TODO confirm against the GATK documentation).
        #if str($input_options.input_options_selector) == 'vcf'
            ln -s '$input_options.unfiltered_vcf_input' input.vcf &&
            ln -s '$gatk_vcf_stats' input.vcf.stats &&
        #else if str($input_options.input_options_selector) == 'vcf_bgzip'
            ln -s '$input_options.unfiltered_vcf_input' input.vcf.gz &&
            ln -s '$gatk_vcf_stats' input.vcf.gz.stats &&
            gatk IndexFeatureFile --input input.vcf.gz &&
        #end if

        gatk FilterMutectCalls
            --QUIET
            $ref_flag
            --variant
            #if str($input_options.input_options_selector) == 'vcf'
                input.vcf
            #else if str($input_options.input_options_selector) == 'vcf_bgzip'
                input.vcf.gz
            #end if
            --output
            #if str($input_options.input_options_selector) == 'vcf'
                filtered.vcf
            #else if str($input_options.input_options_selector) == 'vcf_bgzip'
                filtered.vcf.gz
            #end if
    ]]></command>
    <inputs>
        <!-- Reference genome: either a data table entry or a FASTA dataset from the history. -->
        <conditional name="reference_source">
            <param name="reference_source_selector" type="select" label="Choose the source for the reference list">
                <option value="cached">Locally cached</option>
                <option value="history" selected="true">History</option>
            </param>
            <when value="cached">
                <param name="reference_sequence" type="select" label="Reference" help="Reference sequence file." >
                    <options from_data_table="all_fasta" >
                        <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file" />
                    </options>
                </param>
            </when>
            <when value="history">
                <param name="reference_sequence" type="data" format="fasta" label="Reference" help="Reference sequence file." />
            </when>
        </conditional>
        <!-- Input callset format; the same choice decides which pair of outputs is produced. -->
        <conditional name="input_options">
            <param name="input_options_selector" type="select" label="Format of input variant dataset">
                <option value="vcf" selected="true">vcf</option>
                <option value="vcf_bgzip">vcf_bgzip</option>
            </param>
            <when value="vcf">
                <param name="unfiltered_vcf_input" type="data" format="vcf" label="vcf input file." />
            </when>
            <when value="vcf_bgzip">
                <param name="unfiltered_vcf_input" type="data" format="vcf_bgzip" label="vcf_bgzip input file" />
            </when>
        </conditional>
        <param format="tabular" name="gatk_vcf_stats" type="data" label="gatk vcf stats" help="this stats file is generated by the Mutect2 tool"/>
    </inputs>
    <outputs>
        <!-- Each filter keeps exactly one (callset, statistics) pair, matching the input format. -->
        <data format="vcf" name="output_vcf" label="${tool.name} on ${on_string}: Filtered vcf" from_work_dir="filtered.vcf" >
            <filter>input_options['input_options_selector'] == 'vcf'</filter>
        </data>
        <data format="tabular" name="output_vcf_stats" label="${tool.name}: Filtered vcf statistics" from_work_dir="filtered.vcf.filteringStats.tsv" >
            <filter>input_options['input_options_selector'] == 'vcf'</filter>
        </data>
        <data format="vcf_bgzip" name="output_vcf_bgzip" label="${tool.name} on ${on_string}: Filtered vcf (bgzip)" from_work_dir="filtered.vcf.gz" >
            <filter>input_options['input_options_selector'] == 'vcf_bgzip'</filter>
        </data>
        <data format="tabular" name="output_vcf_bgzip_stats" label="${tool.name}: Filtered vcf (bgzip) statistics" from_work_dir="filtered.vcf.gz.filteringStats.tsv" >
            <filter>input_options['input_options_selector'] == 'vcf_bgzip'</filter>
        </data>
    </outputs>
    <tests>
        <!-- Plain VCF input, small reference. -->
        <test expect_num_outputs="2">
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history" />
                <param name="reference_sequence" ftype="fasta" value="reference.fa" />
            </conditional>
            <conditional name="input_options">
                <param name="input_options_selector" value="vcf" />
                <param name="unfiltered_vcf_input" ftype="vcf" value="Mutect2-out1.vcf" />
            </conditional>
            <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out1.vcf.stats" />
            <output name="output_vcf" file="filtered_Mutect2-out1.vcf" lines_diff="2" />
            <output name="output_vcf_stats" file="filtered_Mutect2-out1_stats.tsv" />
        </test>
        <!-- bgzip input: the outputs that survive the filters are the *_bgzip ones. -->
        <test expect_num_outputs="2">
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history" />
                <param name="reference_sequence" ftype="fasta" value="chr20.fa" />
            </conditional>
            <conditional name="input_options">
                <param name="input_options_selector" value="vcf_bgzip" />
                <param name="unfiltered_vcf_input" ftype="vcf_bgzip" value="Mutect2-out6.vcf_bgzip" />
            </conditional>
            <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out6.vcf_bgzip.stats" />
            <output name="output_vcf_bgzip" file="filtered_Mutect2-out6.vcf_bgzip" compare="sim_size" />
            <output name="output_vcf_bgzip_stats" file="filtered_Mutect2-out6_stats.tsv" />
        </test>
        <!-- Plain VCF input on the chr20 reference, reusing the stats file of the bgzip test. -->
        <test expect_num_outputs="2">
            <conditional name="reference_source">
                <param name="reference_source_selector" value="history" />
                <param name="reference_sequence" ftype="fasta" value="chr20.fa" />
            </conditional>
            <conditional name="input_options">
                <param name="input_options_selector" value="vcf" />
                <param name="unfiltered_vcf_input" ftype="vcf" value="Mutect2-out6.vcf" />
            </conditional>
            <param name="gatk_vcf_stats" ftype="tabular" value="Mutect2-out6.vcf_bgzip.stats" />
            <output name="output_vcf" file="filtered_Mutect2-out6.vcf" lines_diff="2" />
            <output name="output_vcf_stats" file="filtered_Mutect2-out6_stats.tsv" />
        </test>
    </tests>
    <help><![CDATA[
Usage examples
~~~~~~~~~~~~~~

::

    gatk FilterMutectCalls \\
    -R reference.fasta \\
    -V somatic.vcf.gz \\
    --contamination-table contamination.table \\
    --tumor-segmentation segments.tsv \\
    -O filtered.vcf.gz

When running on unfiltered output of Mutect2 in --mitochondria
mode, setting the advanced option --autosomal-coverage argument (default 0) activates a recommended filter against likely erroneously mapped NuMTs (nuclear mitochondrial DNA segments -- https://en.wikipedia.org/wiki/NUMT). For the value, provide the median coverage expected in autosomal regions with coverage. Usage ~~~~~ :: USAGE: FilterMutectCalls [arguments] Filter somatic SNVs and indels called by Mutect2 Version:4.1.7.0 Required Arguments: --output,-O:String The output filtered VCF file Required. --reference,-R:GATKPathSpecifier Reference sequence file Required. --variant,-V:String A VCF file containing variants Required. Optional Arguments: --add-output-sam-program-record,-add-output-sam-program-record:Boolean If true, adds a PG tag to created SAM/BAM/CRAM files. Default value: true. Possible values: {true, false} --add-output-vcf-command-line,-add-output-vcf-command-line:Boolean If true, adds a command line header line to created VCF files. Default value: true. Possible values: {true, false} --arguments_file:File read one or more arguments files and add them to the command line This argument may be specified 0 or more times. Default value: null. --cloud-index-prefetch-buffer,-CIPB:Integer Size of the cloud-only prefetch buffer (in MB; 0 to disable). Defaults to cloudPrefetchBuffer if unset. Default value: -1. --cloud-prefetch-buffer,-CPB:Integer Size of the cloud-only prefetch buffer (in MB; 0 to disable). Default value: 40. --contamination-estimate:Double Estimate of contamination. Default value: 0.0. --contamination-table:File Tables containing contamination information. This argument may be specified 0 or more times. Default value: null. --create-output-bam-index,-OBI:Boolean If true, create a BAM/CRAM index when writing a coordinate-sorted BAM/CRAM file. Default value: true. Possible values: {true, false} --create-output-bam-md5,-OBM:Boolean If true, create a MD5 digest for any BAM/SAM/CRAM file created Default value: false. 
Possible values: {true, false} --create-output-variant-index,-OVI:Boolean If true, create a VCF index when writing a coordinate-sorted VCF file. Default value: true. Possible values: {true, false} --create-output-variant-md5,-OVM:Boolean If true, create an MD5 digest for any VCF file created. Default value: false. Possible values: {true, false} --disable-bam-index-caching,-DBIC:Boolean If true, don't cache bam indexes, this will reduce memory requirements but may harm performance if many intervals are specified. Caching is automatically disabled if there are no intervals specified. Default value: false. Possible values: {true, false} --disable-read-filter,-DF:String Read filters to be disabled before analysis This argument may be specified 0 or more times. Default value: null. Possible Values: {WellformedReadFilter} --disable-sequence-dictionary-validation,-disable-sequence-dictionary-validation:Boolean If specified, do not check the sequence dictionaries from our inputs for compatibility. Use at your own risk! Default value: false. Possible values: {true, false} --distance-on-haplotype:Integer On second filtering pass, variants with same PGT and PID tags as a filtered variant within this distance are filtered. Default value: 100. --exclude-intervals,-XL:String One or more genomic intervals to exclude from processing This argument may be specified 0 or more times. Default value: null. --f-score-beta:Double F score beta, the relative weight of recall to precision, used if OPTIMAL_F_SCORE strategy is chosen Default value: 1.0. --false-discovery-rate:Double Maximum false discovery rate allowed if FALSE_DISCOVERY_RATE threshold strategy is chosen Default value: 0.05. --filtering-stats:String The output filtering stats file Default value: null. --gatk-config-file:String A configuration file to use with the GATK. Default value: null. 
--gcs-max-retries,-gcs-retries:Integer If the GCS bucket channel errors out, how many times it will attempt to re-initiate the connection Default value: 20. --gcs-project-for-requester-pays:String Project to bill when accessing "requester pays" buckets. If unset, these buckets cannot be accessed. Default value: . --help,-h:Boolean display the help message Default value: false. Possible values: {true, false} --initial-threshold:Double Initial artifact probability threshold used in first iteration Default value: 0.1. --input,-I:String BAM/SAM/CRAM file containing reads This argument may be specified 0 or more times. Default value: null. --interval-exclusion-padding,-ixp:Integer Amount of padding (in bp) to add to each interval you are excluding. Default value: 0. --interval-merging-rule,-imr:IntervalMergingRule Interval merging rule for abutting intervals Default value: ALL. Possible values: {ALL, OVERLAPPING_ONLY} --interval-padding,-ip:Integer Amount of padding (in bp) to add to each interval you are including. Default value: 0. --interval-set-rule,-isr:IntervalSetRule Set merging approach to use for combining interval inputs Default value: UNION. Possible values: {UNION, INTERSECTION} --intervals,-L:String One or more genomic intervals over which to operate This argument may be specified 0 or more times. Default value: null. --lenient,-LE:Boolean Lenient processing of VCF files Default value: false. Possible values: {true, false} --log-artifact-prior:Double Initial ln prior probability that a called site is not a technical artifact Default value: -2.302585092994046. --log-indel-prior:Double Initial ln prior probability that a site has a somatic indel Default value: -16.11809565095832. --log-snv-prior:Double Initial ln prior probability that a site has a somatic SNV Default value: -13.815510557964275. --long-indel-length:Integer Indels of this length or greater are treated specially by the mapping quality filter. Default value: 5. 
--max-alt-allele-count:Integer Maximum alt alleles per site. Default value: 1. --max-events-in-region:Integer Maximum events in a single assembly region. Filter all variants if exceeded. Default value: 2. --max-median-fragment-length-difference:Integer Maximum difference between median alt and ref fragment lengths Default value: 10000. --max-n-ratio:Double Maximum fraction of non-ref bases in the pileup that are N (unknown) Default value: Infinity. --min-allele-fraction:Double Minimum allele fraction required Default value: 0.0. --min-median-base-quality:Integer Minimum median base quality of alt reads Default value: 20. --min-median-mapping-quality:Integer Minimum median mapping quality of alt reads Default value: 30. --min-median-read-position:Integer Minimum median distance of variants from the end of reads Default value: 1. --min-reads-per-strand:Integer Minimum alt reads required on both forward and reverse strands Default value: 0. --min-slippage-length:Integer Minimum number of reference bases in an STR to suspect polymerase slippage Default value: 8. --mitochondria-mode:Boolean Set filters to mitochondrial defaults Default value: false. Possible values: {true, false} --normal-p-value-threshold:Double P value threshold for normal artifact filter Default value: 0.001. --orientation-bias-artifact-priors,-ob-priors:File One or more .tar.gz files containing tables of prior artifact probabilities for the read orientation filter model, one table per tumor sample This argument may be specified 0 or more times. Default value: null. --pcr-slippage-rate:Double The frequency of polymerase slippage in contexts where it is suspected Default value: 0.1. --QUIET:Boolean Whether to suppress job-summary info on System.err. Default value: false. Possible values: {true, false} --read-filter,-RF:String Read filters to be applied before analysis This argument may be specified 0 or more times. Default value: null. 
Possible Values: {AlignmentAgreesWithHeaderReadFilter, AllowAllReadsReadFilter, AmbiguousBaseReadFilter, CigarContainsNoNOperator, FirstOfPairReadFilter, FragmentLengthReadFilter, GoodCigarReadFilter, HasReadGroupReadFilter, IntervalOverlapReadFilter, LibraryReadFilter, MappedReadFilter, MappingQualityAvailableReadFilter, MappingQualityNotZeroReadFilter, MappingQualityReadFilter, MatchingBasesAndQualsReadFilter, MateDifferentStrandReadFilter, MateDistantReadFilter, MateOnSameContigOrNoMappedMateReadFilter, MateUnmappedAndUnmappedReadFilter, MetricsReadFilter, NonChimericOriginalAlignmentReadFilter, NonZeroFragmentLengthReadFilter, NonZeroReferenceLengthAlignmentReadFilter, NotDuplicateReadFilter, NotOpticalDuplicateReadFilter, NotProperlyPairedReadFilter, NotSecondaryAlignmentReadFilter, NotSupplementaryAlignmentReadFilter, OverclippedReadFilter, PairedReadFilter, PassesVendorQualityCheckReadFilter, PlatformReadFilter, PlatformUnitReadFilter, PrimaryLineReadFilter, ProperlyPairedReadFilter, ReadGroupBlackListReadFilter, ReadGroupReadFilter, ReadLengthEqualsCigarLengthReadFilter, ReadLengthReadFilter, ReadNameReadFilter, ReadStrandFilter, SampleReadFilter, SecondOfPairReadFilter, SeqIsStoredReadFilter, SoftClippedReadFilter, ValidAlignmentEndReadFilter, ValidAlignmentStartReadFilter, WellformedReadFilter} --read-index,-read-index:String Indices to use for the read inputs. If specified, an index must be provided for every read input and in the same order as the read inputs. If this argument is not specified, the path to the index for each input will be inferred automatically. This argument may be specified 0 or more times. Default value: null. --read-validation-stringency,-VS:ValidationStringency Validation stringency for all SAM/BAM/CRAM/SRA files read by this program. The default stringency value SILENT can improve performance when processing a BAM file in which variable-length data (read, qualities, tags) do not otherwise need to be decoded. Default value: SILENT. 
Possible values: {STRICT, LENIENT, SILENT} --seconds-between-progress-updates,-seconds-between-progress-updates:Double Output traversal statistics every time this many seconds elapse Default value: 10.0. --sequence-dictionary,-sequence-dictionary:String Use the given sequence dictionary as the master/canonical sequence dictionary. Must be a .dict file. Default value: null. --sites-only-vcf-output:Boolean If true, don't emit genotype fields when writing vcf file output. Default value: false. Possible values: {true, false} --stats:String The Mutect stats file output by Mutect2 Default value: null. --threshold-strategy:Strategy The method for optimizing the posterior probability threshold Default value: OPTIMAL_F_SCORE. Possible values: {CONSTANT, FALSE_DISCOVERY_RATE, OPTIMAL_F_SCORE} --tmp-dir:GATKPathSpecifier Temp directory to use. Default value: null. --tumor-segmentation:File Tables containing tumor segments' minor allele fractions for germline hets emitted by CalculateContamination This argument may be specified 0 or more times. Default value: null. --unique-alt-read-count,-unique:Integer Minimum unique (i.e. deduplicated) reads supporting the alternate allele Default value: 0. --use-jdk-deflater,-jdk-deflater:Boolean Whether to use the JdkDeflater (as opposed to IntelDeflater) Default value: false. Possible values: {true, false} --use-jdk-inflater,-jdk-inflater:Boolean Whether to use the JdkInflater (as opposed to IntelInflater) Default value: false. Possible values: {true, false} --verbosity,-verbosity:LogLevel Control verbosity of logging. Default value: INFO. Possible values: {ERROR, WARNING, INFO, DEBUG} --version:Boolean display the version number for this tool Default value: false. Possible values: {true, false} Advanced Arguments: --disable-tool-default-read-filters,-disable-tool-default-read-filters:Boolean Disable all tool default read filters (WARNING: many tools will not function correctly without their default read filters on) Default value: false. 
Possible values: {true, false} --showHidden,-showHidden:Boolean display hidden arguments Default value: false. Possible values: {true, false} Conditional Arguments for readFilter: Valid only if "AmbiguousBaseReadFilter" is specified: --ambig-filter-bases:Integer Threshold number of ambiguous bases. If null, uses threshold fraction; otherwise, overrides threshold fraction. Default value: null. Cannot be used in conjunction with argument(s) maxAmbiguousBaseFraction --ambig-filter-frac:Double Threshold fraction of ambiguous bases Default value: 0.05. Cannot be used in conjunction with argument(s) maxAmbiguousBases Valid only if "FragmentLengthReadFilter" is specified: --max-fragment-length:Integer Maximum length of fragment (insert size) Default value: 1000000. --min-fragment-length:Integer Minimum length of fragment (insert size) Default value: 0. Valid only if "IntervalOverlapReadFilter" is specified: --keep-intervals:String One or more genomic intervals to keep This argument must be specified at least once. Required. Valid only if "LibraryReadFilter" is specified: --library,-library:String Name of the library to keep This argument must be specified at least once. Required. Valid only if "MappingQualityReadFilter" is specified: --maximum-mapping-quality:Integer Maximum mapping quality to keep (inclusive) Default value: null. --minimum-mapping-quality:Integer Minimum mapping quality to keep (inclusive) Default value: 10. Valid only if "MateDistantReadFilter" is specified: --mate-too-distant-length:Integer Minimum start location difference at which mapped mates are considered distant Default value: 1000. Valid only if "OverclippedReadFilter" is specified: --dont-require-soft-clips-both-ends:Boolean Allow a read to be filtered out based on having only 1 soft-clipped block. By default, both ends must have a soft-clipped block, setting this flag requires only 1 soft-clipped block Default value: false. 
Possible values: {true, false} --filter-too-short:Integer Minimum number of aligned bases Default value: 30. Valid only if "PlatformReadFilter" is specified: --platform-filter-name:String Platform attribute (PL) to match This argument must be specified at least once. Required. Valid only if "PlatformUnitReadFilter" is specified: --black-listed-lanes:String Platform unit (PU) to filter out This argument must be specified at least once. Required. Valid only if "ReadGroupBlackListReadFilter" is specified: --read-group-black-list:String A read group filter expression in the form "attribute:value", where "attribute" is a two character read group attribute such as "RG" or "PU". This argument must be specified at least once. Required. Valid only if "ReadGroupReadFilter" is specified: --keep-read-group:String The name of the read group to keep Required. Valid only if "ReadLengthReadFilter" is specified: --max-read-length:Integer Keep only reads with length at most equal to the specified value Required. --min-read-length:Integer Keep only reads with length at least equal to the specified value Default value: 1. Valid only if "ReadNameReadFilter" is specified: --read-name:String Keep only reads with this read name Required. Valid only if "ReadStrandFilter" is specified: --keep-reverse-strand-only:Boolean Keep only reads on the reverse strand Required. Possible values: {true, false} Valid only if "SampleReadFilter" is specified: --sample,-sample:String The name of the sample(s) to keep, filtering out all others This argument must be specified at least once. Required. Valid only if "SoftClippedReadFilter" is specified: --invert-soft-clip-ratio-filter:Boolean Inverts the results from this filter, causing all variants that would pass to fail and vice versa. Default value: false. Possible values: {true, false} --soft-clipped-leading-trailing-ratio:Double Threshold ratio of soft clipped bases (leading / trailing the cigar string) to total bases in read for read to be filtered. 
Default value: null. Cannot be used in conjunction with argument(s) minimumSoftClippedRatio --soft-clipped-ratio-threshold:Double Threshold ratio of soft clipped bases (anywhere in the cigar string) to total bases in read for read to be filtered. Default value: null. Cannot be used in conjunction with argument(s) minimumLeadingTrailingSoftClippedRatio ]]></help> <citations> <expand macro="citations"/> </citations> </tool>